[pypy-commit] benchmarks default: add dropbox/pyxl benchmark

fijal pypy.commits at gmail.com
Wed Jan 20 15:47:51 EST 2016


Author: fijal
Branch: 
Changeset: r345:283ed0844257
Date: 2016-01-20 21:47 +0100
http://bitbucket.org/pypy/benchmarks/changeset/283ed0844257/

Log:	add dropbox/pyxl benchmark

diff too long, truncating to 2000 out of 5559 lines

diff --git a/benchmarks.py b/benchmarks.py
--- a/benchmarks.py
+++ b/benchmarks.py
@@ -60,6 +60,7 @@
 opts = {
     'gcbench' : {'iteration_scaling' : .10},
     'pidigits': {'iteration_scaling' : .10},
+    'pyxl_bench': {'bm_env': {'PYTHONPATH': relative('lib/pyxl')}},
     'eparse'  : {'bm_env': {'PYTHONPATH': relative('lib/monte')}},
     'bm_mako' : {'bm_env': {'PYTHONPATH': relative('lib/mako')}},
     'bm_dulwich_log': {'bm_env': {'PYTHONPATH': relative('lib/dulwich-0.9.1')}},
@@ -83,7 +84,7 @@
              'raytrace-simple', 'crypto_pyaes', 'bm_mako', 'bm_chameleon',
              'json_bench', 'pidigits', 'hexiom2', 'eparse', 'deltablue',
              'bm_dulwich_log', 'bm_krakatau', 'bm_mdp', 'pypy_interp',
-             'sqlitesynth']:
+             'sqlitesynth', 'pyxl_bench']:
     _register_new_bm(name, name, globals(), **opts.get(name, {}))
 
 for name in ['names', 'iteration', 'tcp', 'pb', ]:#'web']:#, 'accepts']:
diff --git a/lib/pyxl/LICENSE b/lib/pyxl/LICENSE
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/lib/pyxl/MANIFEST b/lib/pyxl/MANIFEST
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/MANIFEST
@@ -0,0 +1,19 @@
+# file GENERATED by distutils, do NOT edit
+README
+finish_install.py
+pyxl.pth
+setup.py
+emacs/pyxl-mode.el
+pyxl/__init__.py
+pyxl/base.py
+pyxl/element.py
+pyxl/html.py
+pyxl/utils.py
+pyxl/codec/__init__.py
+pyxl/codec/parser.py
+pyxl/codec/register.py
+pyxl/codec/tokenizer.py
+pyxl/examples/__init__.py
+pyxl/examples/hello_world.py
+pyxl/scripts/__init__.py
+pyxl/scripts/parse_file.py
diff --git a/lib/pyxl/MANIFEST.in b/lib/pyxl/MANIFEST.in
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/MANIFEST.in
@@ -0,0 +1,2 @@
+include README pyxl.pth finish_install.py
+recursive-include emacs *.el
diff --git a/lib/pyxl/README b/lib/pyxl/README
new file mode 120000
--- /dev/null
+++ b/lib/pyxl/README
@@ -0,0 +1,1 @@
+README.md
\ No newline at end of file
diff --git a/lib/pyxl/README.md b/lib/pyxl/README.md
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/README.md
@@ -0,0 +1,235 @@
+Pyxl is an open source package that extends Python to support inline HTML. It converts HTML fragments into valid Python expressions, and is meant as a replacement for traditional python templating systems like [Mako](http://www.makotemplates.org/) or [Cheetah](http://www.cheetahtemplate.org/). It automatically escapes data, enforces correct markup and makes it easier to write reusable and well structured UI code. Pyxl was inspired by the [XHP](https://github.com/facebook/xhp/wiki) project at Facebook.
+
+This project only supports Python 2. However, a [Python 3 fork](https://github.com/gvanrossum/pyxl3) is available.
+
+## Motivation
+
+At Cove, where Pyxl was developed, we found that using templates was getting in the way of quickly building new features. There were the usual issues of remembering to escape data to prevent XSS holes, avoiding invalid markup and deciphering cryptic stack traces. More importantly, our templates were getting hard to manage and understand which made iterating on our product more work than should be necessary.
+
+Existing templating systems do support things like logic and reusable modules - but they are essentially like having a different programming language for writing UI which falls well short of python itself. The primary reason templating systems exist is because creating HTML in languages like python means writing crazy string manipulation code, or losing the niceness of writing actual HTML by doing something like this:
+
+```py
+import html
+print (
+    html.head().appendChild(
+        html.body().appendChild(
+                html.text("Hello World!"))))
+```
+
+To get around these limitations, we developed Pyxl which allowed us to treat HTML as a part of the python language itself. So, writing the above example with Pyxl would look like:
+
+```py
+# coding: pyxl
+print <html><body>Hello World!</body></html>
+```
+
+This meant no longer dealing with a separate "templating" language, and a lot more control over how we wrote our front-end code. Also, since Pyxl maps HTML to structured python objects and expressions instead of arbitrary blobs of strings, adding support for things like automatically escaping data was trivial. Switching to Pyxl led to much cleaner and modularized UI code, and allowed us to write new features and pages a lot quicker.
+
+## Installation
+
+Clone the repo and run the following commands from the directory you cloned to.
+
+```sh
+python setup.py build
+sudo python setup.py install
+sudo python finish_install.py
+```
+
+To confirm that Pyxl was correctly installed, run the following command from the same directory:
+
+```sh
+python pyxl/examples/hello_world.py
+```
+
+You should see the string `<html><body>Hello World!</body></html>` printed out. That's it! You're ready to use Pyxl.
+
+## Running the tests
+
+After installing pyxl:
+
+```sh
+easy_install unittest2
+python pyxl_tests.py
+```
+
+## How it works
+
+Pyxl converts HTML tags into python objects before the file is run through the interpreter, so the code that actually runs is regular python. For example, the `Hello World` example above is converted into:
+
+```py
+print x_html().append_children(x_body().append_children("Hello World!"))
+```
+
+Pyxl's usefulness comes from being able to write HTML rather than unwieldy object instantiations and function calls. Note that Pyxl automatically adds objects for all HTML tags to Python builtins, so there is no need to import `x_html` or `x_body` in the example above.
+
+The conversion to Python is relatively straightforward: Opening tags are converted into object instantiations for the respective tag, nested tags are passed in as arguments to the `append_children` method, and closing tags close the bracket to the `append_children` call. As a result, a big advantage of this is that stack traces on errors map directly to what you've written. To learn more about how Pyxl does this, see the **Implementation Details** section below.
+
+## Documentation
+
+All python files with inline HTML must have the following first line:
+
+```py
+# coding: pyxl
+```
+
+With that, you can start using HTML in your python file.
+
+### Inline Python Expressions
+
+Anything wrapped with {}'s is evaluated as a python expression. Please note that attribute values must be wrapped inside quotes, regardless of whether it contains a python expression or not. When used in attribute values, the python expression must evaluate to something that can be cast to unicode. When used inside a tag, the expression can evaluate to anything that can be cast to unicode, an HTML tag, or a list containing those two types. This is demonstrated in the example below:
+
+```py
+image_name = "bolton.png"
+image = <img src="/static/images/{image_name}" />
+
+text = "Michael Bolton"
+block = <div>{image}{text}</div>
+
+element_list = [image, text]
+block2 = <div>{element_list}</div>
+```
+
+### Dynamic Elements
+
+Pyxl converts tags into python objects in the background, which inherit from a class called [`x_base`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/base.py). This means that tags have certain methods you can call on them. Here is an example snippet that uses the `append` function to dynamically create an unordered list.
+
+```py
+items = ['Puppies', 'Dragons']
+nav = <ul />
+for text in items:
+    nav.append(<li>{text}</li>)
+```
+
+Another useful function is `children()`, which returns a list of all the child nodes for an element. `children()` accepts an optional selector string as an argument to filter the children. Currently, there is only support for filtering the children by a class (format: ".class_name"), id (format: "#id_string") or tag name. Here is a snippet which adds all `input` elements from an existing form to a new form:
+
+```py
+new_form = <form action="/submit" method="POST">{old_form.children("input")}</form>
+```
+
+### Attributes
+
+You can access any attribute of a tag as a member variable on the tag, or via the `attr(attr_name)` function. Setting attributes must happen via the `set_attr(attr_name, attr_value)` function, i.e. do not set attrs by directly setting member variables. To access attributes that contain '-' (hyphen) as a member variable, replace the hyphen with '_' (underscore). For this reason, pyxl does not allow attributes with an underscore in their name. Here is an example that demonstrates all these principles:
+
+```py
+fruit = <div data-text="tangerine" />
+print fruit.data_text
+fruit.set_attr('data-text', 'clementine')
+print fruit.attr('data-text') # alternate method for accessing attributes
+```
+
+### Escaping
+
+Pyxl automatically escapes all data and attribute values, therefore all your markup is XSS safe by default. One can explicitly avoid escaping by wrapping data in a call to `rawhtml`, but that only applies to data inside a tag. Everything in attribute values is always escaped. Note that static text inside tags (i.e. anything not inside {}'s) is considered regular HTML and is not escaped.
+
+```py
+safe_value = "<b>Puppies!</b>"
+unsafe_value = "<script>bad();</script>"
+unsafe_attr = '">'
+print (<div class="{unsafe_attr}">
+           {unsafe_value}
+           {rawhtml(safe_value)}
+       </div>)
+```
+
+The above script will print out:
+
+```html
+<div class="&quot;&gt;">
+    &lt;script&gt;bad();&lt;/script&gt;
+    <b>Puppies!</b>
+</div>
+```
+
+### UI Modules
+
+UI Modules are especially useful for creating re-usable building blocks in your application, making it quicker to implement new features, and keeping the UI consistent. Pyxl thinks of UI modules as user defined HTML tags, and so they are used just like you would use a `<div>` or any other tag.
+
+Creating UI modules in Pyxl simply means creating a class that inherits from [`x_element`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/element.py) and implements the `render()` method. Modules must be prefixed with `x_`. This is an arbitrary requirement, but is useful in separating out pyxl modules from other things.
+
+Arguments to a UI module are passed as attributes to the UI module tag. Attribute values for these tags need not evaluate to something that can be cast to unicode, but ONLY if the attribute value is a single python expression, i.e. the only thing inside the quotes is a {} wrapped python expression. This allows one to pass in any type to a UI module. To demonstrate, a useful UI module is a user badge, which displays a user profile picture with the user's name and some arbitrary content to the right of it:
+
+```py
+# coding: pyxl
+from pyxl.element import x_element
+
+class x_user_badge(x_element):
+    __attrs__ = {
+        'user': object,
+    }
+    def render(self):
+        return (
+            <div>
+                <img src="{self.user.profile_picture}" style="float: left; margin-right: 10px;"/>
+                <div style="display: table-cell;">
+                    <div>{self.user.name}</div>
+                    {self.children()}
+                </div>
+            </div>)
+```
+
+This makes the tag `<user_badge>` available to us which accepts `user` as an attribute which is an object that contains the user's name and profile picture. Here is an example of this new UI module being used.
+
+```py
+# coding: pyxl
+from some_module import x_user_badge
+
+user = User.get(some_user_id)
+content = <div>Any arbitrary content...</div>
+print <user_badge user="{user}">{content}</user_badge>
+```
+
+Some things to note about UI modules.
+
+* Module names must begin with `x_`, and modules must inherit from `x_element`
+* Modules must specify the attributes they accept via the `__attrs__` class variable. This is a dictionary where the key is the attribute name, and the value is the attribute type. Passing an attribute that is not listed in `__attrs__` will result in an error. The only exceptions are attributes accepted by all pyxl elements i.e. id, class, style, onclick, title and anything prefixed with "data-" or "aria-"
+* Providing a `class` attribute for a UI module element will automatically append the class string to the underlying HTML element the UI module renders. This is useful when you want to style UI modules differently based on where it is being rendered.
+
+### Fragments
+
+The [`pyxl.html`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/html.py) module provides the `<frag>` tag, which allows one to group a set of HTML tags without a parent. Rendering the `<frag>` tag simply renders all the children, and doesn't add to the markup.
+
+### Conditional HTML
+
+Pyxl avoids support for logic within the HTML flow, except for one case where we found it especially useful: conditionally rendering HTML. That is why Pyxl provides the `<if>` tag, which takes an attr called `cond`. Children of an `<if>` are only rendered if `cond` evaluates to True.
+
+## Implementation Details
+
+### Parsing
+
+Pyxl uses support for specifying source code encodings as described in [PEP 263](http://www.python.org/dev/peps/pep-0263/) to do what it does. The functionality was originally provided so that python developers could write code in non-ascii languages (eg. chinese variable names). Pyxl creates a custom encoding called pyxl which allows it to convert XML into regular python before the file is compiled. Once the pyxl codec is registered, any file starting with `# coding: pyxl` is run through the pyxl parser before compilation.
+
+To register the pyxl codec, one must import the [`pyxl.codec.register`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/codec/register.py) module. The **Installation Process** makes it so that this always happens at python startup via the final `sudo python finish_install.py` step. What this step is doing is adding a file called `pyxl.pth` in your python site-packages directory, which imports the `pyxl.codec.register` module. Anything with a `.pth` extension in the site-packages directory is run automatically at python startup. Read more about that [here](http://docs.python.org/library/site.html).
+
+Some people may prefer avoiding adding pyxl.pth to their site-packages directory, in which case they should skip the final step of the installation process and explicitly import `pyxl.codec.register` in the entry point of their application.
+
+The pyxl encoding is a wrapper around utf-8, but every time it encounters a blob of HTML in the file, it runs it through python's [`HTMLParser`](http://docs.python.org/library/htmlparser.html) and replaces the HTML with python objects. As explained above, opening tags are converted into object instantiations for the respective tag, nested tags are passed in as arguments to the `append_children` method, and closing tags close the bracket to the `append_children` call. The code for these conversions can be seen [here](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/codec/parser.py).
+
+### HTML Objects
+
+Though the syntactic sugar of being able to write HTML in python is pyxl's biggest usefulness, pyxl does also provide a basic framework for dealing with HTML tags as objects. This is not a full DOM implementation, but provides most of the necessary functionality. All the basic HTML tags are represented by objects defined in the [`pyxl.html`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/html.py) module, all of which inherit from the [`x_base`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/base.py) class.
+
+An HTML tag is rendered by calling the `to_string()` method (called automatically when tags are cast to strings), which recursively calls `to_string()` on all its children. Therefore, it should be noted that almost all the work happens only once `to_string()` is called. It is also at this stage where attribute values and data are escaped. Most of the work consists of string concatenations, and performance based on applications we've written is equivalent to templating engines like Cheetah. Note that there is probably some low hanging fruit in performance improvements that we haven't looked into (mostly because it hasn't been a problem).
+
+## Editor Support
+
+### Emacs
+
+Grab pyxl-mode.el from the downloaded package under `pyxl/emacs/pyxl-mode.el` or copy it from [here](https://github.com/dropbox/pyxl/blob/master/emacs/pyxl-mode.el). To install, drop the file anywhere on your load path, and add the following to your ~/.emacs file (GNU Emacs) or ~/.xemacs/init.el file (XEmacs):
+
+```lisp
+(autoload 'pyxl-mode "pyxl-mode" "Major mode for editing pyxl" t)
+(setq auto-mode-alist
+     (cons '("\\.py\\'" . pyxl-mode) auto-mode-alist))
+```
+
+### Vim
+
+Pyxl detection, syntax, and indent files are in the `vim` directory. The easiest way to install the vim support is via [pathogen](https://github.com/tpope/vim-pathogen); with pathogen, you can simply link or copy the directory into your bundle directory. Without pathogen, place the various files in the corresponding subdirectories of your .vim directory.
+
+### Pycharm
+
+See [pycharm-pyxl](https://github.com/christoffer/pycharm-pyxl).
+
+### Sublime Text
+
+See [sublime-pyxl](https://github.com/yyjhao/sublime-pyxl).
diff --git a/lib/pyxl/emacs/pyxl-mode.el b/lib/pyxl/emacs/pyxl-mode.el
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/emacs/pyxl-mode.el
@@ -0,0 +1,132 @@
+;;; pyxl-mode.el --- major mode for editing pyxl enabled Python
+;;;
+;;; @author Akhil Wable
+;;;
+;;; To install, drop this anywhere on your load path, and add the following to
+;;; your ~/.emacs file (GNU Emacs) or ~/.xemacs/init.el file (XEmacs):
+;;;
+;;; (autoload 'pyxl-mode "pyxl-mode" "Major mode for editing pyxl" t)
+;;; (setq auto-mode-alist
+;;;      (cons '("\\.py\\'" . pyxl-mode) auto-mode-alist))
+;;;
+
+(require 'cl)
+(require 'python)
+
+(defcustom pyxl-mode-hook nil
+  "list of functions to be executed on entry to pyxl-mode."
+  :type 'hook
+  :group 'python)
+
+(defun pyxl-context-p ()
+  "Does the range include some HTML?"
+  (let ((start-rexp "([ \n\t]*<")
+        (end-rexp ">[ \n\t]*)"))
+    (let ((backward-start (save-excursion (re-search-backward start-rexp nil t)))
+          (backward-end (save-excursion (re-search-backward end-rexp nil t))))
+      (if (and backward-start
+               (or (not backward-end) (< backward-end backward-start)))
+          backward-start
+        nil))))
+
+(defun pyxl-back-to-indentation ()
+  (let ((first-non-indent
+         (save-excursion
+           (back-to-indentation)
+           (point))))
+    (if (< (point) first-non-indent)
+        (back-to-indentation))))
+
+(defun pyx-indent-line-helper ()
+  "Indent a line containing html."
+  ;; nesting regex matches either an opening tag OR a closing tag
+  (let ((nesting-regex "\\(<[:a-zA-Z][:a-zA-Z0-9_]*\\)\\|\\(</\\|/>\\)")
+        (indent-from (line-beginning-position))
+        (depth 1))
+    (save-excursion
+      (re-search-backward "([ \n\t]*<" nil t)
+      (let ((starting-indent (current-indentation)))
+        (while (and (< (point) indent-from)
+                    (re-search-forward nesting-regex indent-from t))
+          (if (match-string 1) (incf depth))
+          (if (match-string 2) (decf depth)))
+        (goto-char indent-from)
+        (indent-line-to
+         (+ starting-indent
+            (* 4 depth)
+            (if (looking-at "[ \t]*\\(?:</\\|/>\\)") -4 0)))))
+    (pyxl-back-to-indentation)))
+
+(defun pyxl-indent-line ()
+  "Modify indent for a line of html."
+  (interactive)
+  (save-excursion
+    (if (pyxl-context-p)
+     ;; If a line is inside html, use the custom indent function
+        (pyx-indent-line-helper)
+    ;; Fall back to regular python indentation for no html
+    (python-indent-line)))
+
+  (pyxl-back-to-indentation))
+
+(defun pyxl-indent-region (start end)
+  (save-excursion
+    (goto-char end)
+    (setq end (point-marker))
+    (goto-char start)
+    (or (bolp) (forward-line 1))
+    (while (< (point) end)
+      (or (and (bolp) (eolp))
+          (pyxl-indent-line))
+      (forward-line 1))
+    (move-marker end nil)))
+
+(defcustom pyxl-default-face 'default
+  "Default face in pyxl-mode buffers."
+  :type 'face
+  :group 'pyxl-mode)
+
+(defconst pyxl-font-lock-keywords
+  (append
+   (list
+    ;; tags
+    '("\\(</?\\)\\([:a-zA-Z0-9_]+\\)" (1 pyxl-default-face) (2 font-lock-function-name-face))
+
+    ;; comments
+    '("<!--[^>]*-->" (0 font-lock-comment-face))
+
+    ;; XML entities
+    '("&\\w+;" . font-lock-constant-face)
+    )
+   python-font-lock-keywords)
+  "Font Lock for pyxl mode.")
+
+;;;###autoload
+(define-derived-mode pyxl-mode python-mode "pyxl"
+  "Major mode for editing Python code with pyxl."
+
+  ;; Adapted from python-mode.el
+  (set (make-local-variable 'font-lock-defaults)
+       '(pyxl-font-lock-keywords
+         nil
+         nil
+         nil
+         nil
+         '(font-lock-syntactic-keywords . python-font-lock-syntactic-keywords)
+         ;; This probably isn't worth it.
+         ;; (font-lock-syntactic-face-function
+         ;;  . python-font-lock-syntactic-face-function)
+         ))
+
+  (setq indent-line-function 'pyxl-indent-line)
+  (setq indent-region-function 'pyxl-indent-region)
+  (run-hooks 'pyxl-mode-hook))
+
+(provide 'pyxl-mode)
+
+;; In python-mode.el RET is bound to newline-and-indent, which indents the next line if necessary.
+;; In python.el which we're extending, this is bound to C-j instead.
+;; This binds RET to newline-and-indent
+(add-hook
+ 'python-mode-hook
+ '(lambda () (define-key python-mode-map "\C-m" 'newline-and-indent)))
diff --git a/lib/pyxl/finish_install.py b/lib/pyxl/finish_install.py
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/finish_install.py
@@ -0,0 +1,5 @@
+"""Copy ``pyxl.pth`` into the library directory reported by distutils."""
+import os
+import shutil
+from distutils.sysconfig import get_python_lib
+
+# Look for pyxl.pth next to this script rather than in the current working
+# directory, so the step also works when invoked from somewhere else.
+pth_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pyxl.pth')
+
+python_lib = get_python_lib()
+shutil.copy(pth_file, python_lib)
diff --git a/lib/pyxl/pyxl.pth b/lib/pyxl/pyxl.pth
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/pyxl.pth
@@ -0,0 +1,1 @@
+import pyxl.codec.register
diff --git a/lib/pyxl/pyxl/__init__.py b/lib/pyxl/pyxl/__init__.py
new file mode 100755
--- /dev/null
+++ b/lib/pyxl/pyxl/__init__.py
@@ -0,0 +1,1 @@
+#!/usr/bin/env python
diff --git a/lib/pyxl/pyxl/base.py b/lib/pyxl/pyxl/base.py
new file mode 100755
--- /dev/null
+++ b/lib/pyxl/pyxl/base.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python
+
+# We want a way to generate non-colliding 'pyxl<num>' ids for elements, so we're
+# using a non-cryptographically secure random number generator. We want it to be
+# insecure because these aren't being used for anything cryptographic and it's
+# much faster (2x). We're also not using NumPy (which is even faster) because
+# it's a difficult dependency to fulfill purely to generate random numbers.
+import random
+import sys
+
+from pyxl.utils import escape
+
+class PyxlException(Exception):
+    """Error raised by pyxl elements, e.g. for unknown or invalid attributes."""
+
+class x_base_metaclass(type):
+    """Metaclass that merges ``__attrs__`` tables and sets ``__tag__``.
+
+    For each new class the ``__attrs__`` of the first base that has one is
+    copied and updated with the class's own ``__attrs__``; ``__tag__`` is
+    the class name with its first two characters removed.
+    """
+    def __init__(self, name, parents, attrs):
+        super(x_base_metaclass, self).__init__(name, parents, attrs)
+
+        # Only the first base carrying __attrs__ contributes inherited entries.
+        inherited = {}
+        for parent in parents:
+            if hasattr(parent, '__attrs__'):
+                inherited = parent.__attrs__
+                break
+
+        # Look in the class's own namespace so an inherited table is not
+        # mistaken for a local declaration.
+        declared = self.__dict__.get('__attrs__', {})
+
+        # Dont allow '_' in attr names
+        for attr_name in declared:
+            assert '_' not in attr_name, (
+                "%s: '_' not allowed in attr names, use '-' instead" % attr_name)
+
+        merged = dict(inherited)
+        merged.update(declared)
+        self.__attrs__ = merged
+        self.__tag__ = name[2:]
+
+class x_base(object):
+    """Base class of every pyxl element.
+
+    An element stores its attribute values in ``__attributes__`` and its
+    children in ``__children__``.  Attribute names and values are checked
+    against the merged ``__attrs__`` table prepared by
+    ``x_base_metaclass``.  Subclasses implement ``_to_list`` to render.
+    """
+
+    __metaclass__ = x_base_metaclass
+    # Attribute name -> type used by set_attr() to validate/coerce values.
+    # A list as the type is treated as an enum of allowed values.
+    __attrs__ = {
+        # HTML attributes
+        'accesskey': unicode,
+        'class': unicode,
+        'dir': unicode,
+        'id': unicode,
+        'lang': unicode,
+        'maxlength': unicode,
+        'role': unicode,
+        'style': unicode,
+        'tabindex': int,
+        'title': unicode,
+        'xml:lang': unicode,
+
+        # Microdata HTML attributes
+        'itemtype': unicode,
+        'itemscope': unicode,
+        'itemprop': unicode,
+        'itemid': unicode,
+        'itemref': unicode,
+
+        # JS attributes
+        'onabort': unicode,
+        'onblur': unicode,
+        'onchange': unicode,
+        'onclick': unicode,
+        'ondblclick': unicode,
+        'onerror': unicode,
+        'onfocus': unicode,
+        'onkeydown': unicode,
+        'onkeypress': unicode,
+        'onkeyup': unicode,
+        'onload': unicode,
+        'onmousedown': unicode,
+        'onmouseenter': unicode,
+        'onmouseleave': unicode,
+        'onmousemove': unicode,
+        'onmouseout': unicode,
+        'onmouseover': unicode,
+        'onmouseup': unicode,
+        'onreset': unicode,
+        'onresize': unicode,
+        'onselect': unicode,
+        'onsubmit': unicode,
+        'onunload': unicode,
+        }
+
+    def __init__(self, **kwargs):
+        """Create an element; keyword names are mapped through
+        ``_fix_attribute_name`` and stored with ``set_attr``."""
+        self.__attributes__ = {}
+        self.__children__ = []
+
+        for name, value in kwargs.iteritems():
+            self.set_attr(x_base._fix_attribute_name(name), value)
+
+    def __call__(self, *children):
+        """Append ``children`` and return ``self`` so calls can be chained."""
+        self.append_children(children)
+        return self
+
+    def get_id(self):
+        """Return the element's id, assigning a random ``pyxl<num>`` id
+        first when none is set."""
+        eid = self.attr('id')
+        if not eid:
+            eid = 'pyxl%d' % random.randint(0, sys.maxint)
+            self.set_attr('id', eid)
+        return eid
+
+    def children(self, selector=None, exclude=False):
+        """Return the children, optionally filtered by ``selector``.
+
+        ``'.name'`` selects by class, ``'#name'`` by id and any other
+        string by tag name.  With ``exclude`` the match is inverted.
+        Without a selector the internal child list itself is returned.
+        """
+        if not selector:
+            return self.__children__
+
+        wanted = selector[1:]
+
+        # filter by class
+        if selector[0] == '.':
+            # Compare whole class tokens: a substring test would let
+            # '.btn' match an element whose class is 'btn-large'.
+            # Text children have no classes and never match.
+            select = lambda x: (isinstance(x, x_base) and
+                                wanted in x.get_class().split())
+
+        # filter by id
+        elif selector[0] == '#':
+            # Read the attribute directly: get_id() would assign a random
+            # id to every child that has none just because it was looked at.
+            select = lambda x: isinstance(x, x_base) and wanted == x.attr('id')
+
+        # filter by tag name
+        else:
+            select = lambda x: x.__class__.__name__ == ('x_%s' % selector)
+
+        if exclude:
+            func = lambda x: not select(x)
+        else:
+            func = select
+
+        return filter(func, self.__children__)
+
+    def append(self, child):
+        """Add ``child`` at the end; iterables are flattened one level.
+        ``None`` and ``False`` are skipped."""
+        if type(child) in (list, tuple) or hasattr(child, '__iter__'):
+            self.__children__.extend(c for c in child if c is not None and c is not False)
+        elif child is not None and child is not False:
+            self.__children__.append(child)
+
+    def prepend(self, child):
+        """Insert ``child`` at the front unless it is ``None`` or ``False``."""
+        if child is not None and child is not False:
+            self.__children__.insert(0, child)
+
+    def __getattr__(self, name):
+        """Fallback attribute access: ``el.foo_bar`` reads the element
+        attribute ``foo-bar`` through ``attr``."""
+        return self.attr(name.replace('_', '-'))
+
+    def attr(self, name, default=None):
+        """Return the value stored for ``name``.
+
+        When no value is stored, an enum-typed attribute yields its first
+        enum entry; any other attribute yields ``default``.
+
+        Raises PyxlException if the element does not allow ``name`` or
+        the enum declaration is malformed.
+        """
+        # this check is fairly expensive (~8% of cost)
+        if not self.allows_attribute(name):
+            raise PyxlException('<%s> has no attr named "%s"' % (self.__tag__, name))
+
+        value = self.__attributes__.get(name)
+
+        if value is not None:
+            return value
+
+        attr_type = self.__attrs__.get(name, unicode)
+        if isinstance(attr_type, list):
+            if not attr_type:
+                raise PyxlException('Invalid attribute definition')
+
+            if None in attr_type[1:]:
+                raise PyxlException('None must be the first, default value')
+
+            return attr_type[0]
+
+        return default
+
+    def transfer_attributes(self, element):
+        """Copy to ``element`` every attribute it allows and has not set."""
+        for name, value in self.__attributes__.iteritems():
+            if element.allows_attribute(name) and element.attr(name) is None:
+                element.set_attr(name, value)
+
+    def set_attr(self, name, value):
+        """Store ``value`` for ``name``; ``None`` removes the attribute.
+
+        Enum-typed attributes must receive one of the listed values; other
+        values are converted to the declared type when they are not
+        already an instance of it.
+
+        Raises PyxlException for a disallowed name, a value outside the
+        enum, or a value that cannot be converted.
+        """
+        # this check is fairly expensive (~8% of cost)
+        if not self.allows_attribute(name):
+            raise PyxlException('<%s> has no attr named "%s"' % (self.__tag__, name))
+
+        if value is not None:
+            attr_type = self.__attrs__.get(name, unicode)
+
+            if isinstance(attr_type, list):
+                # support for enum values in pyxl attributes
+                values_enum = attr_type
+                assert values_enum, 'Invalid attribute definition'
+
+                if value not in values_enum:
+                    msg = '%s: %s: incorrect value "%s" for "%s". Expecting enum value %s' % (
+                        self.__tag__, self.__class__.__name__, value, name, values_enum)
+                    raise PyxlException(msg)
+
+            else:
+                try:
+                    # Validate type of attr and cast to correct type if possible
+                    value = value if isinstance(value, attr_type) else attr_type(value)
+                except Exception:
+                    # Re-raise as PyxlException but keep the original traceback.
+                    exc_tb = sys.exc_info()[2]
+                    msg = '%s: %s: incorrect type for "%s". expected %s, got %s' % (
+                        self.__tag__, self.__class__.__name__, name, attr_type, type(value))
+                    exception = PyxlException(msg)
+                    raise exception, None, exc_tb
+
+            self.__attributes__[name] = value
+
+        elif name in self.__attributes__:
+            del self.__attributes__[name]
+
+    def get_class(self):
+        """Return the ``class`` attribute, or ``''`` when it is not set."""
+        return self.attr('class', '')
+
+    def add_class(self, xclass):
+        """Append ``xclass`` to the ``class`` attribute, space separated.
+        An empty ``xclass`` is ignored."""
+        if not xclass: return
+        current_class = self.attr('class')
+        if current_class: current_class += ' ' + xclass
+        else: current_class = xclass
+        self.set_attr('class', current_class)
+
+    def append_children(self, children):
+        """Call ``append`` for every item of ``children``."""
+        for child in children:
+            self.append(child)
+
+    def attributes(self):
+        """Return the internal name -> value dict (not a copy)."""
+        return self.__attributes__
+
+    def set_attributes(self, attrs_dict):
+        """Call ``set_attr`` for every item of ``attrs_dict``."""
+        for name, value in attrs_dict.iteritems():
+            self.set_attr(name, value)
+
+    def allows_attribute(self, name):
+        """Return True for declared names and any ``data-``/``aria-`` name."""
+        return (name in self.__attrs__ or name.startswith('data-') or name.startswith('aria-'))
+
+    def to_string(self):
+        """Render the element by joining the pieces from ``_to_list``."""
+        l = []
+        self._to_list(l)
+        return u''.join(l)
+
+    def _to_list(self, l):
+        """Append the rendered pieces to ``l``; subclasses must override."""
+        raise NotImplementedError()
+
+    def __str__(self):
+        return self.to_string()
+
+    def __unicode__(self):
+        return self.to_string()
+
+    @staticmethod
+    def _render_child_to_list(child, l):
+        """Render ``child`` into ``l``: elements render themselves, any
+        other non-None value is passed through ``escape``."""
+        if isinstance(child, x_base): child._to_list(l)
+        elif child is not None: l.append(escape(child))
+
+    @staticmethod
+    def _fix_attribute_name(name):
+        """Map a keyword-argument name to the attribute name it stands
+        for (``xclass`` -> ``class``, ``_`` -> ``-``, ``COLON`` -> ``:``)."""
+        if name == 'xclass': return 'class'
+        if name == 'xfor': return 'for'
+        return name.replace('_', '-').replace('COLON', ':')
diff --git a/lib/pyxl/pyxl/browser_hacks.py b/lib/pyxl/pyxl/browser_hacks.py
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/pyxl/browser_hacks.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from pyxl.base import x_base
+from pyxl.utils import escape
+
+class x_cond_comment(x_base):
+    ''' Renders its children between ``<!--[if cond]>`` and
+        ``<![endif]-->``. '''
+    __attrs__ = {
+        'cond': unicode,
+        }
+
+    def _to_list(self, l):
+        # allow '&', escape everything else from cond
+        raw_cond = self.__attributes__.get('cond', '')
+        safe_cond = '&'.join([escape(piece) for piece in raw_cond.split('&')])
+
+        l.append(u'<!--[if ')
+        l.append(safe_cond)
+        l.append(u']>')
+
+        render = x_base._render_child_to_list
+        for child in self.__children__:
+            render(child, l)
+
+        l.append(u'<![endif]-->')
+
+class x_cond_noncomment(x_base):
+    ''' Conditional comment whose children are still parsed by browsers
+        that do not support conditional comments. '''
+    __attrs__ = {
+        'cond': unicode,
+        }
+
+    def _to_list(self, l):
+        # allow '&', escape everything else from cond
+        raw_cond = self.__attributes__.get('cond', '')
+        safe_cond = '&'.join([escape(piece) for piece in raw_cond.split('&')])
+
+        l.append(u'<!--[if ')
+        l.append(safe_cond)
+        l.append(u']><!-->')
+
+        render = x_base._render_child_to_list
+        for child in self.__children__:
+            render(child, l)
+
+        l.append(u'<!--<![endif]-->')
+
+
diff --git a/lib/pyxl/pyxl/codec/__init__.py b/lib/pyxl/pyxl/codec/__init__.py
new file mode 100755
--- /dev/null
+++ b/lib/pyxl/pyxl/codec/__init__.py
@@ -0,0 +1,1 @@
+#!/usr/bin/env python
diff --git a/lib/pyxl/pyxl/codec/html_tokenizer.py b/lib/pyxl/pyxl/codec/html_tokenizer.py
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/pyxl/codec/html_tokenizer.py
@@ -0,0 +1,416 @@
+"""
+A naive but strict HTML tokenizer. Based directly on
+http://www.w3.org/TR/2011/WD-html5-20110525/tokenization.html
+
+In the ATTRIBUTE_VALUE and BEFORE_ATTRIBUTE_VALUE states, python tokens are accepted.
+"""
+
+import sys
+from collections import OrderedDict
+
+class State(object):
+    """Integer constants naming the tokenizer states (gaps are states
+    this tokenizer does not use)."""
+    DATA = 1
+    # unused states: charrefs, RCDATA, script, RAWTEXT, PLAINTEXT
+    TAG_OPEN = 7
+    END_TAG_OPEN = 8
+    TAG_NAME = 9
+    # unused states: RCDATA, RAWTEXT, script
+    BEFORE_ATTRIBUTE_NAME = 34
+    ATTRIBUTE_NAME = 35
+    AFTER_ATTRIBUTE_NAME = 36
+    BEFORE_ATTRIBUTE_VALUE = 37
+    ATTRIBUTE_VALUE_DOUBLE_QUOTED = 38
+    ATTRIBUTE_VALUE_SINGLE_QUOTED = 39
+    ATTRIBUTE_VALUE_UNQUOTED = 40
+    # unused state: CHARREF_IN_ATTRIBUTE_VALUE = 41
+    AFTER_ATTRIBUTE_VALUE = 42
+    SELF_CLOSING_START_TAG = 43
+    # unused state: BOGUS_COMMENT_STATE = 44
+    MARKUP_DECLARATION_OPEN = 45
+    COMMENT_START = 46
+    COMMENT_START_DASH = 47
+    COMMENT = 48
+    COMMENT_END_DASH = 49
+    COMMENT_END = 50
+    # unused state: COMMENT_END_BANG = 51
+    DOCTYPE = 52
+    DOCTYPE_CONTENTS = 53 # Gross oversimplification. Not to spec.
+    # unused states: doctypes
+    CDATA_SECTION = 68
+
+    @classmethod
+    def state_name(cls, state_val):
+        """Return the name of the class attribute equal to ``state_val``."""
+        found = next((name for name, value in cls.__dict__.iteritems()
+                      if value == state_val), None)
+        if found is not None:
+            return found
+        assert False, "impossible state value %r!" % state_val
+
+class Tag(object):
+    """Tag under construction: name, ordered attributes and two flags
+    telling whether it is an end tag or a self-closing tag."""
+    def __init__(self):
+        self.tag_name = None
+        # Attributes are kept in the order they are recorded.
+        self.attrs = OrderedDict()
+        self.endtag = self.startendtag = False
+
+class ParseError(Exception):
+    """Raised by the tokenizer for structurally invalid markup."""
+
+class BadCharError(Exception):
+    """Raised when a character is not acceptable in the current state."""
+    def __init__(self, state, char):
+        message = "unexpected character %r in state %r" % (
+            char, State.state_name(state))
+        super(BadCharError, self).__init__(message)
+
+class Unimplemented(Exception):
+    """Marker exception for constructs the tokenizer does not implement."""
+
+class HTMLTokenizer(object):
+    """Character-at-a-time HTML tokenizer.
+
+    ``feed`` consumes one character and drives a state machine over the
+    ``State`` constants; ``feed_python`` injects python tokens into an
+    attribute value.  Completed tokens are reported through the
+    ``handle_*`` methods, which subclasses must override.
+    """
+
+    def __init__(self):
+        self.state = State.DATA
+
+        # attribute_value is a list, where each element is either a string or a list of python
+        # tokens.
+
+        self.data = ""
+        self.tag = None
+        self.tag_name = None
+        self.attribute_name = None
+        self.attribute_value = None
+        self.markup_declaration_buffer = None
+        # Holds a possible partial "]]>" terminator while inside CDATA.
+        self.cdata_buffer = ""
+
+    def handle_data(self, data):
+        assert False, "subclass should override"
+
+    def handle_starttag(self, tag_name, attrs):
+        assert False, "subclass should override"
+
+    def handle_startendtag(self, tag_name, attrs):
+        assert False, "subclass should override"
+
+    def handle_endtag(self, tag_name):
+        assert False, "subclass should override"
+
+    def handle_comment(self, tag_name):
+        assert False, "subclass should override"
+
+    def handle_doctype(self, data):
+        assert False, "subclass should override"
+
+    def handle_cdata(self, tag_name):
+        assert False, "subclass should override"
+
+    def emit_data(self):
+        """Report the buffered text through ``handle_data`` and reset it."""
+        self.handle_data(self.data)
+        self.data = ""
+
+    def emit_tag(self):
+        """Report the current tag through the matching ``handle_*`` method.
+
+        Raises ParseError if the tag is both an end tag and self-closing.
+        """
+        if self.tag.startendtag and self.tag.endtag:
+            raise ParseError("both startendtag and endtag!?")
+        if self.tag.startendtag:
+            self.handle_startendtag(self.tag.tag_name, self.tag.attrs)
+        elif self.tag.endtag:
+            self.handle_endtag(self.tag.tag_name)
+        else:
+            self.handle_starttag(self.tag.tag_name, self.tag.attrs)
+
+    def emit_comment(self):
+        """Report the buffered comment text and reset the buffer."""
+        self.handle_comment(self.data)
+        self.data = ""
+
+    def emit_doctype(self):
+        """Report the buffered doctype text and reset the buffer."""
+        self.handle_doctype(self.data)
+        self.data = ""
+
+    def emit_cdata(self):
+        """Report the buffered CDATA text and reset the buffer."""
+        self.handle_cdata(self.data)
+        self.data = ""
+
+    def got_attribute(self):
+        """Record the pending attribute on the current tag and clear it.
+
+        The stored value is ``None`` when the attribute had no value.
+        Raises ParseError when the name was already used on this tag.
+        """
+        if self.attribute_name in self.tag.attrs:
+            raise ParseError("repeat attribute name %r" % self.attribute_name)
+        self.tag.attrs[self.attribute_name] = self.attribute_value
+        self.attribute_name = None
+        self.attribute_value = None
+
+    def add_data_char(self, build, c):
+        """ For adding a new character to e.g. an attribute value """
+        # Extend the trailing string part; after a python-token part (a
+        # list) start a new string part instead.
+        if len(build) and type(build[-1]) == str:
+            build[-1] += c
+        else:
+            build.append(c)
+
+    def feed(self, c):
+        """Consume the single character ``c`` and advance the state machine.
+
+        Raises BadCharError when ``c`` is not allowed in the current state
+        and ParseError for a repeated attribute name or contradictory tag
+        flags.
+        """
+        if self.state == State.DATA:
+            if c == '<':
+                self.emit_data()
+                self.state = State.TAG_OPEN
+            # Pass through; it's the browser's problem to understand these.
+            #elif c == '&':
+            #    raise Unimplemented
+            else:
+                self.data += c
+
+        elif self.state == State.TAG_OPEN:
+            self.tag = Tag()
+            if c == '!':
+                self.markup_declaration_buffer = ""
+                self.state = State.MARKUP_DECLARATION_OPEN
+            elif c == '/':
+                self.state = State.END_TAG_OPEN
+            elif c.isalpha():
+                self.tag.tag_name = c
+                self.state = State.TAG_NAME
+            else:
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.END_TAG_OPEN:
+            self.tag.endtag = True
+            if c.isalpha():
+                self.tag.tag_name = c
+                self.state = State.TAG_NAME
+            else:
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.TAG_NAME:
+            if c in '\t\n\f ':
+                self.state = State.BEFORE_ATTRIBUTE_NAME
+            elif c == '/':
+                self.state = State.SELF_CLOSING_START_TAG
+            elif c == '>':
+                self.emit_tag()
+                self.state = State.DATA
+            else:
+                self.tag.tag_name += c
+
+        elif self.state == State.BEFORE_ATTRIBUTE_NAME:
+            if c in '\t\n\f ':
+                pass
+            elif c == '/':
+                self.state = State.SELF_CLOSING_START_TAG
+            elif c == '>':
+                self.emit_tag()
+                self.state = State.DATA
+            elif c in "\"'<=":
+                raise BadCharError(self.state, c)
+            else:
+                self.attribute_name = c.lower()
+                self.state = State.ATTRIBUTE_NAME
+
+        elif self.state == State.ATTRIBUTE_NAME:
+            if c in '\t\n\f ':
+                self.state = State.AFTER_ATTRIBUTE_NAME
+            elif c == '/':
+                self.got_attribute()
+                self.state = State.SELF_CLOSING_START_TAG
+            elif c == '=':
+                self.state = State.BEFORE_ATTRIBUTE_VALUE
+            elif c == '>':
+                # Record the valueless attribute before emitting, as the
+                # '/' branch above and AFTER_ATTRIBUTE_NAME do; otherwise
+                # it is dropped from the tag.
+                self.got_attribute()
+                self.emit_tag()
+                self.state = State.DATA
+            elif c in "\"'<":
+                raise BadCharError(self.state, c)
+            else:
+                self.attribute_name += c.lower()
+
+        elif self.state == State.AFTER_ATTRIBUTE_NAME:
+            if c in '\t\n\f ':
+                pass
+            elif c == '/':
+                self.got_attribute()
+                self.state = State.SELF_CLOSING_START_TAG
+            elif c == '=':
+                self.state = State.BEFORE_ATTRIBUTE_VALUE
+            elif c == '>':
+                self.got_attribute()
+                self.emit_tag()
+                self.state = State.DATA
+            elif c in "\"'<":
+                raise BadCharError(self.state, c)
+            else:
+                # Any other character starts the next attribute.  Without
+                # this branch the character was swallowed and a following
+                # "=value" was attached to the previous attribute name.
+                self.got_attribute()
+                self.attribute_name = c.lower()
+                self.state = State.ATTRIBUTE_NAME
+
+        elif self.state == State.BEFORE_ATTRIBUTE_VALUE:
+            if c in '\t\n\f ':
+                pass
+            elif c == '"':
+                self.attribute_value = []
+                self.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED
+            elif c == '&':
+                self.attribute_value = []
+                self.state = State.ATTRIBUTE_VALUE_UNQUOTED
+                # rehandle c.  Call this class's feed explicitly: a
+                # subclass may override feed() with another signature.
+                HTMLTokenizer.feed(self, c)
+            elif c == "'":
+                self.attribute_value = []
+                self.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED
+            elif c in '><=`':
+                raise BadCharError(self.state, c)
+            else:
+                self.attribute_value = [c]
+                self.state = State.ATTRIBUTE_VALUE_UNQUOTED
+
+        elif self.state == State.ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+            if c == '"':
+                self.state = State.AFTER_ATTRIBUTE_VALUE
+            # Pass through; it's the browser's problem to understand these.
+            #elif c == '&':
+            #    raise Unimplemented
+            else:
+                self.add_data_char(self.attribute_value, c)
+
+        elif self.state == State.ATTRIBUTE_VALUE_SINGLE_QUOTED:
+            if c == "'":
+                self.state = State.AFTER_ATTRIBUTE_VALUE
+            # Pass through; it's the browser's problem to understand these.
+            #elif c == '&':
+            #    raise Unimplemented
+            else:
+                self.add_data_char(self.attribute_value, c)
+
+        elif self.state == State.ATTRIBUTE_VALUE_UNQUOTED:
+            if c in '\t\n\f ':
+                self.got_attribute()
+                self.state = State.BEFORE_ATTRIBUTE_NAME
+            elif c == '>':
+                self.got_attribute()
+                self.emit_tag()
+                self.state = State.DATA
+            elif c in "\"'<=`":
+                raise BadCharError(self.state, c)
+            # Pass through; it's the browser's problem to understand these.
+            #elif c == '&':
+            #    raise Unimplemented
+            else:
+                self.add_data_char(self.attribute_value, c)
+
+        elif self.state == State.AFTER_ATTRIBUTE_VALUE:
+            self.got_attribute()
+            if c in '\t\n\f ':
+                self.state = State.BEFORE_ATTRIBUTE_NAME
+            elif c == '/':
+                self.state = State.SELF_CLOSING_START_TAG
+            elif c == '>':
+                self.emit_tag()
+                self.state = State.DATA
+            else:
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.SELF_CLOSING_START_TAG:
+            self.tag.startendtag = True
+            if c == '>':
+                self.emit_tag()
+                self.state = State.DATA
+            else:
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.MARKUP_DECLARATION_OPEN:
+            # Collect characters until they spell "--", "DOCTYPE" (any
+            # case) or "[CDATA["; fail as soon as none can still match.
+            self.markup_declaration_buffer += c
+            if self.markup_declaration_buffer == "--":
+                self.data = ""
+                self.state = State.COMMENT_START
+            elif self.markup_declaration_buffer.lower() == "DOCTYPE".lower():
+                self.state = State.DOCTYPE
+            elif self.markup_declaration_buffer == "[CDATA[":
+                self.data = ""
+                self.cdata_buffer = ""
+                self.state = State.CDATA_SECTION
+            elif not ("--".startswith(self.markup_declaration_buffer) or
+                      "DOCTYPE".lower().startswith(self.markup_declaration_buffer.lower()) or
+                      "[CDATA[".startswith(self.markup_declaration_buffer)):
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.COMMENT_START:
+            if c == "-":
+                self.state = State.COMMENT_START_DASH
+            elif c == ">":
+                raise BadCharError(self.state, c)
+            else:
+                self.data += c
+                self.state = State.COMMENT
+
+        elif self.state == State.COMMENT_START_DASH:
+            if c == "-":
+                self.state = State.COMMENT_END
+            elif c == ">":
+                raise BadCharError(self.state, c)
+            else:
+                self.data += "-" + c
+                self.state = State.COMMENT
+
+        elif self.state == State.COMMENT:
+            if c == "-":
+                self.state = State.COMMENT_END_DASH
+            else:
+                self.data += c
+
+        elif self.state == State.COMMENT_END_DASH:
+            if c == "-":
+                self.state = State.COMMENT_END
+            else:
+                self.data += "-" + c
+                self.state = State.COMMENT
+
+        elif self.state == State.COMMENT_END:
+            if c == ">":
+                self.emit_comment()
+                self.state = State.DATA
+            else:
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.DOCTYPE:
+            if c in "\t\n\f ":
+                self.data = ""
+                self.state = State.DOCTYPE_CONTENTS
+            else:
+                raise BadCharError(self.state, c)
+
+        elif self.state == State.DOCTYPE_CONTENTS:
+            if c == ">":
+                self.emit_doctype()
+                self.state = State.DATA
+            else:
+                self.data += c
+
+        elif self.state == State.CDATA_SECTION:
+            self.cdata_buffer += c
+            if self.cdata_buffer == "]]>":
+                self.emit_cdata()
+                self.state = State.DATA
+            else:
+                # Move characters that can no longer be part of the "]]>"
+                # terminator from the look-ahead buffer into the data.
+                while self.cdata_buffer and not "]]>".startswith(self.cdata_buffer):
+                    self.data += self.cdata_buffer[0]
+                    self.cdata_buffer = self.cdata_buffer[1:]
+
+        else:
+            assert False, "bad state! %r" % self.state
+
+    def feed_python(self, tokens):
+        """Add python ``tokens`` as one part of the current attribute value.
+
+        Raises ParseError when the tokenizer is not before or inside an
+        attribute value.
+        """
+        if self.state == State.BEFORE_ATTRIBUTE_VALUE:
+            self.attribute_value = [tokens]
+            self.state = State.ATTRIBUTE_VALUE_UNQUOTED
+        elif self.state in [State.ATTRIBUTE_VALUE_DOUBLE_QUOTED,
+                            State.ATTRIBUTE_VALUE_SINGLE_QUOTED,
+                            State.ATTRIBUTE_VALUE_UNQUOTED]:
+            self.attribute_value.append(tokens)
+        else:
+            raise ParseError("python not allowed in state %r" % State.state_name(self.state))
+
+class HTMLTokenDumper(HTMLTokenizer):
+    """Debugging tokenizer that prints every token it is handed."""
+    def handle_data(self, data):
+        print "DATA %r" % data
+
+    def handle_starttag(self, tag_name, attrs):
+        print "STARTTAG %r %r" % (tag_name, attrs)
+
+    def handle_startendtag(self, tag_name, attrs):
+        print "STARTENDTAG %r %r" % (tag_name, attrs)
+
+    def handle_endtag(self, tag_name):
+        print "ENDTAG %r" % tag_name
+
+    # The three handlers below were missing, so dumping any input with a
+    # comment, doctype or CDATA section hit the base class's assert.
+    def handle_comment(self, data):
+        print "COMMENT %r" % data
+
+    def handle_doctype(self, data):
+        print "DOCTYPE %r" % data
+
+    def handle_cdata(self, data):
+        print "CDATA %r" % data
+
+def main(filename):
+    """Tokenize the file ``filename`` and print every token found."""
+    token_printer = HTMLTokenDumper()
+    with open(filename) as source:
+        for text_line in source:
+            for char in text_line:
+                token_printer.feed(char)
+
+if __name__ == "__main__":
+    main(*sys.argv[1:])
diff --git a/lib/pyxl/pyxl/codec/parser.py b/lib/pyxl/pyxl/codec/parser.py
new file mode 100755
--- /dev/null
+++ b/lib/pyxl/pyxl/codec/parser.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+
+import tokenize
+from pyxl import html
+from html_tokenizer import (
+        HTMLTokenizer,
+        ParseError as TokenizerParseError,
+        State,
+)
+from pytokenize import Untokenizer
+
+class ParseError(Exception):
+    """Parse failure; ``pos`` (a ``(line, char)`` pair) is appended to
+    the message when it is given."""
+    def __init__(self, message, pos=None):
+        if pos is None:
+            text = message
+        else:
+            text = "%s at line %d char %d" % ((message,) + pos)
+        super(ParseError, self).__init__(text)
+
+class PyxlParser(HTMLTokenizer):
+    def __init__(self, row, col):
+        """Start parsing at source position ``(row, col)``."""
+        super(PyxlParser, self).__init__()
+        origin = (row, col)
+        self.start = origin
+        self.end = origin
+        self.output = []
+        self.open_tags = []
+        self.remainder = None
+        # Flags describing what surrounds the piece currently handled.
+        self.next_thing_is_python = False
+        self.last_thing_was_python = False
+        self.last_thing_was_close_if_tag = False
+
+    def feed(self, token):
+        """Feed one token, character by character, to the HTML tokenizer.
+
+        ``token`` is unpacked as ``(type, value, start, end, line)`` with
+        ``start``/``end`` being ``(row, col)`` pairs.  Row gaps since the
+        previous token become newlines in the output and a column gap
+        becomes a single space.  Once ``done()`` turns true, the unread
+        rest of the token is stored in ``self.remainder``.
+
+        Raises ParseError (with position) when the tokenizer raises its
+        own ParseError.
+        """
+        ttype, tvalue, tstart, tend, tline = token
+
+        assert tstart[0] >= self.end[0], "row went backwards"
+        if tstart[0] > self.end[0]:
+            self.output.append("\n" * (tstart[0] - self.end[0]))
+
+        # interpret jumps on the same line as a single space
+        elif tstart[1] > self.end[1]:
+            super(PyxlParser, self).feed(" ")
+
+        self.end = tstart
+
+        # INDENT tokens only advance the position; their text is not fed.
+        if ttype != tokenize.INDENT:
+            while tvalue and not self.done():
+                c, tvalue = tvalue[0], tvalue[1:]
+                # Track the position of the character being consumed.
+                if c == "\n":
+                    self.end = (self.end[0]+1, 0)
+                else:
+                    self.end = (self.end[0], self.end[1]+1)
+                try:
+                    super(PyxlParser, self).feed(c)
+                except TokenizerParseError:
+                    raise ParseError("HTML Parsing error", self.end)
+        if self.done():
+            # Hand the unread tail of the token back via get_remainder().
+            self.remainder = (ttype, tvalue, self.end, tend, tline)
+        else:
+            self.end = tend
+
+    def feed_python(self, tokens):
+        """Feed a run of python tokens found inside the markup.
+
+        In the DATA and CDATA_SECTION states the pending text is emitted
+        and the untokenized python source, followed by ``", "``, is
+        appended to the output.  Before or inside an attribute value the
+        tokens are passed to the base tokenizer.
+        """
+        ttype, tvalue, tstart, tend, tline = tokens[0]
+        assert tstart[0] >= self.end[0], "row went backwards"
+        if tstart[0] > self.end[0]:
+            self.output.append("\n" * (tstart[0] - self.end[0]))
+        # The position moves to the end of the last token of the run.
+        ttype, tvalue, tstart, tend, tline = tokens[-1]
+        self.end = tend
+
+        if self.state in [State.DATA, State.CDATA_SECTION]:
+            self.next_thing_is_python = True
+            self.emit_data()
+            self.output.append("%s, " % Untokenizer().untokenize(tokens))
+            self.next_thing_is_python = False
+            self.last_thing_was_python = True
+        elif self.state in [State.BEFORE_ATTRIBUTE_VALUE,
+                            State.ATTRIBUTE_VALUE_DOUBLE_QUOTED,
+                            State.ATTRIBUTE_VALUE_SINGLE_QUOTED,
+                            State.ATTRIBUTE_VALUE_UNQUOTED]:
+            super(PyxlParser, self).feed_python(tokens)
+        # NOTE(review): in every other state the tokens are dropped without
+        # an error -- confirm this is intended.
+
+    def feed_position_only(self, token):
+        """Account for whitespace in front of ``token`` and move the
+        position to its end without feeding its text."""
+        kind, _text, begin, finish, source_line = token
+        empty_token = (kind, '', begin, begin, source_line)
+        self.feed(empty_token)
+        self.end = finish
+
+    def python_comment_allowed(self):
+        """Tell whether a # starts a comment in the current state.
+
+        <a # comment before attribute name
+           class="bar"# comment after attribute value
+           href="#notacomment">
+            # comment in data
+            Link text
+        </a>
+        """
+        comment_states = (
+            State.DATA, State.TAG_NAME,
+            State.BEFORE_ATTRIBUTE_NAME, State.AFTER_ATTRIBUTE_NAME,
+            State.BEFORE_ATTRIBUTE_VALUE, State.AFTER_ATTRIBUTE_VALUE,
+            State.COMMENT, State.DOCTYPE_CONTENTS, State.CDATA_SECTION,
+        )
+        return self.state in comment_states
+
+    def python_mode_allowed(self):
+        """Tell whether a { starts python mode in the current state.
+
+        <!-- {this isn't python} -->
+        """
+        inside_comment = self.state == State.COMMENT
+        return not inside_comment
+
+    def feed_comment(self, token):
+        """Copy a comment token's text to the output unchanged, after
+        accounting for the whitespace in front of it."""
+        kind, text, begin, finish, source_line = token
+        self.feed((kind, '', begin, begin, source_line))
+        self.output.append(text)
+        self.end = finish
+
+    def get_remainder(self):
+        """Return the unread token tail stored by ``feed`` (None if none)."""
+        leftover = self.remainder
+        return leftover
+
+    def done(self):
+        """Truthy once output exists, no tag is open and the tokenizer is
+        back in the DATA state."""
+        return (not self.open_tags) and self.state == State.DATA and self.output
+
+    def get_token(self):
+        """Return the generated code as one STRING token covering the
+        parsed span."""
+        generated = ''.join(self.output)
+        return (tokenize.STRING, generated, self.start, self.end, '')
+
+    @staticmethod
+    def safe_attr_name(name):
+        """Turn an attribute name into a valid python keyword-argument
+        name (``class`` -> ``xclass``, ``-`` -> ``_``, ``:`` -> ``COLON``)."""
+        reserved = {"class": "xclass", "for": "xfor"}
+        if name in reserved:
+            return reserved[name]
+        without_dashes = name.replace('-', '_')
+        return without_dashes.replace(':', 'COLON')
+
+    def _handle_attr_value(self, attr_value):
+        """Append a python expression for ``attr_value`` to ``self.output``.
+
+        ``attr_value`` is a list whose items are strings or lists of
+        python tokens.  A single part is emitted as-is (``repr`` of the
+        text, or the untokenized python); several parts are emitted as a
+        ``u"".join((...))`` expression with python parts wrapped in
+        ``unicode(...)``.
+
+        Raises ParseError when ``attr_value`` is None, i.e. the attribute
+        was written without a value.
+        """
+        if attr_value is None:
+            # Fail with a positioned parse error rather than the TypeError
+            # that iterating over None would produce.
+            raise ParseError("attribute without a value", self.end)
+
+        # Normalise the text parts; parts that end up empty are dropped.
+        parts = []
+        prev_was_python = False
+        for i, part in enumerate(attr_value):
+            if isinstance(part, list):
+                parts.append(part)
+                prev_was_python = True
+            else:
+                next_is_python = (i + 1 < len(attr_value) and
+                                  isinstance(attr_value[i + 1], list))
+                part = self._normalize_data_whitespace(part, prev_was_python, next_is_python)
+                if part:
+                    parts.append(part)
+                prev_was_python = False
+
+        if len(parts) == 1:
+            part = parts[0]
+            if isinstance(part, list):
+                self.output.append(Untokenizer().untokenize(part))
+            else:
+                self.output.append(repr(part))
+        else:
+            self.output.append('u"".join((')
+            for part in parts:
+                if isinstance(part, list):
+                    self.output.append('unicode(')
+                    self.output.append(Untokenizer().untokenize(part))
+                    self.output.append(')')
+                else:
+                    self.output.append(repr(part))
+                self.output.append(', ')
+            self.output.append('))')
+
+    @staticmethod
+    def _normalize_data_whitespace(data, prev_was_py, next_is_py):
+        """Normalise the line breaks of a text part.
+
+        Whitespace-only text containing a newline becomes one space when
+        it sits between two python parts and is dropped otherwise.  In
+        other text, leading/trailing newlines next to a python part
+        become one space, remaining outer newlines are stripped, and
+        inner ``\\r``/``\\n`` become spaces.
+        """
+        if not data:
+            return ''
+
+        if '\n' in data and not data.strip():
+            return ' ' if (prev_was_py and next_is_py) else ''
+
+        if prev_was_py and data.startswith('\n'):
+            data = " " + data.lstrip('\n')
+        if next_is_py and data.endswith('\n'):
+            data = data.rstrip('\n') + " "
+
+        trimmed = data.strip('\n')
+        return trimmed.replace('\r', ' ').replace('\n', ' ')
+
+    def handle_starttag(self, tag, attrs, call=True):
+        """Emit the Python source that opens ``tag`` and record it as open.
+
+        ``<if>`` and ``<else>`` are special-cased into conditional
+        ``html.x_frag()`` expressions.  Any other tag becomes a call to its
+        ``x_``-prefixed name (qualified with ``html.`` when the ``html``
+        module has that attribute), with one keyword argument per attribute.
+
+        tag   -- tag name, possibly dotted (``module.name``)
+        attrs -- mapping of attribute name to attribute value parts
+        call  -- when true, also open the ``(`` that receives the children
+
+        Raises ParseError for a malformed ``<if>`` or ``<else>``.
+        """
+        self.open_tags.append({'tag': tag, 'row': self.end[0]})
+        emit = self.output.append
+
+        if tag == 'if':
+            if len(attrs) != 1:
+                raise ParseError("if tag only takes one attr called 'cond'", self.end)
+            if 'cond' not in attrs:
+                raise ParseError("if tag must contain the 'cond' attr", self.end)
+
+            emit('html._push_condition(bool(')
+            self._handle_attr_value(attrs['cond'])
+            emit(')) and html.x_frag()(')
+        elif tag == 'else':
+            if len(attrs) != 0:
+                raise ParseError("else tag takes no attrs", self.end)
+            if not self.last_thing_was_close_if_tag:
+                raise ParseError("<else> tag must come right after </if>", self.end)
+
+            emit('(not html._last_if_condition) and html.x_frag()(')
+        else:
+            # Only the last dotted component gets the x_ prefix.
+            module, dot, identifier = tag.rpartition('.')
+            x_tag = module + dot + 'x_%s' % identifier
+
+            if hasattr(html, x_tag):
+                emit('html.')
+            emit('%s(' % x_tag)
+
+            for position, (attr_name, attr_value) in enumerate(attrs.iteritems()):
+                if position:
+                    emit(', ')
+                emit(self.safe_attr_name(attr_name))
+                emit('=')
+                self._handle_attr_value(attr_value)
+
+            emit(')')
+            if call:
+                # start call to __call__
+                emit('(')
+
+        self.last_thing_was_python = False
+        self.last_thing_was_close_if_tag = False
+
+    def handle_endtag(self, tag_name, call=True):
+        """Emit the Python source that closes ``tag_name``.
+
+        Closes the children call (when ``call`` is true), pops the matching
+        entry from the open-tag stack, appends ``html._leave_if()`` after an
+        ``</if>``, and adds a separating comma while other tags remain open.
+
+        Raises ParseError when ``tag_name`` does not match the innermost
+        open tag; an empty tag stack trips the assertion instead.
+        """
+        emit = self.output.append
+        if call:
+            # finish call to __call__
+            emit(")")
+
+        assert self.open_tags, "got </%s> but tag stack empty; parsing should be over!" % tag_name
+
+        opened = self.open_tags.pop()
+        if opened['tag'] != tag_name:
+            raise ParseError("<%s> on line %d closed by </%s> on line %d" %
+                             (opened['tag'], opened['row'], tag_name, self.end[0]))
+
+        closed_if = opened['tag'] == 'if'
+        if closed_if:
+            emit(',html._leave_if()')
+        # Remembered so that a following <else> can be validated.
+        self.last_thing_was_close_if_tag = closed_if
+
+        if self.open_tags:
+            emit(",")
+        self.last_thing_was_python = False
+
+    def handle_startendtag(self, tag_name, attrs):
+        """Handle a self-closing tag: open and close it without a children call."""
+        # call=False on both sides: no "(" is opened for children and no ")"
+        # is emitted to close it.
+        self.handle_starttag(tag_name, attrs, call=False)
+        self.handle_endtag(tag_name, call=False)
+
+    def handle_data(self, data):
+        """Emit literal text as an ``html.rawhtml(u"...")`` child.
+
+        The text is whitespace-normalized first; if nothing is left, nothing
+        is emitted and the bookkeeping flags are left untouched.
+        """
+        text = self._normalize_data_whitespace(
+                data, self.last_thing_was_python, self.next_thing_is_python)
+        if text:
+            # XXX XXX mimics old pyxl, but this is gross and likely wrong. I'm pretty sure we
+            # actually want %r instead of this crazy quote substitution and u"%s".
+            # Only double quotes are escaped here; nothing else in the text is.
+            escaped = text.replace('"', '\\"')
+            self.output.append('html.rawhtml(u"%s"), ' % escaped)
+
+            self.last_thing_was_python = False
+            self.last_thing_was_close_if_tag = False
+
+    def handle_comment(self, data):
+        """Emit an ``html_comment`` tag whose ``comment`` attr is the stripped text."""
+        # The attr value is a one-element list holding plain text, i.e. a
+        # single literal part as far as _handle_attr_value() is concerned.
+        self.handle_startendtag("html_comment", {"comment": [data.strip()]})
+        # handle_startendtag() has already cleared both flags for a non-"if"
+        # tag; they are reset again here explicitly.
+        self.last_thing_was_python = False
+        self.last_thing_was_close_if_tag = False
+
+    def handle_doctype(self, data):
+        """Emit an ``html_decl`` tag whose ``decl`` attr is ``'DOCTYPE ' + data``."""
+        self.handle_startendtag("html_decl", {"decl": ['DOCTYPE ' + data]})
+        # handle_startendtag() has already cleared both flags for a non-"if"
+        # tag; they are reset again here explicitly.
+        self.last_thing_was_python = False
+        self.last_thing_was_close_if_tag = False
+
+    def handle_cdata(self, data):
+        """Emit an ``html_marked_decl`` tag whose ``decl`` attr is ``'CDATA[' + data``."""
+        self.handle_startendtag("html_marked_decl", {"decl": ['CDATA[' + data]})
+        # handle_startendtag() has already cleared both flags for a non-"if"
+        # tag; they are reset again here explicitly.
+        self.last_thing_was_python = False
+        self.last_thing_was_close_if_tag = False
diff --git a/lib/pyxl/pyxl/codec/pytokenize.py b/lib/pyxl/pyxl/codec/pytokenize.py
new file mode 100644
--- /dev/null
+++ b/lib/pyxl/pyxl/codec/pytokenize.py
@@ -0,0 +1,468 @@
+"""Tokenization help for Python programs.
+
+generate_tokens(readline) is a generator that breaks a stream of
+text into Python tokens.  It accepts a readline-like method which is called
+repeatedly to get the next line of input (or "" for EOF).  It generates
+5-tuples with these members:
+
+    the token type (see token.py)
+    the token (a string)
+    the starting (row, column) indices of the token (a 2-tuple of ints)
+    the ending (row, column) indices of the token (a 2-tuple of ints)
+    the original line (string)
+
+It is designed to match the working of the Python tokenizer exactly, except
+that it produces COMMENT tokens for comments and gives type OP for all
+operators
+
+Older entry points
+    tokenize_loop(readline, tokeneater)
+    tokenize(readline, tokeneater=printtoken)
+are the same, except instead of generating tokens, tokeneater is a callback
+function to which the 5 fields described above are passed as 5 arguments,
+each time a new token is found.
+
+
+This file was taken from the python 2.7.4 library and modified for use by
+the Pyxl decoder. Changes made:
+    - When it encounters an unexpected EOF, the tokenizer does not raise an
+      exception, and instead yields an errortoken if appropriate.
+    - When it encounters an unexpected dedent, the tokenizer does not
+      raise an exception.
+    - The Untokenizer class was heavily modified.
+
+
+PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+--------------------------------------------
+
+1. This LICENSE AGREEMENT is between the Python Software Foundation
+("PSF"), and the Individual or Organization ("Licensee") accessing and
+otherwise using this software ("Python") in source or binary form and
+its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF hereby
+grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+analyze, test, perform and/or display publicly, prepare derivative works,
+distribute, and otherwise use Python alone or in any derivative version,
+provided, however, that PSF's License Agreement and PSF's notice of copyright,
+i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012, 2013 Python Software Foundation; All Rights Reserved" are retained
+in Python alone or in any derivative version prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on
+or incorporates Python or any part thereof, and wants to make
+the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to Python.
+
+4. PSF is making Python available to Licensee on an "AS IS"
+basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+relationship of agency, partnership, or joint venture between PSF and
+Licensee.  This License Agreement does not grant permission to use PSF
+trademarks or trade name in a trademark sense to endorse or promote
+products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using Python, Licensee
+agrees to be bound by the terms and conditions of this License
+Agreement.
+"""
+
+__author__ = 'Ka-Ping Yee <ping at lfw.org>'
+__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
+               'Skip Montanaro, Raymond Hettinger')
+
+import string, re
+from token import *
+
+import token
+__all__ = [x for x in dir(token) if not x.startswith("_")]
+__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
+del x
+del token
+
+# Extend the numbering imported from the ``token`` module with two extra
+# token types, COMMENT and NL, register their names in tok_name, and bump
+# N_TOKENS past them.
+COMMENT = N_TOKENS
+tok_name[COMMENT] = 'COMMENT'
+NL = N_TOKENS + 1
+tok_name[NL] = 'NL'
+N_TOKENS += 2
+
+def group(*choices):
+    """Return a regex group matching any one of the given pattern strings."""
+    return '(%s)' % '|'.join(choices)
+def any(*choices):
+    """Return a regex matching zero or more repetitions of group(*choices)."""
+    return '%s*' % group(*choices)
+def maybe(*choices):
+    """Return a regex matching zero or one occurrence of group(*choices)."""
+    return '%s?' % group(*choices)
+
+# Regex source fragments; they are combined into larger patterns below and
+# compiled further down.
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+# Whitespace, any number of backslash-newline continuations (each followed
+# by more whitespace), then an optional comment.
+Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'[a-zA-Z_]\w*'
+
+# Integer literals, each with an optional l/L suffix.
+Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
+# NOTE(review): '|' has the lowest precedence, so the optional [lL] suffix
+# below binds only to the second alternative (0[0-7]*), not to the 0o...
+# form -- confirm whether that is intended.
+Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
+Binnumber = r'0[bB][01]+[lL]?'
+Decnumber = r'[1-9]\d*[lL]?'
+Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+# Floating point literals: with a decimal point (exponent optional) or with
+# a mandatory exponent.
+Exponent = r'[eE][-+]?\d+'
+Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
+Expfloat = r'\d+' + Exponent
+Floatnumber = group(Pointfloat, Expfloat)
+# Imaginary literals: digits or a float followed by j/J.
+Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
+# Imaginary and float alternatives are listed before plain integers.
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# The four "tail end" patterns match the remainder of a string literal up
+# to and including its closing quote(s); backslash escapes are skipped.
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+# Opening of a triple-quoted string, with optional u/U/b/B and r/R prefixes.
+Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
+# Single-line ' or " string.
+String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+
+# Because of leftmost-then-longest match semantics, be sure to put the
+# longest operators first (e.g., if = came before ==, == would get
+# recognized as two instances of =).
+Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
+                 r"//=?",
+                 r"[+\-*/%&|^=<>]=?",
+                 r"~")
+
+Bracket = '[][(){}]'
+# A newline (optionally preceded by \r) or one of the remaining
+# single-character punctuation marks.
+Special = group(r'\r?\n', r'[:;.,`@]')
+Funny = group(Operator, Bracket, Special)
+
+PlainToken = group(Number, Funny, String, Name)
+Token = Ignore + PlainToken
+
+# First (or only) line of ' or " string.
+ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                group("'", r'\\\r?\n'),
+                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                group('"', r'\\\r?\n'))
+# A backslash-newline continuation, end of input (\Z), a comment, or the
+# opening of a triple-quoted string.
+PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+
+# Compiled forms of the Token, PseudoToken, Single3 and Double3 patterns.
+tokenprog, pseudoprog, single3prog, double3prog = map(
+    re.compile, (Token, PseudoToken, Single3, Double3))
+# Maps a string opener to the compiled regex matching the rest of that
+# string through its closing quote(s).  Triple-quoted openers are listed
+# with every prefix spelling; the plain ' and " entries carry no prefix.
+# The bare prefix letters map to None.
+endprogs = {"'": re.compile(Single), '"': re.compile(Double),
+            "'''": single3prog, '"""': double3prog,
+            "r'''": single3prog, 'r"""': double3prog,
+            "u'''": single3prog, 'u"""': double3prog,
+            "ur'''": single3prog, 'ur"""': double3prog,
+            "R'''": single3prog, 'R"""': double3prog,
+            "U'''": single3prog, 'U"""': double3prog,
+            "uR'''": single3prog, 'uR"""': double3prog,
+            "Ur'''": single3prog, 'Ur"""': double3prog,
+            "UR'''": single3prog, 'UR"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
+            "B'''": single3prog, 'B"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            'r': None, 'R': None, 'u': None, 'U': None,
+            'b': None, 'B': None}
+
+# Membership tables: every prefix + quote spelling that opens a
+# triple-quoted string.  Each key simply maps to itself.
+triple_quoted = {}
+for t in ("'''", '"""',
+          "r'''", 'r"""', "R'''", 'R"""',
+          "u'''", 'u"""', "U'''", 'U"""',
+          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
+          "uR'''", 'uR"""', "UR'''", 'UR"""',
+          "b'''", 'b"""', "B'''", 'B"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""'):
+    triple_quoted[t] = t
+# Same idea for the openers of single-quoted (one-line) strings.
+single_quoted = {}
+for t in ("'", '"',
+          "r'", 'r"', "R'", 'R"',
+          "u'", 'u"', "U'", 'U"',
+          "ur'", 'ur"', "Ur'", 'Ur"',
+          "uR'", 'uR"', "UR'", 'UR"',
+          "b'", 'b"', "B'", 'B"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"' ):
+    single_quoted[t] = t
+
+# Tab stop width; presumably used by generate_tokens() when measuring
+# indentation -- TODO confirm.
+tabsize = 8
+
+# Exception type for tokenization failures.
+# NOTE(review): the module docstring says this modified tokenizer no longer
+# raises on unexpected EOF or dedent -- confirm where this is still raised.
+class TokenError(Exception): pass
+
+# Raised (e.g. by a tokeneater callback) to end tokenize() early;
+# tokenize() catches it and returns normally.
+class StopTokenizing(Exception): pass
+
+def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
+    """Print one token as "srow,scol-erow,ecol:<TAB>TYPE_NAME<TAB>repr(token)".
+
+    This is the default tokeneater of tokenize(); ``line`` is accepted for
+    interface compatibility and not printed.
+    """
+    (srow, scol), (erow, ecol) = srow_scol, erow_ecol
+    fields = (srow, scol, erow, ecol, tok_name[type], repr(token))
+    print "%d,%d-%d,%d:\t%s\t%s" % fields
+
+def tokenize(readline, tokeneater=printtoken):
+    """
+    The tokenize() function accepts two parameters: one representing the
+    input stream, and one providing an output mechanism for tokenize().
+
+    The first parameter, readline, must be a callable object which provides
+    the same interface as the readline() method of built-in file objects.
+    Each call to the function should return one line of input as a string.
+
+    The second parameter, tokeneater, must also be a callable object. It is
+    called once for each token, with five arguments, corresponding to the
+    tuples generated by generate_tokens().  It defaults to printtoken.
+
+    A StopTokenizing exception raised while tokenizing is caught here and
+    simply ends the run; tokenize() then returns None.
+    """
+    try:
+        tokenize_loop(readline, tokeneater)
+    except StopTokenizing:
+        # Deliberate early exit requested by the callback; not an error.
+        pass
+
+# backwards compatible interface
+def tokenize_loop(readline, tokeneater):
+    """Call tokeneater(*token) for every 5-tuple from generate_tokens(readline).
+
+    Unlike tokenize(), exceptions (including StopTokenizing) propagate to
+    the caller.
+    """
+    for token_info in generate_tokens(readline):
+        tokeneater(*token_info)
+
+class Untokenizer:
+    """Rebuild source text from a stream of 5-tuple tokens.
+
+    Whitespace between tokens is reconstructed from their start/end
+    positions: row gaps become newlines and column gaps become spaces.
+    """
+
+    # PYXL MODIFICATION: This entire class.
+
+    def __init__(self, row=None, col=None):
+        """Begin with empty output positioned at (row, col).
+
+        When ``row`` is None the position is taken from the start of the
+        first token passed to feed().
+        """
+        self.prev_row, self.prev_col = row, col
+        self.tokens = []
+
+    def add_whitespace(self, start):
+        """Append the newlines/spaces needed to move from the current
+        position to ``start``, a (row, col) pair."""
+        row, col = start
+        assert row >= self.prev_row, "row (%r) should be >= prev_row (%r)" % (row, self.prev_row)
+        missing_rows = row - self.prev_row
+        if missing_rows:
+            self.tokens.append("\n" * missing_rows)
+        missing_cols = col - self.prev_col
+        if missing_cols:
+            self.tokens.append(" " * missing_cols)
+
+    def feed(self, t):
+        """Append one token ``(type, string, start, end, line)`` to the output."""
+        assert len(t) == 5
+        tok_type, token, start, end, _line = t
+        if self.prev_row is None:
+            self.prev_row, self.prev_col = start
+        self.add_whitespace(start)
+        self.tokens.append(token)
+        end_row, end_col = end
+        if tok_type in (NEWLINE, NL):
+            # The token text already contains the line break, so the next
+            # token is expected at column 0 of the following row.
+            end_row, end_col = end_row + 1, 0
+        self.prev_row, self.prev_col = end_row, end_col
+
+    def finish(self):
+        """Return everything fed so far as one string."""
+        return ''.join(self.tokens)
+
+    def untokenize(self, iterable):
+        """Feed every token of ``iterable`` and return the resulting text."""
+        for tok in iterable:
+            self.feed(tok)
+        return self.finish()
+
+def untokenize(iterable):
+    """Transform tokens back into Python source code.
+
+    PYXL MODIFICATION: every element of the iterable must be a full
+    5-tuple (type, string, (srow, scol), (erow, ecol), line).  Unlike the
+    standard library version, 2-tuples of (type, string) are not accepted:
+    Untokenizer.feed() asserts that each token has exactly five elements.
+
+    Whitespace between tokens is rebuilt from the start/end positions of
+    consecutive tokens, and the result is returned as a single string.
+
+    NOTE(review): the standard library docstring promised that untokenized
+    source matches the input exactly when given full tokens; that has not
+    been re-verified for the rewritten Untokenizer -- confirm before
+    relying on it.
+    """
+    ut = Untokenizer()
+    return ut.untokenize(iterable)
+
+def generate_tokens(readline):
+    """
+    The generate_tokens() generator requires one argment, readline, which
+    must be a callable object which provides the same interface as the
+    readline() method of built-in file objects. Each call to the function
+    should return one line of input as a string.  Alternately, readline
+    can be a callable function terminating with StopIteration:
+        readline = open(myfile).next    # Example of alternate readline
+
+    The generator produces 5-tuples with these members: the token type; the
+    token string; a 2-tuple (srow, scol) of ints specifying the row and


More information about the pypy-commit mailing list