[pypy-commit] benchmarks default: (cfbolz, arigo)

arigo noreply at buildbot.pypy.org
Mon Dec 8 16:38:11 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r282:d9d9946e1cda
Date: 2014-12-08 15:37 +0000
http://bitbucket.org/pypy/benchmarks/changeset/d9d9946e1cda/

Log:	(cfbolz, arigo)

	Add the Krakatau benchmark (Robert Grosse on pypy-dev). It's
	actually not all about the warm-up time; after warm-up it is still
	much slower on PyPy than on CPython.

diff too long, truncating to 2000 out of 14612 lines

diff --git a/own/bm_krakatau.py b/own/bm_krakatau.py
new file mode 100644
--- /dev/null
+++ b/own/bm_krakatau.py
@@ -0,0 +1,66 @@
+import sys, os
+import time
+import util, optparse
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'krakatau/Krakatau'))
+print sys.path
+
+import Krakatau.ssa
+from Krakatau.environment import Environment
+from Krakatau.java import javaclass
+from Krakatau.verifier.inference_verifier import verifyBytecode
+
+
+def makeGraph(m):
+    """Build the simplified SSA graph for method ``m``.
+
+    Verifies ``m.code``, converts the verified code to SSA form and then runs
+    the SSA simplification passes below, in this fixed order, before
+    returning the graph. Passed as the graph callback to
+    ``javaclass.generateAST`` by ``decompileClass``.
+    """
+    v = verifyBytecode(m.code)
+    s = Krakatau.ssa.ssaFromVerified(m.code, v)
+
+    # print _stats(s)
+    # Subprocedures are inlined first, and only when the graph has any.
+    if s.procs:
+        # s.mergeSingleSuccessorBlocks()
+        # s.removeUnusedVariables()
+        s.inlineSubprocs()
+
+    s.condenseBlocks()
+    s.mergeSingleSuccessorBlocks()
+    # print _stats(s)
+    s.removeUnusedVariables()
+    s.constraintPropagation()
+    s.disconnectConstantVariables()
+    s.simplifyJumps()
+    # Merge/cleanup passes are repeated after the jump simplification.
+    s.mergeSingleSuccessorBlocks()
+    s.removeUnusedVariables()
+    # print _stats(s)
+    return s
+
+def decompileClass():
+    path = ['krakatau/rt.jar']
+    targets = ['javax/swing/plaf/nimbus/ToolBarSouthState']
+    e = Environment()
+    for part in path:
+        e.addToPath(part)
+
+    with e:
+        for i,target in enumerate(targets):
+            for _ in range(100):
+                c = e.getClass(target)
+                source = javaclass.generateAST(c, makeGraph).print_()
+
+
+def main(n):
+    l = []
+    for i in range(n):
+        t0 = time.time()
+        decompileClass()
+        time_elapsed = time.time() - t0
+        l.append(time_elapsed)
+    return l
+
+if __name__ == "__main__":
+    # Script entry point: build the option parser, let util register its
+    # standard options on it, then hand main() to util.run_benchmark.
+    parser = optparse.OptionParser(
+        usage="%prog [options]",
+        description="Test the performance of the krakatau benchmark")
+    util.add_standard_options_to(parser)
+    options, args = parser.parse_args()
+
+    util.run_benchmark(options, options.num_runs, main)
diff --git a/own/cache.txt b/own/cache.txt
new file mode 100644
--- /dev/null
+++ b/own/cache.txt
@@ -0,0 +1,13 @@
+java/lang/Object;SUPER,PUBLIC
+java/lang/Object,javax/swing/plaf/nimbus/State;ABSTRACT,SUPER,PUBLIC
+java/lang/Object,javax/swing/plaf/nimbus/State,javax/swing/plaf/nimbus/ToolBarSouthState;SUPER
+java/lang/Object,java/lang/String;SUPER,FINAL,PUBLIC
+java/lang/Object,java/lang/Throwable;SUPER,PUBLIC
+java/lang/Object,java/lang/Throwable,java/lang/Exception;SUPER,PUBLIC
+java/lang/Object,java/lang/Throwable,java/lang/Exception,java/lang/RuntimeException;SUPER,PUBLIC
+java/lang/Object,java/lang/Throwable,java/lang/Exception,java/lang/RuntimeException,java/lang/IllegalMonitorStateException;SUPER,PUBLIC
+java/lang/Object,java/awt/Component;ABSTRACT,SUPER,PUBLIC
+java/lang/Object,java/awt/Component,java/awt/Container;SUPER,PUBLIC
+java/lang/Object,java/awt/Component,java/awt/Container,javax/swing/JComponent;ABSTRACT,SUPER,PUBLIC
+java/lang/Object,java/awt/Component,java/awt/Container,javax/swing/JComponent,javax/swing/JToolBar;SUPER,PUBLIC
+java/lang/Object,java/lang/Throwable,java/lang/Exception,java/lang/RuntimeException,java/lang/ClassCastException;SUPER,PUBLIC
diff --git a/own/krakatau/Krakatau/.gitattributes b/own/krakatau/Krakatau/.gitattributes
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/.gitattributes
@@ -0,0 +1,2 @@
+* text=auto
+*.test binary
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/.gitignore b/own/krakatau/Krakatau/.gitignore
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/.gitignore
@@ -0,0 +1,6 @@
+*.pyc
+*.pyo
+test*.py
+Krakatau/plugins/*
+cache.txt
+tests/*.test
diff --git a/own/krakatau/Krakatau/Documentation/assembler.txt b/own/krakatau/Krakatau/Documentation/assembler.txt
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Documentation/assembler.txt
@@ -0,0 +1,88 @@
+Krakatau Assembler Syntax
+
+This guide is intended to help write bytecode assembly files for use with the Krakatau assembler. It assumes that you are already familiar with the JVM classfile format and how to write bytecode. If not, you can find a simple tutorial to writing bytecode at https://greyhat.gatech.edu/wiki/index.php?title=Java_Bytecode_Tutorial. You can also find some examples of assembler files in the examples directory.
+
+Krakatau syntax is largely backwards compatible with the classic Jasmin assembler syntax. In a couple of places, backwards compatibility is broken either by the introduction of new keywords or to fix ambiguities in the Jasmin syntax. However, Krakatau is not necessarily compatible with the extensions introduced by JasminXT.
+
+The basic format for an assembler file consists of a list of classfile entries. Each entry will result in the generation of a separate classfile, so a single assembly file can contain multiple classes where convenient. These entries are completely independent - multiple classes never share constant pool entries, fields, methods, or directives, even the version directive. Each one has the format
+
+.bytecode major minor  (optional)
+class directives
+.class classref
+.super classref
+interface declarations
+class directives
+topitems
+.end class
+
+The .end class on the final entry may be omitted. So the simplest possible assembler file to declare a class named Foo would be
+
+.class Foo
+.super java/lang/Object
+
+To declare three classes A, B, and C in the same file with B and C inheriting from A and different versions, you could do
+
+.class A
+.super java/lang/Object
+.end class
+.class B
+.super A
+.end class
+.class C
+.super A
+
+The classfile version is specified by the .bytecode directive. It is specified by major, minor, a pair of decimal integers. If omitted, the default is version 49.0. So the following is equivalent to the earlier example
+
+.bytecode 49 0
+.class Foo
+.super java/lang/Object
+
+Other class directives include .runtimevisible, .runtimeinvisible, .signature, .attribute, .source, .inner, .innerlength, and .enclosing. These are used to control the attributes of the class and will be covered later.
+
+Topitems are the actual meat of the class. There are three types: fields, methods, and constant definitions. The last is unique to Krakatau and is closely related to the rest of the syntax. In Krakatau, there are multiple ways to specify a constant pool entry. The most common are via WORD tokens, symbolic references and numerical references. The latter two consist of square brackets with lowercase alphanumerics and underscores inside.
+
+When you specify .class Foo, the string Foo isn't directly included in the output. The classfile format says that the class field is actually a two byte index into the constant pool of the classfile. This points to a Class_info which points to a Utf8_info which holds the actual name of the class. Therefore, Krakatau implicitly creates constant pool entries and inserts the appropriate references. But this process can be controlled more directly.
+
+Instead of writing
+.class Foo
+.super java/lang/Object
+
+You could explicitly write out all the classfile references as follows
+
+.class [foocls]
+.super [objcls]
+
+.const [foocls] = Class [fooutf]
+.const [fooutf] = Utf8 Foo
+.const [objcls] = Class [objutf]
+.const [objutf] = Utf8 java/lang/Object
+
+There are two types of references. If the contents are a decimal int, then it is a direct numerical reference to a particular slot in the constant pool. You are responsible for making sure that everything is consistent and that the contents of that slot are valid. This option is most useful for specifying the null entry [0]. For example, to express the Object class itself, one would do
+
+.class java/lang/Object
+.super [0]
+
+If the contents are any other nonempty lowercase alphanumeric + underscores string, it is interpreted as a symbolic reference. This is a reference to some slot but you don't care which one. Krakatau will pick an available slot and fill it in automatically. Symbolic references may be omitted from the generated classfile if unused or merged with identical entries, including automatically generated entries.
+
+With that out of the way, the basic form of a constant definition is
+
+.const ref = entrytype arguments
+
+You can also just define one reference in terms of another. You are responsible for making sure there are no circular references or duplicated definitions. Otherwise, unpredictable results may occur.
+
+Examples include
+
+.const [1] = Class Foo
+.const [1] = Class [fooutf]
+.const [89] = Long 1234567L
+.const [mynat] = NameAndType main ([Ljava/lang/String;)V
+.const [myref] = [myotherref]
+.const [really42] = [42]
+
+
+
+
+
+
+
+
diff --git a/own/krakatau/Krakatau/Krakatau/__init__.py b/own/krakatau/Krakatau/Krakatau/__init__.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/__init__.py
@@ -0,0 +1,6 @@
+'''Monkeypatch a fix for bugs.python.org/issue9825 in case users are running an old version.'''
+import collections
+#Remove OrderedDict.__del__ if the running Python defines it. When the
+#attribute does not exist the del raises AttributeError, which is ignored.
+try:
+    del collections.OrderedDict.__del__
+except AttributeError:
+    pass
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/__init__.py b/own/krakatau/Krakatau/Krakatau/assembler/__init__.py
new file mode 100644
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/assembler.py b/own/krakatau/Krakatau/Krakatau/assembler/assembler.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/assembler/assembler.py
@@ -0,0 +1,580 @@
+import collections
+import struct, operator
+
+from . import instructions, codes
+from .. import constant_pool
+from ..classfile import ClassFile
+from ..method import Method
+from ..field import Field
+
+class AssemblerError(Exception):
+    '''Error raised by the assembler. message is passed to Exception;
+    data is an optional extra payload stored on the instance.'''
+    def __init__(self, message, data=None):
+        super(AssemblerError, self).__init__(message)
+        self.data = data
+
+def error(msg):
+    '''Raise an AssemblerError with the given message. Never returns.'''
+    raise AssemblerError(msg)
+
+class PoolRef(object):
+    '''A reference to a constant pool entry which toIndex() resolves to a slot index.
+
+    toIndex() checks, in order: an explicit slot (index=...), a symbolic label
+    (lbl=...), and otherwise the positional args, which are passed to
+    pool.getItem() to describe the entry itself.
+    '''
+    def __init__(self, *args, **kwargs):
+        self.index = kwargs.get('index')
+        self.lbl = kwargs.get('lbl')
+        self.args = args
+
+    def toIndex(self, pool, forbidden=(), **kwargs):
+        '''Return the slot index for this reference, computing it on first use
+        and caching it in self.index.
+
+        pool      -- object providing getLabel() and getItem() (see PoolInfo)
+        forbidden -- labels already being resolved, forwarded to pool.getLabel()
+        kwargs    -- forwarded unchanged to the pool
+        '''
+        if self.index is not None:
+            return self.index
+        if self.lbl:
+            self.index = pool.getLabel(self.lbl, forbidden, **kwargs)
+        else:
+            #Resolve nested references first; self.args is replaced by the resolved list
+            self.args = [(x.toIndex(pool) if isinstance(x, PoolRef) else x) for x in self.args]
+            self.index = pool.getItem(*self.args, **kwargs)
+        return self.index
+
+class PoolInfo(object):
+    '''Assembler side view of the constant pool: wraps a ConstPool together
+    with the label definitions, fixed slot definitions and bootstrap entries.'''
+    def __init__(self):
+        self.pool = constant_pool.ConstPool()
+        self.lbls = {}
+        self.fixed = {} # constant pool entries in a specific slot
+        self.bootstrap = [] #entries for the BootstrapMethods attribute if any
+
+    def getLabel(self, lbl, forbidden=(), **kwargs):
+        '''Resolve the label lbl to a slot index.
+
+        forbidden holds the labels already being followed; meeting one of them
+        again raises an AssemblerError for the circular reference.
+        '''
+        if lbl in forbidden:
+            error('Circular constant pool reference: ' + ', '.join(forbidden))
+        forbidden = forbidden + (lbl,)
+        return self.lbls[lbl].toIndex(self, forbidden, **kwargs)
+
+    def getItem(self, type_, *args, **kwargs):
+        '''Add the entry (type_, args) to the pool and return the result of addItem.'''
+        if type_ == 'InvokeDynamic':
+            #All but the last argument are stored in the bootstrap list and
+            #replaced by their position in that list
+            self.bootstrap.append(args[:-1])
+            args = len(self.bootstrap)-1, args[-1]    
+        return self.pool.addItem((type_, tuple(args)), **kwargs)
+
+    def Utf8(self, s):
+        '''Shortcut for getItem('Utf8', s).'''
+        return self.getItem('Utf8', s)
+
+    def assignFixedSlots(self):
+        '''Reserve the explicitly numbered slots and fill each with its definition.'''
+        self.pool.reserved.update(self.fixed)
+        for i,v in self.fixed.items():
+            #Double and Long also get the following slot reserved (presumably
+            #because they take up two constant pool slots)
+            if v.args and v.args[0] in ('Double','Long'):
+                self.pool.reserved.add(i+1)
+                
+        #TODO - order these in terms of dependencies?
+        for index, value in self.fixed.items():
+            used = value.toIndex(self, index=index)
+            if used != index: #we need to copy an existing item
+                self.pool.copyItem(used, index)
+
+#Maps a struct format string for the operands to the opcodes using that format
+_format_ops = collections.defaultdict(tuple)
+_format_ops[''] = instructions.instrs_noarg
+_format_ops['>B'] = 'iload', 'lload', 'fload', 'dload', 'aload', 'istore', 'lstore', 'fstore', 'dstore', 'astore', 'ret'
+_format_ops['>h'] = 'ifeq', 'ifne', 'iflt', 'ifge', 'ifgt', 'ifle', 'if_icmpeq', 'if_icmpne', 'if_icmplt', 'if_icmpge', 'if_icmpgt', 'if_icmple', 'if_acmpeq', 'if_acmpne', 'goto', 'jsr', 'ifnull', 'ifnonnull'
+_format_ops['>H'] = 'ldc_w', 'ldc2_w', 'getstatic', 'putstatic', 'getfield', 'putfield', 'invokevirtual', 'invokespecial', 'invokestatic', 'new', 'anewarray', 'checkcast', 'instanceof'
+
+#The trailing commas make these one element tuples, which are appended to
+#the existing tuple (or to the empty default) for that format
+_format_ops['>b'] += 'bipush', 
+_format_ops['>Bb'] += 'iinc', 
+_format_ops['>h'] += 'sipush', 
+_format_ops['>HB'] += 'multianewarray',
+_format_ops['>HBB'] += 'invokeinterface',
+_format_ops['>HH'] += 'invokedynamic',
+_format_ops['>B'] += 'ldc', 'newarray'
+_format_ops['>i'] += 'goto_w', 'jsr_w'
+
+#Inverted table: opcode name -> compiled struct.Struct for its operands
+op_structs = {}
+for fmt, ops in _format_ops.items():
+    _s = struct.Struct(fmt)
+    for _op in ops:
+        op_structs[_op] = _s
+
+def getPadding(pos):
+    return (3-pos) % 4
+
+def getInstrLen(instr, pos):
+    op = instr[0]
+    if op in op_structs:
+        return 1 + op_structs[op].size
+    elif op == 'wide':
+        return 2 + 2 * len(instr[1][1])
+    else:
+        padding = getPadding(pos)
+        count = len(instr[1][1])
+        if op == 'tableswitch':
+            return 13 + padding + 4*count
+        else:
+            return 9 + padding + 8*count 
+
+def assembleInstruction(instr, labels, pos, pool):
+    def lbl2Off(lbl):
+        if lbl not in labels:
+            del labels[None]
+            error('Undefined label: {}\nDefined labels for current method are: {}'.format(lbl, ', '.join(sorted(labels))))
+        return labels[lbl] - pos
+
+    op = instr[0]
+    first = chr(instructions.allinstructions.index(op))
+
+    instr = [(x.toIndex(pool) if isinstance(x, PoolRef) else x) for x in instr[1:]]
+    if op in instructions.instrs_lbl:
+        instr[0] = lbl2Off(instr[0])
+    if op in op_structs:
+        rest = op_structs[op].pack(*instr)
+        return first+rest
+    elif op == 'wide':
+        subop, args = instr[0]
+        prefix = chr(instructions.allinstructions.index(subop))
+        fmt = '>Hh' if len(args) > 1 else '>H'
+        rest = struct.pack(fmt, *args)
+        return first + prefix + rest
+    else:
+        padding = getPadding(pos)
+        param, jumps, default = instr[0]
+        default = lbl2Off(default)
+
+        if op == 'tableswitch':
+            jumps = map(lbl2Off, jumps)
+            low, high = param, param + len(jumps)-1
+            temp = struct.Struct('>i')
+            part1 = first + '\0'*padding + struct.pack('>iii', default, low, high)
+            return part1 + ''.join(map(temp.pack, jumps))
+        elif op == 'lookupswitch':
+            jumps = {k:lbl2Off(lbl) for k,lbl in jumps}
+            jumps = sorted(jumps.items())
+            temp = struct.Struct('>ii')
+            part1 = first + '\0'*padding + struct.pack('>ii', default, len(jumps))
+            part2 = ''.join(map(temp.pack, *zip(*jumps))) if jumps else ''
+            return part1 + part2
+
+def groupList(pairs):
+    d = collections.defaultdict(list)
+    for k,v in pairs:
+        d[k].append(v)
+    return d
+
+def splitList(pairs):
+    d = groupList(pairs)
+    return d[False], d[True]
+       
+def assembleCodeAttr(statements, pool, version, addLineNumbers, jasmode):
+    directives, lines = splitList(statements)
+    dir_offsets = collections.defaultdict(list)
+
+    offsets = []
+    labels = {}
+    pos = 0
+    #first run through to calculate bytecode offsets
+    #this is greatly complicated due to the need to
+    #handle Jasmine line number directives
+    for t, statement in statements:
+        if t:
+            lbl, instr = statement
+            labels[lbl] = pos
+            if instr is not None:
+                offsets.append(pos)
+                pos += getInstrLen(instr, pos)
+        #some directives require us to keep track of the corresponding bytecode offset
+        elif statement[0] in ('.line','.stackmap'):
+            dir_offsets[statement[0]].append(pos)
+    code_len = pos
+
+    code_bytes = ''
+    for lbl, instr in lines:
+        if instr is not None:
+            code_bytes += assembleInstruction(instr, labels, len(code_bytes), pool)
+    assert(len(code_bytes) == code_len)
+
+    directive_dict = groupList(directives)
+    limits = groupList(directive_dict['.limit'])
+
+    stack = min(limits['stack'] + [65535]) 
+    locals_ = min(limits['locals'] + [65535]) 
+
+    excepts = []
+    for name, start, end, target in directive_dict['.catch']:
+        #Hack for compatibility with Jasmin
+        if jasmode and name.args and (name.args[1].args == ('Utf8','all')):
+            name.index = 0
+        vals = labels[start], labels[end], labels[target], name.toIndex(pool)
+        excepts.append(struct.pack('>HHHH',*vals))
+    
+    attributes = []
+
+    #StackMapTable
+    def pack_vt(vt):
+        s = chr(codes.vt_codes[vt[0]])
+        if vt[0] == 'Object':
+            s += struct.pack('>H', vt[1].toIndex(pool))        
+        elif vt[0] == 'Uninitialized':
+            s += struct.pack('>H', labels[vt[1]])
+        return s
+
+    if directive_dict['.stackmap']:
+        frames = []
+        last_pos = -1
+
+        for pos, info in zip(dir_offsets['.stackmap'], directive_dict['.stackmap']):
+            offset = pos - last_pos - 1
+            last_pos = pos
+            assert(offset >= 0)
+
+            tag = info[0]
+            if tag == 'same':
+                if offset >= 64:
+                    error('Max offset on a same frame is 63.')
+                frames.append(chr(offset))            
+            elif tag == 'same_locals_1_stack_item':
+                if offset >= 64:
+                    error('Max offset on a same_locals_1_stack_item frame is 63.')
+                frames.append(chr(64 + offset) + pack_vt(info[2][0]))            
+            elif tag == 'same_locals_1_stack_item_extended':
+                frames.append(struct.pack('>BH', 247, offset) + pack_vt(info[2][0]))            
+            elif tag == 'chop':
+                if not (1 <= info[1] <= 3):
+                    error('Chop frame can only remove 1-3 locals')
+                frames.append(struct.pack('>BH', 251-info[1], offset))
+            elif tag == 'same_extended':
+                frames.append(struct.pack('>BH', 251, offset))
+            elif tag == 'append':
+                local_vts = map(pack_vt, info[2])
+                if not (1 <= len(local_vts) <= 3):
+                    error('Append frame can only add 1-3 locals')
+                frames.append(struct.pack('>BH', 251+len(local_vts), offset) + ''.join(local_vts))
+            elif tag == 'full':
+                local_vts = map(pack_vt, info[2])
+                stack_vts = map(pack_vt, info[3])
+                frame = struct.pack('>BH', 255, offset)
+                frame += struct.pack('>H', len(local_vts)) + ''.join(local_vts)
+                frame += struct.pack('>H', len(stack_vts)) + ''.join(stack_vts)
+                frames.append(frame)
+
+        sm_body = ''.join(frames)
+        sm_attr = struct.pack('>HIH', pool.Utf8("StackMapTable"), len(sm_body)+2, len(frames)) + sm_body
+        attributes.append(sm_attr)
+
+    #line number attribute
+    if addLineNumbers and not directive_dict['line']:
+        dir_offsets['line'] = directive_dict['line'] = offsets
+    if directive_dict['line']:
+        lntable = [struct.pack('>HH',x,y) for x,y in zip(dir_offsets['line'], directive_dict['line'])]
+        ln_attr = struct.pack('>HIH', pool.Utf8("LineNumberTable"), 2+4*len(lntable), len(lntable)) + ''.join(lntable)        
+        attributes.append(ln_attr)
+
+    if directive_dict['.var']:
+        sfunc = struct.Struct('>HHHHH').pack
+        vartable = []
+        for index, name, desc, start, end in directive_dict['.var']:
+            start, end = labels[start], labels[end]
+            name, desc = name.toIndex(pool), desc.toIndex(pool)
+            vartable.append(sfunc(start, end-start, name, desc, index))
+        var_attr = struct.pack('>HIH', pool.Utf8("LocalVariableTable"), 2+10*len(vartable), len(vartable)) + ''.join(vartable)        
+        attributes.append(var_attr)
+
+    if not code_len:
+        return None
+
+    for attrname, data in directive_dict['.codeattribute']:
+        attr = struct.pack('>HI', attrname.toIndex(pool), len(data)) + data
+        attributes.append(attr)        
+
+
+    #Old versions use shorter fields for stack, locals, and code length
+    header_fmt = '>HHI' if version > (45,2) else '>BBH'
+
+    name_ind = pool.Utf8("Code")
+    attr_len = struct.calcsize(header_fmt) + 4 + len(code_bytes) + 8*len(excepts) + sum(map(len, attributes))
+    
+    assembled_bytes = struct.pack('>HI', name_ind, attr_len)
+    assembled_bytes += struct.pack(header_fmt, stack, locals_, len(code_bytes))
+    assembled_bytes += code_bytes
+    assembled_bytes += struct.pack('>H', len(excepts)) + ''.join(excepts)
+    assembled_bytes += struct.pack('>H', len(attributes)) + ''.join(attributes)
+    return assembled_bytes
+
+def _assembleEVorAnnotationSub(pool, init_args, isAnnot):
+    '''Assemble an annotation (isAnnot true) or an element value (isAnnot
+    false) described by init_args and return the resulting string.
+
+    The nested structure is walked with an explicit stack of (call type, args)
+    entries rather than with recursive calls.
+    '''
+    #call types
+    C_ANNOT, C_ANNOT2, C_EV = range(3)
+    init_callt = C_ANNOT if isAnnot else C_EV
+
+    stack = [(init_callt, init_args)]
+    parts = []
+    add = parts.append
+
+    while stack:
+        callt, args = stack.pop()
+
+        if callt == C_ANNOT:
+            #annotation: type index and pair count, then the pairs
+            typeref, keylines = args
+            add(struct.pack('>HH', typeref.toIndex(pool), len(keylines)))
+            #pushed in reverse so that they are popped in their original order
+            for pair in reversed(keylines):
+                stack.append((C_ANNOT2, pair))
+
+        elif callt == C_ANNOT2:
+            #a single name, value pair of an annotation
+            name, val = args
+            add(struct.pack('>H', name.toIndex(pool)))
+            stack.append((C_EV, val))
+
+        elif callt == C_EV:
+            #element value: the tag character followed by tag specific data
+            tag, data = args
+            assert(tag in codes.et_rtags)
+            add(tag)
+
+            if tag in 'BCDFIJSZsc':
+                add(struct.pack('>H', data[0].toIndex(pool)))
+            elif tag == 'e':
+                add(struct.pack('>HH', data[0].toIndex(pool), data[1].toIndex(pool)))
+            elif tag == '@':
+                stack.append((C_ANNOT, data[0]))
+            elif tag == '[':
+                add(struct.pack('>H', len(data[1])))
+                for arrval in reversed(data[1]):
+                    stack.append((C_EV, arrval))
+    return ''.join(parts)
+
+def assembleElementValue(val, pool):
+    return  _assembleEVorAnnotationSub(pool, val, False)
+
+def assembleAnnotation(annotation, pool):
+    return  _assembleEVorAnnotationSub(pool, annotation, True)
+
+def assembleMethod(header, statements, pool, version, addLineNumbers, jasmode):
+    '''Assemble one method and return its method_info string.
+
+    header     -- (flags, (name, desc)) where name and desc are PoolRefs
+    statements -- (iscode, statement) pairs: code statements are passed on to
+                  assembleCodeAttr, the rest are method level directives
+    The remaining arguments are passed through to assembleCodeAttr.
+    '''
+    mflags, (name, desc) = header
+    name = name.toIndex(pool)
+    desc = desc.toIndex(pool)
+
+    #Or together the bit values of all the flags
+    flagbits = map(Method.flagVals.get, mflags)
+    flagbits = reduce(operator.__or__, flagbits, 0)
+
+    meth_statements, code_statements = splitList(statements)
+
+    method_attributes = []
+    code_attr = assembleCodeAttr(code_statements, pool, version, addLineNumbers, jasmode)
+    if code_attr is not None:
+        method_attributes.append(code_attr)
+
+    directive_dict = groupList(meth_statements)
+    if directive_dict['.throws']:
+        t_inds = [struct.pack('>H', x.toIndex(pool)) for x in directive_dict['.throws']]
+        throw_attr = struct.pack('>HIH', pool.Utf8("Exceptions"), 2+2*len(t_inds), len(t_inds)) + ''.join(t_inds)        
+        method_attributes.append(throw_attr)
+
+    #Runtime annotations
+    for vis in ('Invisible','Visible'):
+        paramd = groupList(directive_dict['.runtime'+vis.lower()])
+
+        #The None key is not a parameter annotation, those are handled
+        #by assembleClassFieldMethodAttributes
+        if None in paramd:
+            del paramd[None]
+
+        if paramd:
+            parts = []
+            #NOTE(review): range(max(paramd)) never reaches the highest key in
+            #paramd, so annotations on that parameter appear to be dropped.
+            #Confirm the index base the parser uses before changing this.
+            for i in range(max(paramd)):
+                annotations = [assembleAnnotation(a, pool) for a in paramd[i]]
+                part = struct.pack('>H', len(annotations)) + ''.join(annotations)
+                parts.append(part)
+            attrlen = 1+sum(map(len, parts))
+            attr = struct.pack('>HIB', pool.Utf8("Runtime{}ParameterAnnotations".format(vis)), attrlen, len(parts)) + ''.join(parts)
+            method_attributes.append(attr)
+
+    if '.annotationdefault' in directive_dict:
+        val = directive_dict['.annotationdefault'][0]
+        data = assembleElementValue(val, pool)
+        attr = struct.pack('>HI', pool.Utf8("AnnotationDefault"), len(data)) + data        
+        method_attributes.append(attr)
+
+    assembleClassFieldMethodAttributes(method_attributes.append, directive_dict, pool)
+    return struct.pack('>HHHH', flagbits, name, desc, len(method_attributes)) + ''.join(method_attributes)
+
+def getLdcRefs(statements):
+    lines = [x[1][1] for x in statements if x[0] and x[1][0]]
+    instructions = [x[1] for x in lines if x[1] is not None]
+
+    for instr in instructions:
+        op = instr[0]
+        if op == 'ldc':
+            yield instr[1]
+ 
+def addLdcRefs(methods, pool):
+    '''Add the constant pool entries used by ldc instructions in methods to
+    pool, using slots that are reserved up front for them.
+
+    methods -- list of (header, statements) pairs
+    pool    -- the PoolInfo to add the entries to
+    '''
+    def getRealRef(ref, forbidden=()):
+        '''Get the root PoolRef associated with a given PoolRef, following labels'''
+        if ref.index is None and ref.lbl:
+            if ref.lbl in forbidden:
+                error('Circular constant pool reference: ' + ', '.join(forbidden))
+            forbidden = forbidden + (ref.lbl,)
+            return getRealRef(pool.lbls[ref.lbl], forbidden) #recursive call
+        return ref
+
+    #We attempt to estimate how many slots are needed after merging identical entries
+    #So we can reserve the correct number of slots without leaving unused gaps
+    #However, in complex cases, such as string/class/mt referring to an explicit
+    #reference, we may overestimate
+    #Maps entry type -> set of keys, so identical entries are only counted once
+    ldc_refs = collections.defaultdict(set)
+
+    for header, statements in methods:
+        for ref in getLdcRefs(statements):
+            ref = getRealRef(ref)
+            #References which already have a slot need no new one
+            if ref.index is not None:
+                continue
+
+            type_ = ref.args[0]
+            if type_ in ('Int','Float'):
+                key = ref.args[1]
+            elif type_ in ('String','Class','MethodType'): 
+                uref = getRealRef(ref.args[1])
+                key = uref.index, uref.args[1:]
+            else: #for MethodHandles, don't even bother trying to estimate merging
+                key = ref.args[1:] 
+            ldc_refs[type_].add(key)    
+
+    #TODO - make this a little cleaner so we don't have to mess with the ConstantPool internals
+    num = sum(map(len, ldc_refs.values()))
+    slots = [pool.pool.getAvailableIndex() for _ in range(num)]
+    pool.pool.reserved.update(slots)
+
+    #Create the entries, handing each one a reserved slot via index=
+    for type_ in ('Int','Float'):
+        for arg in ldc_refs[type_]:
+            pool.getItem(type_, arg, index=slots.pop())
+    for type_ in ('String','Class','MethodType'):
+        for ind,args in ldc_refs[type_]:
+            arg = ind if ind is not None else pool.Utf8(*args)
+            pool.getItem(type_, arg, index=slots.pop())
+    for type_ in ('MethodHandle',):
+        for code, ref in ldc_refs[type_]:
+            pool.getItem(type_, code, ref.toIndex(pool), index=slots.pop())
+    #Every reserved slot is expected to have been used up by now
+    assert(not slots)
+    assert(not pool.pool.reserved)
+
+def assembleClassFieldMethodAttributes(addcb, directive_dict, pool):
+    '''Assemble the attributes handled the same way for classes, fields and
+    methods, and pass each assembled attribute to the callback addcb.
+
+    directive_dict -- directives grouped by name, as returned by groupList
+    pool           -- PoolInfo used to resolve constant pool references
+    '''
+    for vis in ('Invisible','Visible'):
+        paramd = groupList(directive_dict['.runtime'+vis.lower()])
+        #Only the annotations grouped under None are handled here
+        if None in paramd:
+            annotations = [assembleAnnotation(a, pool) for a in paramd[None]]
+            attrlen = 2+sum(map(len, annotations))
+            attr = struct.pack('>HIH', pool.Utf8("Runtime{}Annotations".format(vis)), attrlen, len(annotations)) + ''.join(annotations)
+            addcb(attr)
+
+    for name in directive_dict['.signature']:
+        attr = struct.pack('>HIH', pool.Utf8("Signature"), 2, name.toIndex(pool))
+        addcb(attr)
+
+    #.innerlength directive overrides the normal attribute length calculation
+    hasoverride = len(directive_dict['.innerlength']) > 0
+
+    for name, data in directive_dict['.attribute']:    
+        name_ind = name.toIndex(pool)
+
+        #The override only applies to attributes named InnerClasses
+        if hasoverride and pool.pool.getArgsCheck('Utf8', name_ind) == 'InnerClasses':
+            attrlen = directive_dict['.innerlength'][0]
+        else:
+            attrlen = len(data)
+
+        attr = struct.pack('>HI', name_ind, attrlen) + data
+        addcb(attr)
+
+def assembleClassAttributes(addcb, directive_dict, pool, addLineNumbers, jasmode, filename):
+
+    sourcefile = directive_dict.get('.source',[None])[0] #PoolRef or None
+    if jasmode and not sourcefile:
+        sourcefile = pool.Utf8(filename)
+    elif addLineNumbers and not sourcefile:
+        sourcefile = pool.Utf8("SourceFile")
+    if sourcefile:
+        attr = struct.pack('>HIH', pool.Utf8("SourceFile"), 2, sourcefile.toIndex(pool))
+        addcb(attr)
+
+    if '.inner' in directive_dict:
+        parts = []
+        for inner, outer, name, flags in directive_dict['.inner']:
+            flagbits = map(ClassFile.flagVals.get, flags)
+            flagbits = reduce(operator.__or__, flagbits, 0)
+            part = struct.pack('>HHHH', inner.toIndex(pool), outer.toIndex(pool), name.toIndex(pool), flagbits)
+            parts.append(part)
+
+        #.innerlength directive overrides the normal attribute length calculation
+        innerlen = 2+8*len(parts) if '.innerlength' not in directive_dict else directive_dict['.innerlength'][0]
+        attr = struct.pack('>HIH', pool.Utf8("InnerClasses"), innerlen, len(parts)) + ''.join(parts)
+        addcb(attr)
+
+    if '.enclosing' in directive_dict:
+        class_, nat = directive_dict['.enclosing'][0]
+        attr = struct.pack('>HIHH', pool.Utf8("EnclosingMethod"), 4, class_.toIndex(pool), nat.toIndex(pool))
+        addcb(attr)
+
+    assembleClassFieldMethodAttributes(addcb, directive_dict, pool)
+
+
+def assemble(tree, addLineNumbers, jasmode, filename):
+    '''Assemble one parsed class into a classfile.
+
+    tree           -- 7-tuple: version, class directives before the class
+                      declaration, class declaration, super declaration,
+                      interface declarations, class directives after them,
+                      and the top level items
+    addLineNumbers -- passed on to the code and class attribute assembly
+    jasmode        -- enable Jasmin compatibility behaviour
+    filename       -- passed on to assembleClassAttributes
+
+    Returns a (name, class_code) pair: the pool arguments of the this-class
+    entry and the assembled classfile string.
+    '''
+    pool = PoolInfo()
+    version, cattrs1, classdec, superdec, interface_decs, cattrs2, topitems = tree
+    if not version: #default to version 49.0 except in Jasmin compatibility mode
+        version = (45,3) if jasmode else (49,0)
+
+    #scan topitems, plus statements in each method to get cpool directives
+    interfaces = []
+    fields = []
+    methods = []
+    attributes = []
+
+    directive_dict = groupList(cattrs1 + cattrs2)
+    top_d = groupList(topitems)
+
+    #Constant definitions either fix the contents of a numbered slot or define a label
+    for slot, value in top_d['const']:
+        if slot.index is not None:
+            pool.fixed[slot.index] = value
+        else:
+            pool.lbls[slot.lbl] = value
+    pool.assignFixedSlots()
+
+    #Now find all cp references used in an ldc instruction
+    #Since they must be <=255, we give them priority in assigning slots
+    #to maximize the chance of a successful assembly
+    addLdcRefs(top_d['method'], pool)
+
+    for flags, name, desc, const, field_directives in top_d['field']:
+        flagbits = map(Field.flagVals.get, flags)
+        flagbits = reduce(operator.__or__, flagbits, 0)
+        name = name.toIndex(pool)
+        desc = desc.toIndex(pool)
+
+        fattrs = []
+        if const is not None:
+            attr = struct.pack('>HIH', pool.Utf8("ConstantValue"), 2, const.toIndex(pool))
+            fattrs.append(attr)
+
+        assembleClassFieldMethodAttributes(fattrs.append, groupList(field_directives), pool)
+
+        field_code = struct.pack('>HHHH', flagbits, name, desc, len(fattrs)) + ''.join(fattrs)
+        fields.append(field_code)
+
+    for header, statements in top_d['method']:
+        methods.append(assembleMethod(header, statements, pool, version, addLineNumbers, jasmode))
+
+    #pool.bootstrap is filled in by PoolInfo.getItem for InvokeDynamic entries
+    if pool.bootstrap:
+        entries = [struct.pack('>H' + 'H'*len(bsargs), bsargs[0], len(bsargs)-1, *bsargs[1:]) for bsargs in pool.bootstrap]   
+        attrbody = ''.join(entries)
+        attrhead = struct.pack('>HIH', pool.Utf8("BootstrapMethods"), 2+len(attrbody), len(entries))
+        attributes.append(attrhead + attrbody)
+
+    #Explicit class attributes
+    assembleClassAttributes(attributes.append, directive_dict, pool, addLineNumbers, jasmode, filename)
+
+    interfaces = [struct.pack('>H', x.toIndex(pool)) for x in interface_decs]
+    intf, cflags, this = classdec
+    cflags = set(cflags)
+    if intf:
+        cflags.add('INTERFACE')
+    if jasmode:
+        cflags.add('SUPER')
+
+    flagbits = map(ClassFile.flagVals.get, cflags)
+    flagbits = reduce(operator.__or__, flagbits, 0)
+    this = this.toIndex(pool)
+    super_ = superdec.toIndex(pool)
+
+    #The header is the magic number, then the minor version before the major one
+    major, minor = version
+    class_code = '\xCA\xFE\xBA\xBE' + struct.pack('>HH', minor, major)
+    #The pool is serialized only now, after everything above has added its entries
+    class_code += pool.pool.bytes()
+    class_code += struct.pack('>HHH', flagbits, this, super_)
+    #Each section is a two byte count followed by its items
+    for stuff in (interfaces, fields, methods, attributes):
+        bytes_ = struct.pack('>H', len(stuff)) + ''.join(stuff)
+        class_code += bytes_
+
+    name = pool.pool.getArgs(this)[0]
+    return name, class_code
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/codes.py b/own/krakatau/Krakatau/Krakatau/assembler/codes.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/assembler/codes.py
@@ -0,0 +1,11 @@
+#Names of the method handle kinds; handle_codes numbers them 1-9 in this order
+_handle_types = 'getField getStatic putField putStatic invokeVirtual invokeStatic invokeSpecial newInvokeSpecial invokeInterface'.split()
+handle_codes = dict(zip(_handle_types, range(1,10)))
+
+#Primitive type name -> array type code (4-11)
+newarr_codes = dict(zip('boolean char float double byte short int long'.split(), range(4,12)))
+
+#Verification type keyword -> tag, where the tag is the keyword's position in the list
+vt_keywords = ['Top','Integer','Float','Double','Long','Null','UninitializedThis','Object','Uninitialized']
+vt_codes = {k:i for i,k in enumerate(vt_keywords)}
+
+
+#Annotation element_value tag character -> type keyword, plus the inverse mapping.
+#The keywords are listed in the same order as the tag characters. In the class
+#file format 'F' is the float tag and 'I' is the int tag, so float comes first.
+et_rtags = dict(zip('BCDFIJSZsce@[', 'byte char double float int long short boolean string class enum annotation array'.split()))
+et_tags = {v:k for k,v in et_rtags.items()}
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/disassembler.py b/own/krakatau/Krakatau/Krakatau/assembler/disassembler.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/assembler/disassembler.py
@@ -0,0 +1,539 @@
+import collections
+import re
+
+from . import instructions, tokenize, parse, assembler, codes
+from ..binUnpacker import binUnpacker
+from ..classfile import ClassFile
+
+#Longest printed representation of a Utf8 constant that is still written inline
+MAX_INLINE_LENGTH = 50
+
+#Reverse lookups: numeric code -> name
+rhandle_codes = {v:k for k,v in codes.handle_codes.items()}
+rnewarr_codes = {v:k for k,v in codes.newarr_codes.items()}
+
+#Matches strings that begin with one of the tokenizer's int, float or constant
+#pool index patterns, or with a ';'. Such strings are never printed as bare words.
+not_word_regex = '(?:{}|{}|{}|;)'.format(tokenize.int_base, tokenize.float_base, tokenize.t_CPINDEX)
+not_word_regex = re.compile(not_word_regex, re.VERBOSE)
+#The tokenizer's WORD pattern with '$' appended so the match must reach the end of the string
+is_word_regex = re.compile(tokenize.t_WORD.__doc__+'$')
+#Sanity check at import time: two quote characters must not count as a word
+assert(is_word_regex.match("''") is None)
+
+def isWord(s):
+    '''Determine if s can be used as an inline word
+
+    s is rejected if it is listed in parse.badwords or if not_word_regex
+    matches at its start. Otherwise it has to match is_word_regex and all of
+    its characters must be above ' ' and no higher than 0x7f.'''
+    if s in parse.badwords or (not_word_regex.match(s) is not None):
+        return False
+    #eliminate unprintable characters below 32
+    #also, don't allow characters above 127 to keep things simpler
+    return (is_word_regex.match(s) is not None) and min(s) > ' ' and max(s) <= '\x7f'
+
+def rstring(s, allowWord=True):
+    '''Returns a representation of the string. If allowWord is true, it will be unquoted if possible
+
+    When s is not returned as a bare word, the result is repr(str(s)) if
+    encoding s to ASCII gives back an equal string, and repr(s) otherwise.'''
+    if allowWord and isWord(s):
+        return s
+    try:
+        if s.encode('ascii') == s:
+            return repr(str(s))
+    except (UnicodeEncodeError, UnicodeDecodeError):
+        #not representable in ASCII, fall through to the plain repr
+        pass
+    return repr(s)
+
+class PoolManager(object):
+    '''Formats constant pool references for the disassembler output.
+
+    Entries whose text is short enough are written inline. Everything else is
+    written as a [_N] reference and recorded in self.used, so that
+    printConstDefs can emit a matching .const definition afterwards.'''
+    def __init__(self, pool):
+        self.const_pool = pool #keep this around for the float conversion function
+        self.pool = pool.pool
+        self.bootstrap_methods = [] #filled in externally
+        self.used = set() #which cp entries are used non inline and so must be printed
+
+        #For each type, store the function needed to generate the rhs of a constant pool specifier
+        temp1 = lambda ind: rstring(self.cparg1(ind))
+        temp2 = lambda ind: self.utfref(self.cparg1(ind))
+
+        self.cpref_table = {
+            "Utf8": temp1,
+
+            "Class": temp2,
+            "String": temp2,
+            "MethodType": temp2,
+
+            "NameAndType": self.multiref,
+            "Field": self.multiref,
+            "Method": self.multiref,
+            "InterfaceMethod": self.multiref,
+
+            "Int": self.ldc,
+            "Long": self.ldc,
+            "Float": self.ldc,
+            "Double": self.ldc,
+
+            "MethodHandle": self.methodhandle_notref,
+            "InvokeDynamic": self.invokedynamic_notref,
+            }
+
+    def cparg1(self, ind):
+        '''Returns the first argument of pool entry ind (entries are (type name, args) pairs)'''
+        return self.pool[ind][1][0]
+
+    def inlineutf(self, ind, allowWord=True):
+        '''Returns the word if it's short enough to inline, else None'''
+        arg = self.cparg1(ind)
+        rstr = rstring(arg, allowWord=allowWord)
+        if len(rstr) <= MAX_INLINE_LENGTH:
+            return rstr
+        return None
+
+    def ref(self, ind):
+        '''Marks entry ind as needing a .const definition and returns its [_N] reference'''
+        self.used.add(ind)
+        return '[_{}]'.format(ind)
+
+    def utfref(self, ind):
+        '''Returns the text for Utf8 entry ind: inline if short enough, else a reference.
+
+        Index 0 is printed as the literal [0].'''
+        if ind == 0:
+            return '[0]'
+        inline = self.inlineutf(ind)
+        return inline if inline is not None else self.ref(ind)
+
+    #Also works for Strings and MethodTypes
+    def classref(self, ind):
+        '''Like utfref, but for an entry whose first argument is the index of the Utf8 to print.
+
+        If the text is too long to inline, the reference is to entry ind itself.'''
+        if ind == 0:
+            return '[0]'
+        inline = self.inlineutf(self.cparg1(ind))
+        return inline if inline is not None else self.ref(ind)
+
+    #For Field, Method, IMethod, and NameAndType. Effectively notref
+    def multiref(self, ind):
+        '''Returns the space separated text of entry ind, recursing into each of its arguments.
+
+        Utf8 and Class entries end the recursion via utfref and classref.'''
+        if ind == 0:
+            return '[0]'
+        typen, args = self.pool[ind]
+        if typen == "Utf8":
+            return self.utfref(ind)
+        elif typen == "Class":
+            return self.classref(ind)
+        return ' '.join(map(self.multiref, args))
+
+    #Special case for instruction fieldrefs as a workaround for Jasmin's awful syntax
+    def notjasref(self, ind):
+        '''Returns the class name and the second argument's text for entry ind.
+
+        If the class name is too long to inline, the whole entry is referenced instead.'''
+        typen, args = self.pool[ind]
+        cind = self.cparg1(ind)
+        inline = self.inlineutf(self.cparg1(cind))
+        if inline is None:
+            return self.ref(ind)
+        return inline + ' ' + self.multiref(args[1])
+
+    def ldc(self, ind):
+        '''Returns the text for a loadable constant.
+
+        Strings are inlined in quoted form when short enough. Numbers are
+        printed as literals, with an F or L suffix for Float and Long. Any
+        other entry type is printed as a reference.'''
+        typen, args = self.pool[ind]
+        arg = args[0]
+
+        if typen == 'String':
+            inline = self.inlineutf(arg, allowWord=False)
+            return inline if inline is not None else self.ref(ind)
+        elif typen in ('Int','Long','Float','Double'):
+            if typen == "Float" or typen == "Double":
+                #fetch the value through the constant pool object instead of the raw entry
+                arg = self.const_pool.getArgs(ind)[0]
+
+            #drop the suffix that repr adds to Python long integers
+            rstr = repr(arg).rstrip("Ll")
+            if typen == "Float" or typen == "Long":
+                rstr += typen[0]
+            return rstr
+        else:
+            return self.ref(ind)
+
+    def methodhandle_notref(self, ind):
+        '''Returns the handle kind name followed by a reference to the handle's target entry'''
+        typen, args = self.pool[ind]
+        code = rhandle_codes[args[0]]
+        return code + ' ' + self.ref(args[1])
+
+    def invokedynamic_notref(self, ind):
+        '''Returns the bootstrap method handle, its argument references, a ':' and the name and type'''
+        typen, args = self.pool[ind]
+        bs_args = self.bootstrap_methods[args[0]]
+
+        parts = [self.methodhandle_notref(bs_args[0])]
+        parts += map(self.ref, bs_args[1:])
+        parts += [':', self.multiref(args[1])]
+        return ' '.join(parts)
+
+    def printConstDefs(self, add):
+        '''Emits a .const line through add for every referenced entry, in index order'''
+        defs = {}
+
+        #Generating a definition can mark further entries as used, so repeat until no new ones appear
+        while self.used:
+            temp, self.used = self.used, set()
+            for ind in temp:
+                if ind in defs:
+                    continue
+                typen = self.pool[ind][0]
+                defs[ind] = self.cpref_table[typen](ind)
+
+        for ind in sorted(defs):
+            add('.const [_{}] = {} {}'.format(ind, self.pool[ind][0], defs[ind]))
+
+def getAttributeTriples(obj): #name_ind, name, data
+    '''Pairs obj.attributes_raw with obj.attributes by position.
+
+    Returns a list of (name index, name, data) where the index and data come
+    from the raw entry and the name from the corresponding parsed entry.'''
+    return [(name_ind, name, data1) for (name_ind, data1), (name, data2) in zip(obj.attributes_raw, obj.attributes)]
+
+def getAttributesDict(obj):
+    '''Groups the attributes of obj by name.
+
+    Returns a defaultdict mapping each attribute name to a list of
+    (name index, data) pairs. Looking up a missing name yields an empty list.'''
+    d = collections.defaultdict(list)
+    for ind, name, attr in getAttributeTriples(obj):
+        d[name].append((ind, attr))
+    return d
+
+#Instruction name -> struct format string of its operands
+fmt_lookup = {k:v.format for k,v in assembler.op_structs.items()}
+def getInstruction(b, getlbl, poolm):
+    '''Decodes one instruction from the reader b and returns its text.
+
+    getlbl turns a bytecode offset into a label name and poolm prints constant
+    pool operands. The returned text starts with a tab. Switch instructions
+    return several lines joined by newlines.'''
+    pos = b.off
+    op = b.get('B')
+
+    name = instructions.allinstructions[op]
+    if name == 'wide':
+        #wide is followed by the opcode it modifies and then 16 bit operands
+        name2 = instructions.allinstructions[b.get('B')]
+        if name2 == 'iinc':
+            args = list(b.get('>Hh'))
+        else:
+            args = [b.get('>H')]
+
+        parts = [name, name2] + map(str, args)
+        return '\t' + ' '.join(parts)
+    elif name == 'tableswitch' or name == 'lookupswitch':
+        #skip the alignment padding that follows the opcode
+        padding = assembler.getPadding(pos)
+        b.getRaw(padding)
+
+        #all jump offsets are relative to the position of the opcode
+        default = getlbl(b.get('>i')+pos)
+
+        if name == 'lookupswitch':
+            num = b.get('>I')
+            entries = ['\t'+name]
+            #each entry is a key followed by a jump offset
+            entries += ['\t\t{} : {}'.format(b.get('>i'), getlbl(b.get('>i')+pos)) for _ in range(num)]
+        else:
+            low, high = b.get('>ii')
+            num = high-low+1
+            entries = ['\t{} {}'.format(name, low)]
+            entries += ['\t\t{}'.format(getlbl(b.get('>i')+pos)) for _ in range(num)]
+        entries += ['\t\tdefault : {}'.format(default)]
+        return '\n'.join(entries)
+    else:
+        args = list(b.get(fmt_lookup[name], forceTuple=True))
+        #remove extra padding 0
+        if name in ('invokeinterface','invokedynamic'):
+            args = args[:-1]
+
+        #Token type -> function used to print the first operand
+        funcs = {
+                'OP_CLASS': poolm.classref,
+                'OP_CLASS_INT': poolm.classref,
+                'OP_FIELD': poolm.notjasref, #this is a special case due to the jasmin thing
+                'OP_METHOD': poolm.multiref,
+                'OP_METHOD_INT': poolm.multiref,
+                'OP_DYNAMIC': poolm.ref,
+                'OP_LDC1': poolm.ldc,
+                'OP_LDC2': poolm.ldc,
+                'OP_NEWARR': rnewarr_codes.get,
+            }
+
+        token_t = tokenize.wordget[name]
+        if token_t == 'OP_LBL':
+            #branch targets are relative to the position of the opcode
+            assert(len(args) == 1)
+            args[0] = getlbl(args[0]+pos)
+        elif token_t in funcs:
+            args[0] = funcs[token_t](args[0])
+
+        parts = [name] + map(str, args)
+        return '\t' + ' '.join(parts)
+
+def disMethodCode(code, add, poolm):
+    '''Emits the body of a method through add: limits, catch directives, the
+    labelled instructions with their stack map frames, and the remaining code
+    attributes. Does nothing if code is None.'''
+    if code is None:
+        return
+    add('\t; method code size: {} bytes'.format(code.codelen))
+    add('\t.limit stack {}'.format(code.stack))
+    add('\t.limit locals {}'.format(code.locals))
+
+    #Offsets that are referred to somewhere and so need a label definition
+    lbls = set()
+    def getlbl(x):
+        lbls.add(x)
+        return 'L'+str(x)
+
+    for e in code.except_raw:
+        parts = poolm.classref(e.type_ind), getlbl(e.start), getlbl(e.end), getlbl(e.handler)
+        add('\t.catch {} from {} to {} using {}'.format(*parts))
+
+    code_attributes = getAttributesDict(code)
+    frames = getStackMapTable(code_attributes, poolm, getlbl)
+
+    #Decode everything first so that all labels are known before any line is printed
+    instrs = []
+    b = binUnpacker(code.bytecode_raw)
+    while b.size():
+        instrs.append((b.off, getInstruction(b, getlbl, poolm)))
+    #dummy entry so that a label or frame at the end of the code is still printed
+    instrs.append((b.off, None))
+
+    for off, instr in instrs:
+        if off in lbls:
+            add('L{}:'.format(off))
+        if off in frames:
+            add(frames[off])
+        if instr:
+            add(instr)
+
+    #Generic code attributes
+    for name in code_attributes:
+        #We can't disassemble these because Jasmin's format for these attributes
+        #is overly cumbersome and not easy to disassemble into, but we can't just
+        #leave them as binary blobs either as they are verified by the JVM and the
+        #latter two contain constant pool references which won't be preserved even
+        #if the bytecode isn't changed. For now, we just omit them entirely.
+        #TODO - find a better solution
+        if name in ("LineNumberTable","LocalVariableTable","LocalVariableTypeTable"):
+            continue
+
+        for name_ind, attr in code_attributes[name]:
+            add('.codeattribute {} {!r}'.format(poolm.utfref(name_ind), attr))
+
+def getVerificationType(bytes_, poolm, getLbl):
+    '''Reads one verification type from bytes_ and returns its text.
+
+    Object is followed by a class reference and Uninitialized by a label.'''
+    s = codes.vt_keywords[bytes_.get('>B')]
+    if s == 'Object':
+        s += ' ' + poolm.classref(bytes_.get('>H'))
+    elif s == 'Uninitialized':
+        s += ' ' + getLbl(bytes_.get('>H'))
+    return s
+
+def getStackMapTable(code_attributes, poolm, getLbl):
+    '''Decodes the StackMapTable attribute, if there is one.
+
+    Returns a dict mapping bytecode offsets to the text of the .stack
+    directive for that offset. The attribute is popped from code_attributes
+    while doing so.'''
+    smt_attrs = code_attributes['StackMapTable']
+
+    frames = {}
+    offset = 0
+
+    if smt_attrs:
+        assert(len(smt_attrs) == 1)
+        bytes_ = binUnpacker(smt_attrs.pop()[1])
+        count = bytes_.get('>H')
+        getVT = lambda: getVerificationType(bytes_, poolm, getLbl)
+
+        for _ in range(count):
+            #The tag selects the frame type. Small tags also encode the offset delta
+            tag = bytes_.get('>B')
+            header, contents = None, []
+
+            #NOTE(review): tags 128-246 match no branch, so header stays None and
+            #building the '.stack' line below will raise a TypeError
+            if 0 <= tag <= 63:
+                offset += tag
+                header = 'same'
+            elif 64 <= tag <= 127:
+                offset += tag - 64
+                header = 'same_locals_1_stack_item'
+                contents.append('\tstack ' + getVT())
+            elif tag == 247:
+                offset += bytes_.get('>H')
+                header = 'same_locals_1_stack_item_extended'
+                contents.append('\tstack ' + getVT())
+            elif 248 <= tag <= 250:
+                offset += bytes_.get('>H')
+                header = 'chop ' + str(251-tag)
+            elif tag == 251:
+                offset += bytes_.get('>H')
+                header = 'same_extended'
+            elif 252 <= tag <= 254:
+                offset += bytes_.get('>H')
+                header = 'append'
+                contents.append('\tlocals ' + ' '.join(getVT() for _ in range(tag-251)))
+            elif tag == 255:
+                offset += bytes_.get('>H')
+                header = 'full'
+                local_count = bytes_.get('>H')
+                contents.append('\tlocals ' + ' '.join(getVT() for _ in range(local_count)))
+                stack_count = bytes_.get('>H')
+                contents.append('\tstack ' + ' '.join(getVT() for _ in range(stack_count)))
+
+            #only frames that list locals or stack entries get a closing line
+            if contents:
+                contents.append('.end stack')
+            contents = ['.stack ' + header] + contents
+            frame = '\n'.join(contents)
+            frames[offset] = frame
+            offset += 1 #frames after the first have an offset one larger than the listed offset
+    return frames
+
+def disCFMAttribute(name_ind, name, bytes_, add, poolm):
+    '''Emits an attribute that can appear on a class, field or method.
+
+    Runtime annotations and Signature get their own directives. Anything else
+    is written as a generic .attribute with the remaining raw data.'''
+    for vis in ('Visible', 'Invisible'):
+        if name == 'Runtime{}Annotations'.format(vis):
+            count = bytes_.get('>H')
+            for _ in range(count):
+                disAnnotation(bytes_, '.runtime{} '.format(vis.lower()), add, poolm, '')
+            #NOTE(review): with count == 0 we fall through after the count has
+            #already been read from bytes_ - confirm the generic output is as intended
+            if count: #otherwise we'll create an empty generic attribute
+                return
+
+    if name == "Signature":
+        add('.signature {}'.format(poolm.utfref(bytes_.get('>H'))))
+        return
+    #Create generic attribute if it can't be represented by a standard directive
+    add('.attribute {} {!r}'.format(poolm.utfref(name_ind), bytes_.getRaw(bytes_.size())))
+
+def disMethodAttribute(name_ind, name, bytes_, add, poolm):
+    '''Emits one method attribute through add.
+
+    Code is skipped here. AnnotationDefault, Exceptions and the parameter
+    annotations get their own directives, and everything else is passed on to
+    disCFMAttribute.'''
+    if name == 'Code':
+        return
+    elif name == 'AnnotationDefault':
+        disElementValue(bytes_, '.annotationdefault ', add, poolm, '')
+        return
+    elif name == 'Exceptions':
+        count = bytes_.get('>H')
+        for _ in range(count):
+            add('.throws ' + poolm.classref(bytes_.get('>H')))
+        if count: #otherwise we'll create an empty generic attribute
+            return
+
+    for vis in ('Visible', 'Invisible'):
+        if name == 'Runtime{}ParameterAnnotations'.format(vis):
+            #one byte parameter count, then an annotation list for each parameter
+            for i in range(bytes_.get('>B')):
+                for _ in range(bytes_.get('>H')):
+                    disAnnotation(bytes_, '.runtime{} parameter {} '.format(vis.lower(), i), add, poolm, '')
+            return #generic fallback on empty list not yet supported
+
+    disCFMAttribute(name_ind, name, bytes_, add, poolm)
+
+def disMethod(method, add, poolm):
+    '''Emits a complete .method ... .end method section: the header with
+    lowercased flags, name and descriptor, then the attributes and the code.'''
+    mflags = ' '.join(map(str.lower, method.flags))
+    add('.method {} {} : {}'.format(mflags, poolm.utfref(method.name_id), poolm.utfref(method.desc_id)))
+
+    for name_ind, name, attr in getAttributeTriples(method):
+        disMethodAttribute(name_ind, name, binUnpacker(attr), add, poolm)
+
+    disMethodCode(method.code, add, poolm)
+    add('.end method')
+
+def _disEVorAnnotationSub(bytes_, add, poolm, isAnnot, init_prefix, init_indent):
+    '''Emits an annotation (isAnnot true) or a single element value read from bytes_.
+
+    The structure is nested, so the pending work is kept on an explicit stack
+    of (call type, prefix, indent) entries instead of using recursion. The
+    stack is last in, first out: whatever has to be emitted last must be
+    pushed first.'''
+    C_ANNOT, C_ANNOT2, C_ANNOT3, C_EV, C_EV2 = range(5)
+    init_callt = C_ANNOT if isAnnot else C_EV
+
+    stack = [(init_callt, init_prefix, init_indent)]
+    while stack:
+        callt, prefix, indent = stack.pop()
+
+        if callt == C_ANNOT:
+            add(indent + prefix + 'annotation ' + poolm.utfref(bytes_.get('>H')))
+            #ones we want to happen last should be first on the stack. Annot3 is the final call which ends the annotation
+            stack.append((C_ANNOT3, None, indent))
+            stack.extend([(C_ANNOT2, None, indent)] * bytes_.get('>H'))
+
+        elif callt == C_ANNOT2:
+            #one key = value pair of an annotation; the value is handled next
+            key = poolm.utfref(bytes_.get('>H'))
+            stack.append((C_EV, key + ' = ', indent+'\t'))
+
+        elif callt == C_ANNOT3:
+            add(indent + '.end annotation')
+
+        elif callt == C_EV:
+            tag = codes.et_rtags[bytes_.getRaw(1)]
+            if tag == 'annotation':
+                stack.append((C_ANNOT, prefix, indent + '\t'))
+            else:
+                if tag in ('byte','char','double','int','float','long','short','boolean','string'):
+                    val = poolm.ldc(bytes_.get('>H'))
+                elif tag == 'class':
+                    val = poolm.utfref(bytes_.get('>H'))
+                elif tag == 'enum':
+                    val = poolm.utfref(bytes_.get('>H')) + ' ' + poolm.utfref(bytes_.get('>H'))
+                elif tag == 'array':
+                    val = ''
+
+                add(indent + '{} {} {}'.format(prefix, tag, val))
+                if tag == 'array':
+                    count = bytes_.get('>H')
+                    #EV2 closes the array, so it has to go on the stack before
+                    #the elements. Otherwise '.end array' would be popped and
+                    #printed ahead of them
+                    stack.append((C_EV2, None, indent))
+                    stack.extend([(C_EV, '', indent+'\t')] * count)
+
+        elif callt == C_EV2:
+            add(indent + '.end array')
+
+def disElementValue(bytes_, prefix, add, poolm, indent):
+    '''Emits a single element value read from bytes_'''
+    _disEVorAnnotationSub(bytes_, add, poolm, False, prefix, indent)
+
+def disAnnotation(bytes_, prefix, add, poolm, indent):
+    '''Emits a complete annotation read from bytes_'''
+    _disEVorAnnotationSub(bytes_, add, poolm, True, prefix, indent)
+
+#Todo - make fields automatically unpack this themselves
+def getConstValue(field):
+    '''Returns the constant pool index stored in the ConstantValue attribute of
+    a static field. Returns None if the field is not static or has no such
+    attribute.'''
+    if not field.static:
+        return None
+    const_attrs = [attr for attr in field.attributes if attr[0] == 'ConstantValue']
+    if const_attrs:
+        assert(len(const_attrs) == 1)
+        bytes_ = binUnpacker(const_attrs[0][1])
+        return bytes_.get('>H')
+
+#(flag bit, lowercased flag name) pairs for class access flags
+_classflags = [(v,k.lower()) for k,v in ClassFile.flagVals.items()]
+def disInnerClassesAttribute(name_ind, length, bytes_, add, poolm):
+    '''Emits one .inner directive per entry of an InnerClasses attribute.
+
+    length is the attribute length recorded in the class file. It is written
+    out as .innerlength when it differs from the size implied by the entry
+    count.'''
+    count = bytes_.get('>H')
+
+    #2 bytes for the count plus 8 bytes for each entry
+    if length != 2+8*count:
+        add('.innerlength {}'.format(length))
+
+    for _ in range(count):
+        inner, outer, innername, flagbits = bytes_.get('>HHHH')
+
+        #here k is the flag bit and v the flag name
+        flags = [v for k,v in _classflags if k&flagbits]
+        inner = poolm.classref(inner)
+        outer = poolm.classref(outer)
+        innername = poolm.utfref(innername)
+
+        add('.inner {} {} {} {}'.format(' '.join(flags), innername, inner, outer))
+
+    #an attribute without entries is written in generic form
+    if not count:
+        add('.attribute InnerClasses "\\0\\0"')
+
+def disOtherClassAttribute(name_ind, name, bytes_, add, poolm):
+    '''Emits a class attribute other than InnerClasses.
+
+    EnclosingMethod gets its own directive. Everything else is passed on to
+    disCFMAttribute.'''
+    assert(name != 'InnerClasses')
+    if name == 'EnclosingMethod':
+        cls, nat = bytes_.get('>HH')
+        add('.enclosing method {} {}'.format(poolm.classref(cls), poolm.multiref(nat)))
+        return
+    disCFMAttribute(name_ind, name, bytes_, add, poolm)
+
+def disassemble(cls):
+    '''Returns the assembly text for the class cls as a single string.
+
+    Output order: version, source, class header, class attributes, fields,
+    methods, and finally the .const definitions for every constant pool entry
+    that was printed as a reference.'''
+    lines = []
+    add = lines.append
+    poolm = PoolManager(cls.cpool)
+
+    # def add(s): print s
+    add('.version {0[0]} {0[1]}'.format(cls.version))
+
+    class_attributes = getAttributesDict(cls)
+    #SourceFile and BootstrapMethods are popped here, which leaves their lists
+    #empty for the generic attribute loop further down
+    if 'SourceFile' in class_attributes:
+        bytes_ = binUnpacker(class_attributes['SourceFile'].pop()[1])
+        val_ind = bytes_.get('>H')
+        add('.source {}'.format(poolm.utfref(val_ind)))
+
+    if 'BootstrapMethods' in class_attributes:
+        bytes_ = binUnpacker(class_attributes['BootstrapMethods'].pop()[1])
+        count = bytes_.get('>H')
+        for _ in range(count):
+            #each entry is a method reference, an argument count and the arguments
+            arg1, argc = bytes_.get('>HH')
+            args = (arg1,) + bytes_.get('>'+'H'*argc, forceTuple=True)
+            poolm.bootstrap_methods.append(args)
+
+    cflags = ' '.join(map(str.lower, cls.flags))
+    add('.class {} {}'.format(cflags, poolm.classref(cls.this)))
+    add('.super {}'.format(poolm.classref(cls.super)))
+    for ii in cls.interfaces_raw:
+        add('.implements {}'.format(poolm.classref(ii)))
+
+    for name in class_attributes:
+        if name == "InnerClasses":
+            assert(len(class_attributes[name]) == 1)
+            #the data of this attribute is unpacked as a (length, data) pair
+            for name_ind, (length, attr) in class_attributes[name]:
+                disInnerClassesAttribute(name_ind, length, binUnpacker(attr), add, poolm)
+        else:
+            for name_ind, attr in class_attributes[name]:
+                disOtherClassAttribute(name_ind, name, binUnpacker(attr), add, poolm)
+
+    add('')
+    for field in cls.fields:
+        fflags = ' '.join(map(str.lower, field.flags))
+        const = getConstValue(field)
+
+        if const is not None:
+            add('.field {} {} {} = {}'.format(fflags, poolm.utfref(field.name_id), poolm.utfref(field.desc_id), poolm.ldc(const)))
+        else:
+            add('.field {} {} {}'.format(fflags, poolm.utfref(field.name_id), poolm.utfref(field.desc_id)))
+
+        #field attributes go through the same function as method attributes
+        facount = 0
+        for name_ind, name, attr in getAttributeTriples(field):
+            #already printed as part of the .field line
+            if name == 'ConstantValue' and field.static:
+                continue
+            disMethodAttribute(name_ind, name, binUnpacker(attr), add, poolm)
+            facount += 1
+        #.end field is only written when at least one attribute was printed
+        if facount > 0:
+            add('.end field')
+            add('')
+
+    add('')
+
+    for method in cls.methods:
+        disMethod(method, add, poolm)
+        add('')
+
+    poolm.printConstDefs(add)
+    return '\n'.join(lines)
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/instructions.py b/own/krakatau/Krakatau/Krakatau/assembler/instructions.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/assembler/instructions.py
@@ -0,0 +1,11 @@
+instrs_noarg = ('nop', 'aconst_null', 'iconst_m1', 'iconst_0', 'iconst_1', 'iconst_2', 'iconst_3', 'iconst_4', 'iconst_5', 'lconst_0', 'lconst_1', 'fconst_0', 'fconst_1', 'fconst_2', 'dconst_0', 'dconst_1', 'iload_0', 'iload_1', 'iload_2', 'iload_3', 'lload_0', 'lload_1', 'lload_2', 'lload_3', 'fload_0', 'fload_1', 'fload_2', 'fload_3', 'dload_0', 'dload_1', 'dload_2', 'dload_3', 'aload_0', 'aload_1', 'aload_2', 'aload_3', 'iaload', 'laload', 'faload', 'daload', 'aaload', 'baload', 'caload', 'saload', 'istore_0', 'istore_1', 'istore_2', 'istore_3', 'lstore_0', 'lstore_1', 'lstore_2', 'lstore_3', 'fstore_0', 'fstore_1', 'fstore_2', 'fstore_3', 'dstore_0', 'dstore_1', 'dstore_2', 'dstore_3', 'astore_0', 'astore_1', 'astore_2', 'astore_3', 'iastore', 'lastore', 'fastore', 'dastore', 'aastore', 'bastore', 'castore', 'sastore', 'pop', 'pop2', 'dup', 'dup_x1', 'dup_x2', 'dup2', 'dup2_x1', 'dup2_x2', 'swap', 'iadd', 'ladd', 'fadd', 'dadd', 'isub', 'lsub', 'fsub', 'dsub', 'imul', 'lmul', 'fmul', 'dmul', 'idiv', 'ldiv', 'fdiv', 'ddiv', 'irem', 'lrem', 'frem', 'drem', 'ineg', 'lneg', 'fneg', 'dneg', 'ishl', 'lshl', 'ishr', 'lshr', 'iushr', 'lushr', 'iand', 'land', 'ior', 'lor', 'ixor', 'lxor', 'i2l', 'i2f', 'i2d', 'l2i', 'l2f', 'l2d', 'f2i', 'f2l', 'f2d', 'd2i', 'd2l', 'd2f', 'i2b', 'i2c', 'i2s', 'lcmp', 'fcmpl', 'fcmpg', 'dcmpl', 'dcmpg', 'ireturn', 'lreturn', 'freturn', 'dreturn', 'areturn', 'return', 'arraylength', 'athrow', 'monitorenter', 'monitorexit')
+
+instrs_int = ('bipush', 'sipush', 'iload', 'lload', 'fload', 'dload', 'aload', 'istore', 'lstore', 'fstore', 'dstore', 'astore', 'ret')
+
+instrs_lbl = ('ifeq', 'ifne', 'iflt', 'ifge', 'ifgt', 'ifle', 'if_icmpeq', 'if_icmpne', 'if_icmplt', 'if_icmpge', 'if_icmpgt', 'if_icmple', 'if_acmpeq', 'if_acmpne', 'goto', 'jsr', 'ifnull', 'ifnonnull', 'goto_w', 'jsr_w')
+
+instrs_cp = ('ldc', 'ldc_w', 'ldc2_w', 'getstatic', 'putstatic', 'getfield', 'putfield', 'invokevirtual', 'invokespecial', 'invokestatic', 'invokedynamic', 'new', 'anewarray', 'checkcast', 'instanceof')
+
+instrs_other = ('iinc', 'tableswitch', 'lookupswitch', 'invokeinterface', 'newarray', 'wide', 'multianewarray')
+
+allinstructions = ('nop', 'aconst_null', 'iconst_m1', 'iconst_0', 'iconst_1', 'iconst_2', 'iconst_3', 'iconst_4', 'iconst_5', 'lconst_0', 'lconst_1', 'fconst_0', 'fconst_1', 'fconst_2', 'dconst_0', 'dconst_1', 'bipush', 'sipush', 'ldc', 'ldc_w', 'ldc2_w', 'iload', 'lload', 'fload', 'dload', 'aload', 'iload_0', 'iload_1', 'iload_2', 'iload_3', 'lload_0', 'lload_1', 'lload_2', 'lload_3', 'fload_0', 'fload_1', 'fload_2', 'fload_3', 'dload_0', 'dload_1', 'dload_2', 'dload_3', 'aload_0', 'aload_1', 'aload_2', 'aload_3', 'iaload', 'laload', 'faload', 'daload', 'aaload', 'baload', 'caload', 'saload', 'istore', 'lstore', 'fstore', 'dstore', 'astore', 'istore_0', 'istore_1', 'istore_2', 'istore_3', 'lstore_0', 'lstore_1', 'lstore_2', 'lstore_3', 'fstore_0', 'fstore_1', 'fstore_2', 'fstore_3', 'dstore_0', 'dstore_1', 'dstore_2', 'dstore_3', 'astore_0', 'astore_1', 'astore_2', 'astore_3', 'iastore', 'lastore', 'fastore', 'dastore', 'aastore', 'bastore', 'castore', 'sastore', 'pop', 'pop2', 'dup', 'dup_x1', 'dup_x2', 'dup2', 'dup2_x1', 'dup2_x2', 'swap', 'iadd', 'ladd', 'fadd', 'dadd', 'isub', 'lsub', 'fsub', 'dsub', 'imul', 'lmul', 'fmul', 'dmul', 'idiv', 'ldiv', 'fdiv', 'ddiv', 'irem', 'lrem', 'frem', 'drem', 'ineg', 'lneg', 'fneg', 'dneg', 'ishl', 'lshl', 'ishr', 'lshr', 'iushr', 'lushr', 'iand', 'land', 'ior', 'lor', 'ixor', 'lxor', 'iinc', 'i2l', 'i2f', 'i2d', 'l2i', 'l2f', 'l2d', 'f2i', 'f2l', 'f2d', 'd2i', 'd2l', 'd2f', 'i2b', 'i2c', 'i2s', 'lcmp', 'fcmpl', 'fcmpg', 'dcmpl', 'dcmpg', 'ifeq', 'ifne', 'iflt', 'ifge', 'ifgt', 'ifle', 'if_icmpeq', 'if_icmpne', 'if_icmplt', 'if_icmpge', 'if_icmpgt', 'if_icmple', 'if_acmpeq', 'if_acmpne', 'goto', 'jsr', 'ret', 'tableswitch', 'lookupswitch', 'ireturn', 'lreturn', 'freturn', 'dreturn', 'areturn', 'return', 'getstatic', 'putstatic', 'getfield', 'putfield', 'invokevirtual', 'invokespecial','invokestatic', 'invokeinterface', 'invokedynamic', 'new', 'newarray', 'anewarray', 'arraylength', 'athrow', 'checkcast', 'instanceof', 
'monitorenter', 'monitorexit', 'wide', 'multianewarray', 'ifnull', 'ifnonnull', 'goto_w', 'jsr_w')
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/parse.py b/own/krakatau/Krakatau/Krakatau/assembler/parse.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/assembler/parse.py
@@ -0,0 +1,554 @@
+import ast, struct
+import itertools
+
+from ..classfile import ClassFile
+from ..method import Method
+from ..field import Field
+
+#Important to import tokens here even though it appears unused, as ply uses it
+from .tokenize import tokens, wordget, flags
+from .assembler import PoolRef
+
+#Specify the starting symbol
+start = 'top'
+
+###############################################################################
+#Source of unique numbers for the names of the generated rule functions
+name_counter = itertools.count()
+#Registers a grammar rule without writing out a p_ function by hand: func is
+#wrapped in a new function whose docstring is the production 'name : rhs | rhs ...',
+#and the wrapper is stored in the module globals under a fresh p_N name.
+def addRule(func, name, *rhs_rules):
+    def _inner(p):
+        func(p)
+    _inner.__doc__ = name + ' : ' + '\n| '.join(rhs_rules)
+    fname = 'p_{}'.format(next(name_counter))
+    globals()[fname] = _inner
+
+#Action for list rules: the list built so far followed by the new items
+def list_sub(p):p[0] = p[1] + p[2:]
+#Defines the rule '<name>s' as a possibly empty sequence of <name>
+def listRule(name): #returns a list
+    name2 = name + 's'
+    addRule(list_sub, name2, '{} {}'.format(name2, name), 'empty')    
+
+#Generic actions: produce nothing, pass on the first or second symbol, or
+#pass on the first symbol in upper case
+def nothing(p):pass
+def assign1(p):p[0] = p[1]
+def assign2(p):p[0] = p[2]
+def upper1(p): p[0] = p[1].upper()
+
+# Common Rules ################################################################
+#A separator is one or more NEWLINE tokens
+addRule(nothing, 'sep', 'sep NEWLINE', 'NEWLINE')
+
+#The empty production yields an empty list, which is what listRule builds on
+def p_empty(p):
+    'empty :'
+    p[0] = []
+
+#Integer literals are evaluated with ast.literal_eval
+def p_intl(p):
+    '''intl : INT_LITERAL'''
+    p[0] = ast.literal_eval(p[1])
+
+#Same for long literals, after dropping the last character of the token
+def p_longl(p):
+    '''longl : LONG_LITERAL'''
+    p[0] = ast.literal_eval(p[1][:-1])
+
+#Todo - find a better way of handling floats
+#Converts the text of a float literal to the bit pattern of the 32 bit float,
+#returned as a signed integer. The last character of s is dropped first. Both
+#hexadecimal (0x...) and decimal notation are accepted.
+def parseFloat(s):
+    s = s[:-1]
+    if s.strip('-')[:2].lower() == '0x':
+        f = float.fromhex(s)
+    else:
+        f = float(s)
+    return struct.unpack('>i', struct.pack('>f', f))[0]
+
+#Same as parseFloat but for 64 bit doubles. Here no character is dropped from s.
+def parseDouble(s):
+    if s.strip('-')[:2].lower() == '0x':
+        f = float.fromhex(s)
+    else:
+        f = float(s)
+    return struct.unpack('>q', struct.pack('>d', f))[0]
+
+def p_floatl(p):
+    '''floatl : FLOAT_LITERAL'''
+    p[0] = parseFloat(p[1])
+def p_doublel(p):
+    '''doublel : DOUBLE_LITERAL'''
+    p[0] = parseDouble(p[1])
+
+#We can allow keywords as inline classnames as long as they aren't flag names
+#which would be ambiguous. We don't allow directives to simplify the grammar
+#rules, since they wouldn't be valid identifiers anyway.
+#badwords holds the lowercased flag names plus every keyword containing a '.'
+badwords = frozenset(map(str.lower, flags))
+badwords |= frozenset(k for k in wordget if '.' in k) 
+#token types of all remaining keywords, accepted wherever a plain word is
+oktokens = frozenset(v for k,v in wordget.items() if k not in badwords)
+addRule(assign1, 'notflag', 'WORD', 'STRING_LITERAL', *oktokens)
+
+#An explicit constant pool reference. The first and last character of the
+#token are dropped. If the rest is an integer from 0 to 0xFFFF it is used as a
+#pool index, anything else is treated as a label.
+def p_ref(p):
+    '''ref : CPINDEX'''
+    s = p[1][1:-1]
+    try:
+        i = int(s)
+        if 0 <= i <= 0xFFFF:
+            p[0] = PoolRef(index=i)
+        else:
+            p[0] = PoolRef(lbl=s)    
+    except ValueError:
+        p[0] = PoolRef(lbl=s)
+
+#Inline (notref) forms of the constant pool types. Each one builds a PoolRef
+#from the type name and the already parsed arguments.
+def p_utf8_notref(p):
+    '''utf8_notref : notflag'''
+    p[0] = PoolRef('Utf8', p[1])
+
+def p_class_notref(p):
+    '''class_notref : utf8_notref'''
+    p[0] = PoolRef('Class', p[1])
+
+def p_string_notref(p):
+    '''string_notref : utf8_notref'''
+    p[0] = PoolRef('String', p[1])
+
+def p_nat_notref(p):
+    '''nameandtype_notref : utf8ref utf8ref'''
+    p[0] = PoolRef('NameAndType', p[1], p[2])
+
+#Field, Method and InterfaceMethod share the same syntax and differ only in the pool type
+def p_field_notref(p):
+    '''field_notref : classref nameandtyperef'''
+    p[0] = PoolRef('Field', p[1], p[2])
+
+def p_method_notref(p):
+    '''method_notref : classref nameandtyperef'''
+    p[0] = PoolRef('Method', p[1], p[2])
+
+def p_imethod_notref(p):
+    '''interfacemethod_notref : classref nameandtyperef'''
+    p[0] = PoolRef('InterfaceMethod', p[1], p[2])
+
+#constant pool types related to InvokeDynamic handled later
+
+#For each of these types, Xref is either the inline form X_notref or an explicit ref
+for _name in ('utf8','class', 'nameandtype', 'method', 'interfacemethod', 'methodhandle'):
+    addRule(assign1, '{}ref'.format(_name), '{}_notref'.format(_name), 'ref')
+
+###############################################################################
+#A class without a closing '.end class'. The result is a tuple of the seven parsed parts
+def p_classnoend(p):
+    '''classnoend : version_opt class_directive_lines classdec superdec interfacedecs class_directive_lines topitems'''
+    p[0] = tuple(p[1:])
+
+addRule(assign1, 'classwithend', 'classnoend D_END CLASS sep')
+listRule('classwithend')
+
+#A file is any number of classes with an end, optionally followed by one without.
+#The result is the list of class tuples
+def p_top(p):
+    '''top : sep classwithends classnoend'''
+    p[0] = p[2] + [p[3]]
+#case where all classes have an end
+addRule(assign2, 'top', 'sep classwithends')
+
+#Optional version directive, yields a pair of its two integers or an empty list
+def p_version(p):
+    '''version_opt : D_VERSION intl intl sep'''
+    p[0] = p[2], p[3]
+addRule(assign1, 'version_opt', 'empty')
+
+###############################################################################
+#Defines cflag/mflag/fflag (one upper cased flag name of a class, method or
+#field) together with the list rules cflags/mflags/fflags
+for c, type_ in zip('cmf', (ClassFile, Method, Field)):
+    _name = "{}flag".format(c)
+    addRule(upper1, _name, *list(type_.flagVals))
+    listRule(_name)
+
+#Yields (is interface, flags, class reference)
+def p_classdec(p):
+    '''classdec : D_CLASS cflags classref sep 
+                | D_INTERFACE cflags classref sep'''
+    #if interface, add interface to flags
+    p[0] = (p[1] == '.interface'), p[2], p[3]
+
+addRule(assign2, 'superdec', 'D_SUPER classref sep')
+addRule(assign2, 'interfacedec', 'D_IMPLEMENTS classref sep')
+listRule('interfacedec')
+
+#Directives that may appear before and after the class declaration
+addRule(assign1, 'class_directive', 'classattribute', 'innerlength_dir')
+addRule(assign1, 'class_directive_line', 'class_directive sep')
+listRule('class_directive_line')
+
+#Top level items are tagged with their kind: 'const', 'field' or 'method'
+def p_topitem_c(p):
+    '''topitem : const_spec'''
+    p[0] = 'const', p[1]
+def p_topitem_f(p):
+    '''topitem : field_spec'''
+    p[0] = 'field', p[1]
+def p_topitem_m(p):
+    '''topitem : method_spec'''
+    p[0] = 'method', p[1]
+listRule('topitem')
+
+###############################################################################
+#invoke dynamic stuff
+from .codes import handle_codes
+#Token types under which the handle kind names can arrive. Names that are not
+#keywords of the tokenizer arrive as plain WORD tokens
+_handle_token_types = set(wordget.get(x, 'WORD') for x in handle_codes)
+#Translates a handle kind name into its numeric code. The production is
+#assembled from the token types above, so the docstring is set after the def
+def p_handle(p):
+    p[0] = handle_codes[p[1]]
+p_handle.__doc__ = "handlecode : " + '\n| '.join(_handle_token_types)
+
+#The second argument's type depends on the code, so we require an explicit reference for simplicity
+def p_methodhandle_notref(p):
+    '''methodhandle_notref : handlecode ref'''
+    p[0] = PoolRef('MethodHandle', p[1], p[2])
+
+#Inline form of a MethodType constant. The pool type name has to be spelled
+#'MethodType', the same as in the const_rhs rules and in the disassembler
+def p_methodtype_notref(p):
+    '''methodtype_notref : utf8_notref'''
+    p[0] = PoolRef('MethodType', p[1])
+
+# A bootstrap argument is currently always an explicit reference; the list
+# rule supplies the 'bootstrap_args' nonterminal used by invokedynamic_notref.
+# NOTE(review): addRule/assign1/listRule are defined earlier in this file -- confirm.
+addRule(assign1, 'bootstrap_arg', 'ref') #TODO - allow inline constants and strings?
+listRule('bootstrap_arg')
+
+def p_invokedynamic_notref(p):
+    '''invokedynamic_notref : methodhandleref bootstrap_args COLON nameandtyperef'''
+    # Flatten into one argument list: the method handle, then every bootstrap
+    # argument, then the name-and-type reference. The COLON token (p[3]) is dropped.
+    args = [p[1]] + p[2] + [p[4]]
+    p[0] = PoolRef('InvokeDynamic', *args)
+
+###############################################################################
+def p_const_spec(p):
+    '''const_spec : D_CONST ref EQUALS const_rhs sep'''
+    # Result: (reference being defined, right-hand side value).
+    p[0] = p[2], p[4]
+
+# Factory for grammar actions of two-symbol productions: the returned action
+# wraps the second symbol's value in a PoolRef tagged with `typen`.
+def assignPoolSingle(typen):
+    def inner(p):
+        p[0] = PoolRef(typen, p[2])
+    return inner
+
+# Right-hand sides of a .const definition.
+# 1) another reference, passed through unchanged;
+# NOTE(review): addRule/assign1/assign2 are defined earlier in this file -- confirm.
+addRule(assign1, 'const_rhs', 'ref')
+# 2) a type keyword followed by the matching inline '<type>_notref' form,
+#    e.g. 'UTF8 utf8_notref';
+for tt in ['UTF8', 'CLASS','STRING','NAMEANDTYPE','FIELD','METHOD','INTERFACEMETHOD',
+            'METHODHANDLE','METHODTYPE','INVOKEDYNAMIC']:
+    addRule(assign2, 'const_rhs', '{} {}_notref'.format(tt, tt.lower()))
+
+#these are special cases, since they take a single argument
+#and the notref version can't have a ref as its argument due to ambiguity
+# 3) 'CLASS ref', 'STRING ref' and 'METHODTYPE ref', wrapped in a PoolRef of
+#    the corresponding type;
+for ptype in ('Class','String','MethodType'):
+    addRule(assignPoolSingle(ptype), 'const_rhs', ptype.upper() + ' ref')
+
+# 4) numeric constants: 'INT intl', 'FLOAT floatl', 'LONG longl', 'DOUBLE doublel'.
+for ptype in ('Int','Float','Long','Double'):
+    addRule(assignPoolSingle(ptype), 'const_rhs', '{} {}l'.format(ptype.upper(), ptype.lower()))
+###############################################################################
+
+
+def p_field_spec(p):
+    '''field_spec : D_FIELD fflags utf8ref utf8ref field_constval fieldattribute_list'''
+    # Keep the values of symbols 2 through 6, i.e. everything after the
+    # directive token: flags, the two utf8 references, the optional constant
+    # value and the attribute list.
+    p[0] = p[2:7]
+
+# Optional initial value of a field: either absent, or '=' followed by a
+# reference or an inline single-/double-slot constant.
+# NOTE(review): `nothing` and `assign2` are defined earlier in this file;
+# presumably they yield no value and p[2] respectively -- confirm.
+addRule(nothing, 'field_constval', 'empty')
+addRule(assign2, 'field_constval', 'EQUALS ref', 
+                                    'EQUALS ldc1_notref', 
+                                    'EQUALS ldc2_notref')
+
+#Sadly, we must only allow .end field when at least one attribute is specified
+#in order to avoid grammatical ambiguity. JasminXT does not share this problem
+#because it lacks the .end class syntax which causes the conflict
+# Right-recursive, non-empty list of field attributes that must be closed by
+# '.end field'.
+def p_field_attrlist1(p):
+    '''field_al_nonempty : fieldattribute sep field_al_nonempty'''
+    # Prepend this attribute to the list built from the rest of the input.
+    p[0] = [p[1]]+ p[3]
+def p_field_attrlist2(p):
+    '''field_al_nonempty : fieldattribute sep D_END FIELD sep'''
+    # Last attribute before the terminator: start the list.
+    p[0] = [p[1]]
+
+# The attribute list after a field is either such a terminated non-empty list
+# or nothing at all; in both cases a separator comes first.
+# NOTE(review): assign2 is defined earlier in this file -- confirm it selects p[2].
+addRule(assign2, 'fieldattribute_list', 'sep field_al_nonempty', 'sep empty')
+
+
+def p_method_spec(p):
+    '''method_spec : defmethod statements endmethod'''
+    # Result: (method header, list of statements); the end marker carries no data.
+    p[0] = p[1],p[2]
+
+# Two spellings of a method header. Both yield (flags, (name, descriptor)).
+def p_defmethod_0(p):
+    '''defmethod : D_METHOD mflags jas_meth_namedesc sep'''
+    # Name and descriptor arrive already paired from jas_meth_namedesc.
+    p[0] = p[2],p[3] 
+def p_defmethod_1(p):
+    '''defmethod : D_METHOD mflags utf8ref COLON utf8ref sep'''
+    # Name and descriptor are given separately around a colon; pair them here.
+    p[0] = p[2],(p[3], p[5]) 
+
+def p_jas_meth_namedesc(p):
+    '''jas_meth_namedesc : WORD'''
+    # The name and descriptor are fused in one word. Split at the last '(':
+    # the text before it is the name, and the '(' plus the remainder is the
+    # descriptor. If the word has no '(', rpartition leaves the name empty
+    # and the whole word becomes the descriptor.
+    name, paren, desc = p[1].rpartition('(')
+    name = PoolRef('Utf8', name)
+    desc = PoolRef('Utf8', paren+desc)
+    p[0] = name, desc
+# NOTE(review): `nothing` is defined earlier in this file; presumably the end
+# marker produces no value -- confirm.
+addRule(nothing, 'endmethod', 'D_END METHOD sep')
+
+# Every statement is a pair (is_code, payload). Code statements nest a second
+# pair (is_instruction, data) as their payload.
+def p_statement_0(p):
+    '''statement : method_directive sep'''
+    # Method-level directive: not code.
+    p[0] = False, p[1]
+def p_statement_1(p):
+    '''statement : code_directive sep'''
+    # Code-level directive: code, but not an instruction.
+    p[0] = True, (False, p[1])
+def p_statement_2(p):
+    '''statement : empty instruction sep 
+                | lbldec instruction sep
+                | lbldec sep'''
+    # Instruction line: data is (label or None, instruction). `p[1] or None`
+    # turns any falsy label value into None.
+    # NOTE(review): in the 'lbldec sep' alternative there is no instruction, so
+    # p[2] is the value of the 'sep' symbol -- confirm that consumers expect this.
+    p[0] = True, (True, ((p[1] or None), p[2]))
+listRule('statement')
+
+# A label declaration is a label followed by a colon. Method and code
+# directives are simple unions of the directive rules defined elsewhere.
+# NOTE(review): addRule/assign1 are defined earlier in this file -- confirm
+# that assign1 selects p[1].
+addRule(assign1, 'lbldec', 'lbl COLON')
+addRule(assign1, 'method_directive', 'methodattribute')
+addRule(assign1, 'code_directive', 'limit_dir', 'except_dir','localvar_dir','linenumber_dir','stack_dir', 'generic_codeattribute_dir')
+
+# Code directives. Each one yields (directive token value, payload) so they
+# can be dispatched on the first element.
+def p_limit_dir(p):
+    '''limit_dir : D_LIMIT LOCALS intl 
+                | D_LIMIT STACK intl'''
+    # Payload: (which limit -- the LOCALS or STACK token value, integer).
+    p[0] = p[1], (p[2], p[3])
+
+def p_except_dir(p):
+    '''except_dir : D_CATCH classref FROM lbl TO lbl USING lbl'''
+    # Payload: (class reference, FROM label, TO label, USING label).
+    p[0] = p[1], (p[2], p[4], p[6], p[8])
+
+def p_linenumber_dir(p):
+    '''linenumber_dir : D_LINE intl'''
+    # Payload: the integer literal.
+    p[0] = p[1], p[2]
+
+def p_localvar_dir(p):
+    '''localvar_dir : D_VAR intl IS utf8ref utf8ref FROM lbl TO lbl'''
+    # Payload: (integer, first utf8 ref, second utf8 ref, FROM label, TO label).
+    p[0] = p[1], (p[2], p[4], p[5], p[7], p[9])
+
+def p_instruction(p):
+    '''instruction : OP_NONE
+                    | OP_INT intl
+                    | OP_INT_INT intl intl
+                    | OP_LBL lbl
+                    | OP_FIELD fieldref_or_jas
+                    | OP_METHOD methodref_or_jas
+                    | OP_METHOD_INT imethodref_or_jas intl
+                    | OP_DYNAMIC ref
+                    | OP_CLASS classref
+                    | OP_CLASS_INT classref intl
+                    | OP_LDC1 ldc1_ref
+                    | OP_LDC2 ldc2_ref
+                    | OP_NEWARR nacode
+                    | OP_LOOKUPSWITCH luswitch
+                    | OP_TABLESWITCH tblswitch
+                    | OP_WIDE wide_instr
+                    '''
+    # 'invokenonvirtual' is accepted as an alternative spelling and rewritten
+    # to 'invokespecial' before the result is built.
+    if p[1] == 'invokenonvirtual':
+        p[1] = 'invokespecial'
+    # Result: a tuple of the mnemonic followed by its operand values.
+    p[0] = tuple(p[1:])
+    #these instructions have 0 padding at the end
+    #this is kind of an ugly hack, but the best way I could think of
+    if p[1] in ('invokeinterface','invokedynamic'):
+        p[0] += (0,)
+
+# A label is a plain word. A field operand can be written in Jasmin style,
+# as an explicit reference, or inline.
+# NOTE(review): addRule/assign1 are defined earlier in this file -- confirm.
+addRule(assign1, 'lbl', 'WORD')
+addRule(assign1, 'fieldref_or_jas', 'jas_fieldref', 'ref', 'inline_fieldref')
+def p_jas_fieldref(p):
+    '''jas_fieldref : WORD WORD'''
+    # First word is the qualified field name. Dots are treated as slashes,
+    # then the word is split at the last '/': class name before, field name after.
+    class_, sep, name = p[1].replace('.','/').rpartition('/')
+
+    # Second word is the descriptor. Assemble the nested references:
+    # Field(Class(Utf8), NameAndType(Utf8 name, Utf8 descriptor)).
+    desc = PoolRef('Utf8', p[2])
+    class_ = PoolRef('Class', PoolRef('Utf8', class_))
+    name = PoolRef('Utf8', name)
+    nt = PoolRef('NameAndType', name, desc)
+    p[0] = PoolRef('Field', class_, nt)
+
+#This is an ugly hack to work around the fact that Jasmin syntax would otherwise be impossible to 
+#handle with a LALR(1) parser
+def p_inline_fieldref_1(p):
+    '''inline_fieldref : WORD nameandtyperef
+                        | STRING_LITERAL nameandtyperef'''
+    # Class given as a bare word or string literal: wrap it as Class(Utf8)
+    # before building the Field reference.
+    class_ = PoolRef('Class', PoolRef('Utf8', p[1]))
+    p[0] = PoolRef('Field', class_, p[2])
+def p_inline_fieldref_2(p):
+    '''inline_fieldref : ref nameandtyperef'''
+    # Class given as an explicit reference: use it as is.
+    p[0] = PoolRef('Field', p[1], p[2])
+
+
+def p_jas_meth_classnamedesc(p):
+    '''jas_methodref : WORD'''
+    # One word holds class, method name and descriptor. Split at the last '('
+    # to separate the descriptor, then (after treating dots as slashes) split
+    # the remainder at the last '/' into class name and method name.
+    name, paren, desc = p[1].rpartition('(')
+    class_, sep, name = name.replace('.','/').rpartition('/')
+    desc = paren + desc
+
+    # The result is a (class reference, name-and-type reference) pair, not a
+    # finished method reference: the rules that use jas_methodref decide
+    # whether to wrap it as 'Method' or 'InterfaceMethod'.
+    class_ = PoolRef('Class', PoolRef('Utf8', class_))
+    nt = PoolRef('NameAndType', PoolRef('Utf8', name), PoolRef('Utf8', desc))
+    p[0] = class_, nt
+
+# A method operand is either a regular method reference (passed through) ...
+addRule(assign1, 'methodref_or_jas', 'methodref')
+def p_methodref_or_jas(p):
+    '''methodref_or_jas : jas_methodref'''
+    # ... or a Jasmin-style word, whose (class, name-and-type) pair is wrapped
+    # in a PoolRef tagged 'Method'.
+    p[0] = PoolRef('Method', *p[1])
+
+# An interface method operand is either a regular interface method reference
+# (passed through) ...
+addRule(assign1, 'imethodref_or_jas', 'interfacemethodref')
+def p_imethodref_or_jas(p):
+    '''imethodref_or_jas : jas_methodref'''
+    # ... or a Jasmin-style word, whose (class, name-and-type) pair is wrapped
+    # in a PoolRef tagged 'InterfaceMethod'.
+    p[0] = PoolRef('InterfaceMethod', *p[1])
+
+
+from .codes import newarr_codes
+# Token types that the names in newarr_codes lex to: a name with an entry in
+# wordget uses that token type, any other name is a plain WORD token.
+_newarr_token_types = set(wordget.get(x, 'WORD') for x in newarr_codes)
+def p_nacode(p):
+    # Translate the matched name through the newarr_codes table.
+    p[0] = newarr_codes[p[1]]
+# The grammar docstring is assembled at import time, one alternative per token
+# type collected above.
+p_nacode.__doc__ = "nacode : " + '\n| '.join(_newarr_token_types)
+
+# Operand of the OP_LDC1 instructions: an explicit reference or one of the
+# inline constants below (string, int or float literal).
+addRule(assign1, 'ldc1_ref', 'ldc1_notref', 'ref')
+def p_ldc1_notref_string(p):
+    '''ldc1_notref : STRING_LITERAL'''
+    # A string constant wraps a Utf8 entry holding the literal's value.
+    p[0] = PoolRef('String', PoolRef('Utf8', p[1]))
+def p_ldc1_notref_int(p):
+    '''ldc1_notref : intl'''
+    p[0] = PoolRef('Int', p[1])
+def p_ldc1_notref_float(p):
+    '''ldc1_notref : floatl'''
+    p[0] = PoolRef('Float', p[1])
+
+# Operand of the OP_LDC2 instruction: an explicit reference or an inline
+# long or double literal.
+addRule(assign1, 'ldc2_ref', 'ldc2_notref', 'ref')
+def p_ldc2_notref_long(p):
+    '''ldc2_notref : longl'''
+    p[0] = PoolRef('Long', p[1])
+def p_ldc2_notref_double(p):
+    '''ldc2_notref : doublel'''
+    p[0] = PoolRef('Double', p[1])
+
+def p_defaultentry(p):
+    '''defaultentry : DEFAULT COLON lbl'''
+    # Only the target label is kept.
+    p[0] = p[3]
+
+def p_luentry(p):
+    '''luentry : intl COLON lbl sep'''
+    # One lookupswitch case: (integer key, target label).
+    p[0] = p[1], p[3]
+listRule('luentry')
+
+# One tableswitch case is just a target label on its own line.
+# NOTE(review): addRule/assign1/listRule are defined earlier in this file;
+# the switch rules refer to 'luentrys' and 'tblentrys', so listRule appears
+# to define the pluralised list nonterminal -- confirm.
+addRule(assign1, 'tblentry', 'lbl sep')
+listRule('tblentry')
+
+# Both switch operands yield a 3-tuple whose last two items are the list of
+# entries and the default label.
+def p_lookupswitch(p):
+    '''luswitch : empty sep luentrys defaultentry'''
+    # First element is the value of the 'empty' symbol, which keeps the tuple
+    # the same length as the tableswitch one.
+    p[0] = p[1], p[3], p[4]
+
+def p_tableswitch(p):
+    '''tblswitch : intl sep tblentrys defaultentry'''
+    # First element is the integer literal that precedes the entries.
+    p[0] = p[1], p[3], p[4]
+
+def p_wide_instr(p):
+    '''wide_instr : OP_INT intl
+                | OP_INT_INT intl intl'''
+    # Result: (wrapped mnemonic, tuple of its one or two integer operands).
+    p[0] = p[1], tuple(p[2:])
+
+#######################################################################
+# Explicit Attributes
+# Which attribute directives are accepted where: 'cfmattribute' is the set
+# shared by classes, fields and methods; classes and methods each add their own.
+# NOTE(review): addRule/assign1 are defined earlier in this file -- confirm
+# that assign1 selects p[1].
+addRule(assign1, 'cfmattribute', 'annotation_dir', 'signature_dir', 'generic_attribute_dir')
+addRule(assign1, 'classattribute', 'cfmattribute', 'sourcefile_dir', 'inner_dir', 'enclosing_dir')
+addRule(assign1, 'fieldattribute', 'cfmattribute')
+addRule(assign1, 'methodattribute', 'cfmattribute', 'throws_dir', 'annotation_param_dir', 'annotation_def_dir')
+
+#Class, field, method
+def p_annotation_dir(p):
+    '''annotation_dir : D_RUNTIMEVISIBLE annotation
+                    | D_RUNTIMEINVISIBLE annotation'''
+    # Result: (directive token value, (None, annotation)). The None occupies
+    # the slot that annotation_param_dir fills with a parameter index.
+    p[0] = p[1], (None, p[2])
+
+def p_signature_dir(p):
+    '''signature_dir : D_SIGNATURE utf8ref'''
+    # Result: (directive token value, utf8 reference).
+    p[0] = p[1], p[2]
+
+#Class only
+def p_sourcefile_dir(p):
+    '''sourcefile_dir : D_SOURCE utf8ref'''
+    # Result: (directive token value, utf8 reference).
+    p[0] = p[1], p[2]
+
+def p_inner_dir(p): 
+    '''inner_dir : D_INNER cflags utf8ref classref classref'''
+    # The payload reverses the written operand order: the two class
+    # references first, then the utf8 reference, then the flags.
+    p[0] = p[1], (p[4],p[5],p[3],p[2]) #use JasminXT's (flags, name, inner, outer) order but switch internally to correct order
+
+def p_enclosing_dir(p): 
+    '''enclosing_dir : D_ENCLOSING METHOD classref nameandtyperef'''
+    # Result: (directive token value, (class reference, name-and-type reference)).
+    p[0] = p[1], (p[3], p[4])
+
+#This is included here even though strictly speaking, it's not an attribute. Rather it's a directive that affects the assembly
+#of the InnerClasses attribute
+def p_innerlength_dir(p): 
+    '''innerlength_dir : D_INNERLENGTH intl'''
+    # Result: (directive token value, integer literal).
+    p[0] = p[1], p[2]
+
+
+#Method only
+def p_throws_dir(p):
+    '''throws_dir : D_THROWS classref'''
+    # Result: (directive token value, class reference).
+    p[0] = p[1], p[2]
+
+def p_annotation_param_dir(p):
+    '''annotation_param_dir : D_RUNTIMEVISIBLE PARAMETER intl annotation
+                           | D_RUNTIMEINVISIBLE PARAMETER intl annotation'''
+    # Result: (directive token value, (integer literal, annotation)), the same
+    # shape as annotation_dir but with the integer in place of None.
+    p[0] = p[1], (p[3], p[4])
+def p_annotation_def_dir(p):
+    '''annotation_def_dir : D_ANNOTATIONDEFAULT element_value'''
+    # Result: (directive token value, element value).
+    p[0] = p[1], p[2]
+
+#Generic
+# Generic attribute directives: a utf8 reference followed by a string
+# literal. Both yield (directive token value, (utf8 reference, string literal)).
+def p_generic_attribute_dir(p): 
+    '''generic_attribute_dir : D_ATTRIBUTE utf8ref STRING_LITERAL'''
+    p[0] = p[1], (p[2], p[3])
+
+def p_generic_codeattribute_dir(p): 
+    '''generic_codeattribute_dir : D_CODEATTRIBUTE utf8ref STRING_LITERAL'''
+    p[0] = p[1], (p[2], p[3])
+
+#######################################################################
+#Stack map stuff
+# NOTE(review): addRule/listRule/nothing/assign2 are defined earlier in this
+# file; their exact behaviour is not visible here -- confirm.
+addRule(nothing, 'endstack', 'D_END STACK') #directives are not expected to end with a sep
+
+# Action that keeps every right-hand-side value, as a tuple.
+def assign1All(p):p[0] = tuple(p[1:])
+# A verification type is a 1-tuple for the simple kinds and a 2-tuple
+# (keyword, operand) for OBJECT and UNINITIALIZED.
+addRule(assign1All, 'verification_type', 'TOP', 'INTEGER', 'FLOAT', 'DOUBLE', 'LONG', 'NULL', 'UNINITIALIZEDTHIS',
+                                        'OBJECT classref', 'UNINITIALIZED lbl')
+listRule('verification_type')
+# Lists of verification types introduced by the LOCALS or STACK keyword.
+addRule(assign2, 'locals_vtlist', 'LOCALS verification_types sep')
+addRule(assign2, 'stack_vtlist', 'STACK verification_types sep')
+
+def p_stack_dir(p):
+    '''stack_dir_rest : SAME 
+                    | SAME_EXTENDED
+                    | CHOP intl 
+                    | SAME_LOCALS_1_STACK_ITEM sep stack_vtlist endstack
+                    | SAME_LOCALS_1_STACK_ITEM_EXTENDED sep stack_vtlist endstack
+                    | APPEND sep locals_vtlist endstack
+                    | FULL sep locals_vtlist stack_vtlist endstack'''
+    # Result: ('.stackmap', tuple of every right-hand-side value). The tuple
+    # starts with the frame-kind keyword and, for the multi-line forms, also
+    # contains the values of the 'sep' and 'endstack' symbols.
+    p[0] = '.stackmap', tuple(p[1:])
+# The full directive is the D_STACK token followed by one of the forms above.
+# NOTE(review): assign2 is defined earlier in this file -- confirm it selects p[2].
+addRule(assign2, 'stack_dir', 'D_STACK stack_dir_rest')
+#######################################################################
+#Annotation stuff
+from .codes import et_tags
+# Token types that the primitive tag names lex to: a name with an entry in
+# wordget uses that token type, any other name is a plain WORD token.
+primtags = set(wordget.get(x, 'WORD') for x in 'byte char double int float long short boolean string'.split())
+addRule(assign1, 'primtag', *primtags)
+# Any constant usable as an annotation value: inline single-/double-slot
+# constant or an explicit reference.
+addRule(assign1, 'ldc_any', 'ldc1_notref', 'ldc2_notref', 'ref')
+
+def p_element_value_0(p):
+    '''element_value : primtag ldc_any
+                    | CLASS utf8ref
+                    | ENUM utf8ref utf8ref
+                    | ARRAY sep element_array'''
+    # Result: (tag looked up in et_tags from the leading keyword, tuple of the
+    # remaining right-hand-side values). For ARRAY the tuple therefore also
+    # contains the value of the 'sep' symbol.
+    p[0] = et_tags[p[1]], tuple(p[2:])
+def p_element_value_1(p):
+    '''element_value : annotation'''
+    # A nested annotation uses the literal tag '@' and a 1-tuple payload.
+    p[0] = '@', (p[1],)
+
+# An element value followed by a separator; an array body is a list of such
+# lines closed by '.end array'.
+# NOTE(review): addRule/assign1/listRule are defined earlier in this file -- confirm.
+addRule(assign1, 'element_value_line', 'element_value sep')
+listRule('element_value_line')
+addRule(assign1, 'element_array', 'element_value_lines D_END ARRAY')
+
+def p_key_ev_line(p):
+    '''key_ev_line : utf8ref EQUALS element_value_line'''
+    # One 'key = value' line of an annotation: (key reference, element value).
+    p[0] = p[1], p[3]
+listRule('key_ev_line')
+
+def p_annotation(p):
+    '''annotation : ANNOTATION utf8ref sep key_ev_lines D_END ANNOTATION'''
+    # Result: (utf8 reference following the keyword, list of key/value lines).
+    p[0] = p[2], p[4]
+#######################################################################
+
+# Syntax error callback for the parser: prints a diagnostic, dumps parser
+# state, then consumes all remaining tokens. `p` is the offending token, or
+# None when the input ended unexpectedly.
+def p_error(p):
+    if p is None:
+        print "Syntax error: unexpected EOF"
+    else: #remember to subtract 1 from line number since we had a newline at the start of the file
+        print "Syntax error at line {}: unexpected token {!r}".format(p.lineno-1, p.value)
+    
+    #Ugly hack since Ply doesn't provide any useful error information
+    # Reads local variables of the calling frame by name ('actions', 'state',
+    # 'ltype', 'symstack').
+    # NOTE(review): this depends on the local variable names of whatever
+    # function calls p_error (presumably Ply's parse loop) and will raise
+    # KeyError if they differ -- confirm against the Ply version in use.
+    import inspect
+    frame = inspect.currentframe()
+    cvars = frame.f_back.f_locals
+    print 'Expected:', ', '.join(cvars['actions'][cvars['state']].keys())
+    print 'Found:', cvars['ltype']
+    print 'Current stack:', cvars['symstack']
+
+    #Discard the rest of the input so that Ply doesn't attempt error recovery
+    # yacc.token() is called until it returns None.
+    from ply import yacc
+    tok = yacc.token()
+    while tok is not None:
+        tok = yacc.token()
+
+# Build and return the parser. All keyword arguments are forwarded unchanged
+# to yacc.yacc(); ply is imported only when this function is called.
+def makeParser(**kwargs):
+    from ply import yacc
+    return yacc.yacc(**kwargs)
\ No newline at end of file
diff --git a/own/krakatau/Krakatau/Krakatau/assembler/tokenize.py b/own/krakatau/Krakatau/Krakatau/assembler/tokenize.py
new file mode 100644
--- /dev/null
+++ b/own/krakatau/Krakatau/Krakatau/assembler/tokenize.py
@@ -0,0 +1,140 @@
+import ast
+
+from ..classfile import ClassFile
+from ..method import Method
+from ..field import Field
+from .. import constant_pool
+from . import instructions as ins
+from . import codes
+
+# Names of the assembler directives. The trailing commas make each line a
+# tuple, so += concatenates tuples.
+directives = 'CLASS','INTERFACE','SUPER','IMPLEMENTS','CONST','FIELD','METHOD','END','LIMIT','CATCH','SOURCE','LINE','VAR','THROWS',
+directives += 'VERSION', 'STACK', 'RUNTIMEVISIBLE', 'RUNTIMEINVISIBLE', 'ANNOTATIONDEFAULT', 'INNER', 'ENCLOSING', 'SIGNATURE', 
+directives += 'ATTRIBUTE', 'CODEATTRIBUTE', 'INNERLENGTH'
+# Reserved words that are not directives.
+keywords = ['CLASS','METHOD','FIELD','LOCALS','STACK','FROM','TO','USING','DEFAULT','IS']
+keywords += ['SAME','SAME_LOCALS_1_STACK_ITEM','SAME_LOCALS_1_STACK_ITEM_EXTENDED','CHOP','SAME_EXTENDED','APPEND','FULL']
+keywords += ['ANNOTATION','ARRAY','PARAMETER']
+# All flag names of classes, methods and fields. Concatenating .keys() with +
+# requires them to be lists, as they are in Python 2.
+flags = ClassFile.flagVals.keys() + Method.flagVals.keys() + Field.flagVals.keys()
+
+# lowwords are looked up by their lowercase spelling; casewords are looked up
+# exactly as spelled in their source tables.
+lowwords = set().union(keywords, flags)
+casewords = set().union(codes.vt_keywords, constant_pool.name2Type.keys())
+
+# wordget maps the text of a word to its token type:
+#   lowercase keyword/flag   -> uppercase name
+#   caseword, as spelled     -> uppercase name
+#   '.' + lowercase directive -> 'D_' + directive name
+wordget = {}
+wordget.update({w.lower():w.upper() for w in lowwords})
+wordget.update({w:w.upper() for w in casewords})
+wordget.update({'.'+w.lower():'D_'+w for w in directives})
+
+# No instruction mnemonic may collide with a keyword, flag, caseword or
+# directive registered so far.
+assert(set(wordget).isdisjoint(ins.allinstructions))
+# Classify mnemonics by operand shape; the token type names the shape and is
+# what the 'instruction' grammar rule matches on.
+for op in ins.instrs_noarg:
+    wordget[op] = 'OP_NONE'
+for op in ins.instrs_int:
+    wordget[op] = 'OP_INT'
+for op in ins.instrs_lbl:
+    wordget[op] = 'OP_LBL'
+for op in ('getstatic', 'putstatic', 'getfield', 'putfield'):
+    wordget[op] = 'OP_FIELD'
+#support invokenonvirtual for backwards compatibility with Jasmin
+for op in ('invokevirtual', 'invokespecial', 'invokestatic', 'invokenonvirtual'): 
+    wordget[op] = 'OP_METHOD'
+for op in ('new', 'anewarray', 'checkcast', 'instanceof'):
+    wordget[op] = 'OP_CLASS'
+# These three each get a token type of their own: OP_WIDE, OP_LOOKUPSWITCH,
+# OP_TABLESWITCH.
+for op in ('wide','lookupswitch','tableswitch'):
+    wordget[op] = 'OP_' + op.upper()
+
+# Remaining mnemonics with individually assigned operand shapes.
+wordget['ldc'] = 'OP_LDC1'
+wordget['ldc_w'] = 'OP_LDC1'
+wordget['ldc2_w'] = 'OP_LDC2'
+wordget['iinc'] = 'OP_INT_INT'
+wordget['newarray'] = 'OP_NEWARR'
+wordget['multianewarray'] = 'OP_CLASS_INT'
+wordget['invokeinterface'] = 'OP_METHOD_INT'
+wordget['invokedynamic'] = 'OP_DYNAMIC'
+
+# Any instruction not classified above falls back to a token type equal to
+# its own uppercased mnemonic; setdefault leaves existing entries untouched.
+for op in ins.allinstructions:
+    wordget.setdefault(op,op.upper())
+
+#special PLY value


More information about the pypy-commit mailing list