[pypy-svn] r49590 - pypy/dist/pypy/rlib

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Dec 10 13:07:58 CET 2007


Author: cfbolz
Date: Mon Dec 10 13:07:57 2007
New Revision: 49590

Modified:
   pypy/dist/pypy/rlib/rope.py
Log:
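Be lazier in calculating the extra rope info (charbitmask, is_ascii, is_bytestring, depth, balanced): compute it on first use instead of eagerly in the node constructors.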


Modified: pypy/dist/pypy/rlib/rope.py
==============================================================================
--- pypy/dist/pypy/rlib/rope.py	(original)
+++ pypy/dist/pypy/rlib/rope.py	Mon Dec 10 13:07:57 2007
@@ -53,7 +53,6 @@
 
 class StringNode(object):
     hash_cache = 0
-    charbitmask = 0
     def length(self):
         raise NotImplementedError("base class")
 
@@ -69,6 +68,9 @@
     def hash_part(self):
         raise NotImplementedError("base class")
 
+    def charbitmask(self):
+        raise NotImplementedError("base class")
+
     def check_balanced(self):
         return True
 
@@ -115,23 +117,28 @@
 
 
 class LiteralStringNode(LiteralNode):
-    def __init__(self, s):
+    _is_ascii = False
+    _charbitmask = 0
+    def __init__(self, s, charbitmask=0, is_ascii=False):
         assert isinstance(s, str)
         self.s = s
-        is_ascii = True
-        charbitmask = 0
-        for c in s:
-            ordc = ord(c)
-            if ordc >= 128:
-                is_ascii = False
-            charbitmask |= 1 << (ordc & 0x1F)
-        self.charbitmask = charbitmask
-        self._is_ascii = is_ascii
+        if not s:
+            self._is_ascii = True
+            self._calculated = True
+        elif charbitmask:
+            self._charbitmask = charbitmask
+            self._is_ascii = is_ascii
+            self._calculated = True
+        else:
+            self._calculated = False
+
     
     def length(self):
         return len(self.s)
 
     def is_ascii(self):
+        if not self._calculated:
+            self._calculate()
         return self._is_ascii
 
     def is_bytestring(self):
@@ -154,6 +161,23 @@
             h = self.hash_cache = x
         return h
 
+    def _calculate(self):
+        is_ascii = True
+        charbitmask = 0
+        for c in self.s:
+            ordc = ord(c)
+            if ordc >= 128:
+                is_ascii = False
+            charbitmask |= intmask(1 << (ordc & 0x1F))
+        self._is_ascii = is_ascii
+        self._charbitmask = charbitmask
+        self._calculated = True
+
+    def charbitmask(self):
+        if not self._calculated:
+            self._calculate()
+        return self._charbitmask
+
     def getchar(self, index):
         return self.s[index]
 
@@ -171,7 +195,7 @@
             return False
         if self.is_ascii() and value > 127:
             return False
-        return (1 << (value & 0x1f)) & self.charbitmask
+        return (1 << (value & 0x1f)) & self.charbitmask()
 
     def getslice(self, start, stop):
         assert 0 <= start <= stop
@@ -203,6 +227,9 @@
         yield ('"%s" [shape=box,label="length: %s\\n%s"];' % (
             id(self), len(self.s),
             repr(addinfo).replace('"', '').replace("\\", "\\\\")))
+
+    def _freeze_(self):
+        self._calculate()
 LiteralStringNode.EMPTY = LiteralStringNode("")
 LiteralStringNode.PREBUILT = [LiteralStringNode(chr(i)) for i in range(256)]
 del i
@@ -212,13 +239,7 @@
     def __init__(self, u):
         assert isinstance(u, unicode)
         self.u = u
-        charbitmask = 0
-        for c in u:
-            ordc = ord(c)
-            if ordc >= 128:
-                charbitmask |= 1 # be compatible with LiteralStringNode
-            charbitmask |= 1 << (ordc & 0x1F)
-        self.charbitmask = charbitmask
+        self._charbitmask = 0
     
     def length(self):
         return len(self.u)
@@ -243,6 +264,22 @@
             h = self.hash_cache = x
         return h
 
+    def _calculate(self):
+        if len(self.u) == 0:
+            return
+        charbitmask = 0
+        for c in self.u:
+            ordc = ord(c)
+            if ordc >= 128:
+                charbitmask |= 1 # be compatible with LiteralStringNode
+            charbitmask |= intmask(1 << (ordc & 0x1F))
+        self._charbitmask = intmask(charbitmask)
+
+    def charbitmask(self):
+        if not self._charbitmask:
+            self._calculate()
+        return self._charbitmask
+
     def getunichar(self, index):
         return self.u[index]
 
@@ -258,7 +295,7 @@
         return LiteralUnicodeNode(unichr(ch))
 
     def can_contain_int(self, value):
-        return (1 << (value & 0x1f)) & self.charbitmask
+        return (1 << (value & 0x1f)) & self.charbitmask()
 
     def getslice(self, start, stop):
         assert 0 <= start <= stop
@@ -289,6 +326,8 @@
         yield ('"%s" [shape=box,label="length: %s\\n%s"];' % (
             id(self), len(self.u),
             repr(addinfo).replace('"', '').replace("\\", "\\\\")))
+    def _freeze_(self):
+        self._calculate()
 
 def make_binary_get(getter):
     def get(self, index):
@@ -303,46 +342,43 @@
     return get
 
 class BinaryConcatNode(StringNode):
-    def __init__(self, left, right):
+    def __init__(self, left, right, balanced=False):
         self.left = left
         self.right = right
         try:
             self.len = ovfcheck(left.length() + right.length())
         except OverflowError:
             raise
-        self._depth = max(left.depth(), right.depth()) + 1
-        self.balanced = False
-        self._is_ascii = left.is_ascii() and right.is_ascii()
-        self._is_bytestring = left.is_bytestring() and right.is_bytestring()
-        self.charbitmask = left.charbitmask | right.charbitmask
+        self.balanced = balanced
+        self._calculated = False
+        self._depth = 0
 
     def is_ascii(self):
+        if not self._calculated:
+            self._calculate()
         return self._is_ascii
 
     def is_bytestring(self):
+        if not self._calculated:
+            self._calculate()
         return self._is_bytestring
 
     def check_balanced(self):
         if self.balanced:
             return True
-        if not self.left.check_balanced() or not self.right.check_balanced():
-            return False
-        left = self.left
-        right = self.right
-        llen = left.length()
-        rlen = right.length()
-        ldepth = left.depth()
-        rdepth = right.depth()
-        balanced = (find_fib_index(self.len // (NEW_NODE_WHEN_LENGTH / 2)) >=
-                    self._depth)
-        self.balanced = balanced
-        return balanced
+        if not self._calculated:
+            self._calculate()
+        return self.balanced
 
     def length(self):
         return self.len
 
     def depth(self):
-        return self._depth
+        depth = self._depth
+        if not depth:
+            depth = self._depth = max(self.left.depth(),
+                                      self.right.depth()) + 1
+        return depth
 
     getchar = make_binary_get("getchar")
     getunichar = make_binary_get("getunichar")
@@ -354,7 +390,7 @@
             return False
         if self.is_ascii() and value > 127:
             return False
-        return (1 << (value & 0x1f)) & self.charbitmask
+        return (1 << (value & 0x1f)) & self.charbitmask()
 
     def getslice(self, start, stop):
         if start == 0:
@@ -385,7 +421,32 @@
             h = self.hash_cache = x
         return h
 
+    def _calculate(self):
+        left = self.left
+        right = self.right
+        self._is_ascii = left.is_ascii() and right.is_ascii()
+        self._is_bytestring = left.is_bytestring() and right.is_bytestring()
+        self._charbitmask = left.charbitmask() | right.charbitmask()
+        # balance calculation
+        # XXX improve?
+        if self.balanced:
+            balanced = True
+        elif not left.check_balanced() or not right.check_balanced():
+            balanced = False
+        else:
+            balanced = (find_fib_index(self.len // (NEW_NODE_WHEN_LENGTH / 2)) >=
+                        self._depth)
+        self.balanced = balanced
+        self._calculated = True
+
+    def charbitmask(self):
+        if not self._calculated:
+            self._calculate()
+        return self._charbitmask
+
     def rebalance(self):
+        if self.balanced:
+            return self
         return rebalance([self], self.len)
 
     def dot(self, seen, toplevel=False):
@@ -404,6 +465,8 @@
             yield '"%s" -> "%s";' % (id(self), id(child))
             for line in child.dot(seen):
                 yield line
+    def _freeze_(self):
+        self._calculate()
 
 def concatenate(node1, node2):
     if node1.length() == 0:
@@ -427,8 +490,8 @@
     if slicelength == -1:
         # XXX for testing only
         slicelength = len(xrange(start, stop, step))
+    start, stop, node = find_straddling(node, start, stop)
     if step != 1:
-        start, stop, node = find_straddling(node, start, stop)
         iter = SeekableItemIterator(node)
         iter.seekforward(start)
         if node.is_bytestring():
@@ -443,7 +506,7 @@
                 iter.seekforward(step - 1)
                 result.append(iter.nextunichar())
             return rope_from_unicharlist(result)
-    return getslice_one(node, start, stop)
+    return node.getslice(start, stop)
 
 def getslice_one(node, start, stop):
     start, stop, node = find_straddling(node, start, stop)
@@ -589,7 +652,6 @@
         if l[index] is not None:
             curr = BinaryConcatNode(l[index], curr)
     assert curr is not None
-    curr.check_balanced()
     return curr
 
 # __________________________________________________________________________



More information about the Pypy-commit mailing list