[pypy-commit] pypy utf8-unicode2: Fix translation

waedt noreply at buildbot.pypy.org
Sun Sep 7 00:33:11 CEST 2014


Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r73355:327917b8c2e5
Date: 2014-09-06 17:32 -0500
http://bitbucket.org/pypy/pypy/changeset/327917b8c2e5/

Log:	Fix translation

diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -124,7 +124,7 @@
         return s1 in s2
 
 class Utf8Str(object):
-    _immutable_fields_ = ['bytes', '_is_ascii', '_len']
+    _immutable_fields_ = ['bytes', '_is_ascii', '_len', '_cache_scheme']
 
     def __init__(self, data, is_ascii=False, length=-1):
         # TODO: Maybe I can determine is_ascii rather than have it passed in?
@@ -162,7 +162,12 @@
         self._len = length
 
     def byte_index_of_char(self, char):
-        return self._cache_scheme.byte_index_of_char(char)
+        if self._is_ascii:
+            return char
+
+        res = self._cache_scheme.byte_index_of_char(char)
+        assert res >= 0
+        return res
 
     def byte_index_of_char_from_known(self, char, start_char, start_byte):
         if start_char > char:
@@ -187,6 +192,9 @@
 
 
     def char_index_of_byte(self, byte_pos):
+        if self._is_ascii:
+            return byte_pos
+
         return self._cache_scheme.char_index_of_byte(byte_pos)
 
     def char_index_of_byte_from_known(self, byte_pos, start_char, start_byte):
@@ -915,24 +923,21 @@
         if pos == 0:
             return 0
 
-        # Calculate the distance from the start, the last known position, and
-        # the end
-        # (cost, known char, known byte)
-        start_dist = (pos, 0, 0)
-        end_dist = (2 * (len(self.str) - pos), len(self.str),
-                    len(self.str.bytes))
+        if pos < 2 * (len(self.str) - pos):
+            cost = pos
+            known_char = 0
+            known_byte = 0
+        else:
+            cost = 2 * (len(self.str) - pos)
+            known_char = len(self.str)
+            known_byte = len(self.str.bytes)
 
-        if pos <= self.prev_pos:
-            min = (2 * (self.prev_pos - pos), self.prev_pos, self.prev_byte_pos)
-        else:
-            min = (pos - self.prev_pos, self.prev_pos, self.prev_byte_pos)
+        if 2 * abs(pos - self.prev_pos) < cost:
+            known_char = self.prev_pos
+            known_byte = self.prev_byte_pos
 
-        if start_dist[0] < min[0]:
-            min = start_dist
-        if end_dist[0] < min[0]:
-            min = end_dist
-
-        b =  self.str.byte_index_of_char_from_known(pos, min[1], min[2])
+        b =  self.str.byte_index_of_char_from_known(pos, known_char,
+                                                    known_byte)
         self.prev_pos = pos
         self.prev_byte_pos = b
         return b
@@ -942,19 +947,21 @@
             return 0
 
         # (cost, known char, known byte)
-        start_dist = (byte_pos, 0, 0)
-        end_dist = (2 * (len(self.str.bytes) - byte_pos), len(self.str),
-                    len(self.str.bytes))
+        if byte_pos < 2 * (len(self.str.bytes) - byte_pos):
+            cost = byte_pos
+            known_char = 0
+            known_byte = 0
+        else:
+            cost = 2 * (len(self.str.bytes) - byte_pos)
+            known_char = len(self.str)
+            known_byte = len(self.str.bytes)
 
-        min = (2 * abs(byte_pos - self.prev_byte_pos), self.prev_pos,
-               self.prev_byte_pos)
+        if 2 * abs(byte_pos - self.prev_byte_pos) < cost:
+            known_char = self.prev_pos
+            known_byte = self.prev_byte_pos
 
-        if start_dist[0] < min[0]:
-            min = start_dist
-        if end_dist[0] < min[0]:
-            min = end_dist
-
-        i = self.str.char_index_of_byte_from_known(byte_pos, min[1], min[2])
+        i = self.str.char_index_of_byte_from_known(byte_pos, known_char,
+                                                   known_byte)
         self.prev_pos = i
         self.prev_byte_pos = byte_pos
         return i


More information about the pypy-commit mailing list