[pypy-commit] pypy utf8-unicode2: Fix translation
waedt
noreply at buildbot.pypy.org
Sun Sep 7 00:33:11 CEST 2014
Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r73355:327917b8c2e5
Date: 2014-09-06 17:32 -0500
http://bitbucket.org/pypy/pypy/changeset/327917b8c2e5/
Log: Fix translation
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -124,7 +124,7 @@
return s1 in s2
class Utf8Str(object):
- _immutable_fields_ = ['bytes', '_is_ascii', '_len']
+ _immutable_fields_ = ['bytes', '_is_ascii', '_len', '_cache_scheme']
def __init__(self, data, is_ascii=False, length=-1):
# TODO: Maybe I can determine is_ascii rather than have it passed in?
@@ -162,7 +162,12 @@
self._len = length
def byte_index_of_char(self, char):
- return self._cache_scheme.byte_index_of_char(char)
+ if self._is_ascii:
+ return char
+
+ res = self._cache_scheme.byte_index_of_char(char)
+ assert res >= 0
+ return res
def byte_index_of_char_from_known(self, char, start_char, start_byte):
if start_char > char:
@@ -187,6 +192,9 @@
def char_index_of_byte(self, byte_pos):
+ if self._is_ascii:
+ return byte_pos
+
return self._cache_scheme.char_index_of_byte(byte_pos)
def char_index_of_byte_from_known(self, byte_pos, start_char, start_byte):
@@ -915,24 +923,21 @@
if pos == 0:
return 0
- # Calculate the distance from the start, the last known position, and
- # the end
- # (cost, known char, known byte)
- start_dist = (pos, 0, 0)
- end_dist = (2 * (len(self.str) - pos), len(self.str),
- len(self.str.bytes))
+ if pos < 2 * (len(self.str) - pos):
+ cost = pos
+ known_char = 0
+ known_byte = 0
+ else:
+ cost = 2 * (len(self.str) - pos)
+ known_char = len(self.str)
+ known_byte = len(self.str.bytes)
- if pos <= self.prev_pos:
- min = (2 * (self.prev_pos - pos), self.prev_pos, self.prev_byte_pos)
- else:
- min = (pos - self.prev_pos, self.prev_pos, self.prev_byte_pos)
+ if 2 * abs(pos - self.prev_pos) < cost:
+ known_char = self.prev_pos
+ known_byte = self.prev_byte_pos
- if start_dist[0] < min[0]:
- min = start_dist
- if end_dist[0] < min[0]:
- min = end_dist
-
- b = self.str.byte_index_of_char_from_known(pos, min[1], min[2])
+ b = self.str.byte_index_of_char_from_known(pos, known_char,
+ known_byte)
self.prev_pos = pos
self.prev_byte_pos = b
return b
@@ -942,19 +947,21 @@
return 0
# (cost, known char, known byte)
- start_dist = (byte_pos, 0, 0)
- end_dist = (2 * (len(self.str.bytes) - byte_pos), len(self.str),
- len(self.str.bytes))
+ if byte_pos < 2 * (len(self.str.bytes) - byte_pos):
+ cost = byte_pos
+ known_char = 0
+ known_byte = 0
+ else:
+ cost = 2 * (len(self.str.bytes) - byte_pos)
+ known_char = len(self.str)
+ known_byte = len(self.str.bytes)
- min = (2 * abs(byte_pos - self.prev_byte_pos), self.prev_pos,
- self.prev_byte_pos)
+ if 2 * abs(byte_pos - self.prev_byte_pos) < cost:
+ known_char = self.prev_pos
+ known_byte = self.prev_byte_pos
- if start_dist[0] < min[0]:
- min = start_dist
- if end_dist[0] < min[0]:
- min = end_dist
-
- i = self.str.char_index_of_byte_from_known(byte_pos, min[1], min[2])
+ i = self.str.char_index_of_byte_from_known(byte_pos, known_char,
+ known_byte)
self.prev_pos = i
self.prev_byte_pos = byte_pos
return i
More information about the pypy-commit mailing list