[pypy-commit] pypy py3.5: hg merge default

Thu Jan 26 03:35:43 EST 2017

Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r89777:ad1beddd3f43
Date: 2017-01-26 09:34 +0100
http://bitbucket.org/pypy/pypy/changeset/ad1beddd3f43/

Log:	hg merge default

diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -311,8 +311,8 @@
         if config.objspace.usemodules.cpyext:
             if config.translation.gc not in ('incminimark', 'boehm'):
                 raise Exception("The 'cpyext' module requires the 'incminimark'"
-                                " 'boehm' GC.  You need either 'targetpypystandalone.py"
-                                " --withoutmod-cpyext' or '--gc=incminimark'")
+                    " or 'boehm' GC.  You need either 'targetpypystandalone.py"
+                    " --withoutmod-cpyext', or use one of these two GCs.")
 
         config.translating = True
 
diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py
--- a/pypy/module/_cffi_backend/cdataobj.py
+++ b/pypy/module/_cffi_backend/cdataobj.py
@@ -323,17 +323,28 @@
         #
         return self._add_or_sub(w_other, -1)
 
-    def getcfield(self, w_attr):
-        return self.ctype.getcfield(self.space.str_w(w_attr))
+    def getcfield(self, w_attr, mode):
+        space = self.space
+        attr = space.str_w(w_attr)
+        try:
+            cfield = self.ctype.getcfield(attr)
+        except KeyError:
+            raise oefmt(space.w_AttributeError, "cdata '%s' has no field '%s'",
+                        self.ctype.name, attr)
+        if cfield is None:
+            raise oefmt(space.w_AttributeError,
+                        "cdata '%s' points to an opaque type: cannot %s fields",
+                        self.ctype.name, mode)
+        return cfield
 
     def getattr(self, w_attr):
-        cfield = self.getcfield(w_attr)
+        cfield = self.getcfield(w_attr, mode="read")
         with self as ptr:
             w_res = cfield.read(ptr, self)
         return w_res
 
     def setattr(self, w_attr, w_value):
-        cfield = self.getcfield(w_attr)
+        cfield = self.getcfield(w_attr, mode="write")
         with self as ptr:
             cfield.write(ptr, w_value)
 
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -348,7 +348,10 @@
         return result
 
     def getcfield(self, attr):
-        return self.ctitem.getcfield(attr)
+        from pypy.module._cffi_backend.ctypestruct import W_CTypeStructOrUnion
+        if isinstance(self.ctitem, W_CTypeStructOrUnion):
+            return self.ctitem.getcfield(attr)
+        return W_CType.getcfield(self, attr)
 
     def typeoffsetof_field(self, fieldname, following):
         if following == 0:
diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py
--- a/pypy/module/_cffi_backend/ctypestruct.py
+++ b/pypy/module/_cffi_backend/ctypestruct.py
@@ -161,18 +161,18 @@
         return self._fields_dict[attr]
 
     def getcfield(self, attr):
-        ready = self._fields_dict is not None
-        if not ready and self.size >= 0:
+        # Returns a W_CField.  Error cases: returns None if we are an
+        # opaque struct; or raises KeyError if the particular field
+        # 'attr' does not exist.  The point of not directly building the
+        # error here is to get the exact ctype in the error message: it
+        # might be of the kind 'struct foo' or 'struct foo *'.
+        if self._fields_dict is None:
+            if self.size < 0:
+                return None
             self.force_lazy_struct()
-            ready = True
-        if ready:
-            self = jit.promote(self)
-            attr = jit.promote_string(attr)
-            try:
-                return self._getcfield_const(attr)
-            except KeyError:
-                pass
-        return W_CType.getcfield(self, attr)
+        self = jit.promote(self)
+        attr = jit.promote_string(attr)
+        return self._getcfield_const(attr)    # <= KeyError here
 
     def cdata_dir(self):
         if self.size < 0:
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -737,8 +737,14 @@
     BInt = new_primitive_type("int")
     BStruct = new_struct_type("struct foo")
     BStructPtr = new_pointer_type(BStruct)
-    p = cast(BStructPtr, 0)
-    py.test.raises(AttributeError, "p.a1")    # opaque
+    p = cast(BStructPtr, 42)
+    e = py.test.raises(AttributeError, "p.a1")    # opaque
+    assert str(e.value) == ("cdata 'struct foo *' points to an opaque type: "
+                            "cannot read fields")
+    e = py.test.raises(AttributeError, "p.a1 = 10")    # opaque
+    assert str(e.value) == ("cdata 'struct foo *' points to an opaque type: "
+                            "cannot write fields")
+
     complete_struct_or_union(BStruct, [('a1', BInt, -1),
                                        ('a2', BInt, -1)])
     p = newp(BStructPtr, None)
@@ -749,8 +755,29 @@
     assert s.a2 == 123
     py.test.raises(OverflowError, "s.a1 = sys.maxsize+1")
     assert s.a1 == 0
-    py.test.raises(AttributeError, "p.foobar")
-    py.test.raises(AttributeError, "s.foobar")
+    e = py.test.raises(AttributeError, "p.foobar")
+    assert str(e.value) == "cdata 'struct foo *' has no field 'foobar'"
+    e = py.test.raises(AttributeError, "p.foobar = 42")
+    assert str(e.value) == "cdata 'struct foo *' has no field 'foobar'"
+    e = py.test.raises(AttributeError, "s.foobar")
+    assert str(e.value) == "cdata 'struct foo' has no field 'foobar'"
+    e = py.test.raises(AttributeError, "s.foobar = 42")
+    assert str(e.value) == "cdata 'struct foo' has no field 'foobar'"
+    j = cast(BInt, 42)
+    e = py.test.raises(AttributeError, "j.foobar")
+    assert str(e.value) == "cdata 'int' has no attribute 'foobar'"
+    e = py.test.raises(AttributeError, "j.foobar = 42")
+    assert str(e.value) == "cdata 'int' has no attribute 'foobar'"
+    j = cast(new_pointer_type(BInt), 42)
+    e = py.test.raises(AttributeError, "j.foobar")
+    assert str(e.value) == "cdata 'int *' has no attribute 'foobar'"
+    e = py.test.raises(AttributeError, "j.foobar = 42")
+    assert str(e.value) == "cdata 'int *' has no attribute 'foobar'"
+    pp = newp(new_pointer_type(BStructPtr), p)
+    e = py.test.raises(AttributeError, "pp.a1")
+    assert str(e.value) == "cdata 'struct foo * *' has no attribute 'a1'"
+    e = py.test.raises(AttributeError, "pp.a1 = 42")
+    assert str(e.value) == "cdata 'struct foo * *' has no attribute 'a1'"
 
 def test_union_instance():
     BInt = new_primitive_type("int")
diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -437,6 +437,9 @@
         for i in range(len(self.cached_attrs)):
             self.cached_attrs[i] = None
 
+    def _cleanup_(self):
+        self.clear()
+
 # ____________________________________________________________
 # object implementation
 
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -72,6 +72,10 @@
 class MethodCache(object):
 
     def __init__(self, space):
+        # Note: these attributes never change which object they contain,
+        # so reading 'cache.versions' for example is constant-folded.
+        # The actual list in 'cache.versions' is not a constant, of
+        # course.
         SIZE = 1 << space.config.objspace.std.methodcachesizeexp
         self.versions = [None] * SIZE
         self.names = [None] * SIZE
@@ -89,6 +93,9 @@
         for i in range(len(self.lookup_where)):
             self.lookup_where[i] = None_None
 
+    def _cleanup_(self):
+        self.clear()
+
 class _Global(object):
     weakref_warning_printed = False
 _global = _Global()
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -201,6 +201,10 @@
     StrOption("icon", "Path to the (Windows) icon to use for the executable"),
     StrOption("libname",
               "Windows: name and possibly location of the lib file to create"),
+    ChoiceOption("hash",
+                 "The hash to use for strings",
+                 ["rpython", "siphash24"],
+                 default="rpython", cmdline="--hash"),
 
     OptionDescription("backendopt", "Backend Optimization Options", [
         # control inlining
@@ -390,6 +394,12 @@
         if sys.platform == "darwin" or sys.platform =="win32":
             raise ConfigError("'asmgcc' not supported on this platform")
 
+def apply_extra_settings(config):
+    # make the setting of config.hash definitive
+    from rpython.rlib.objectmodel import set_hash_algorithm
+    config.translation.hash = config.translation.hash
+    set_hash_algorithm(config.translation.hash)
+
 # ----------------------------------------------------------------
 
 def set_platform(config):
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -520,10 +520,22 @@
 # ----------
 
 HASH_ALGORITHM = "rpython"  # XXX Is there a better name?
+HASH_ALGORITHM_FIXED = False
 
-def _hash_string(s):
-    """The algorithm behind compute_hash() for a string or a unicode."""
+@not_rpython
+def set_hash_algorithm(algo):
+    """Must be called very early, before any string is hashed with
+    compute_hash()!"""
+    global HASH_ALGORITHM
+    if HASH_ALGORITHM != algo:
+        assert not HASH_ALGORITHM_FIXED, "compute_hash() already called!"
+        assert algo in ("rpython", "siphash24")
+        HASH_ALGORITHM = algo
+
+
+def _hash_string_rpython(s):
     from rpython.rlib.rarithmetic import intmask
+
     length = len(s)
     if length == 0:
         return -1
@@ -535,6 +547,101 @@
     x ^= length
     return intmask(x)
 
+
+@not_rpython
+def _hash_string_siphash24(s):
+    """This version is called when untranslated only."""
+    import array
+    from rpython.rlib.rsiphash import siphash24
+    from rpython.rtyper.lltypesystem import lltype, rffi
+    from rpython.rlib.rarithmetic import intmask
+
+    if not isinstance(s, str):
+        if isinstance(s, unicode):
+            lst = map(ord, s)
+        else:
+            lst = map(ord, s.chars)    # for rstr.STR or UNICODE
+        # NOTE: a latin-1 unicode string must have the same hash as the
+        # corresponding byte string.
+        if all(n <= 0xFF for n in lst):
+            kind = "B"
+        elif rffi.sizeof(lltype.UniChar) == 4:
+            kind = "I"
+        else:
+            kind = "H"
+        s = array.array(kind, lst).tostring()
+    ptr = rffi.str2charp(s)
+    x = siphash24(ptr, len(s))
+    rffi.free_charp(ptr)
+    return intmask(x)
+
+def ll_hash_string_siphash24(ll_s):
+    """Called from lltypesystem/rstr.py.  'll_s' is a rstr.STR or UNICODE."""
+    from rpython.rlib.rsiphash import siphash24
+    from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
+    from rpython.rlib.rarithmetic import intmask
+
+    length = len(ll_s.chars)
+    if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char:
+        # no GC operation from here!
+        addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0)
+    else:
+        # NOTE: a latin-1 unicode string must have the same hash as the
+        # corresponding byte string.  If the unicode is all within
+        # 0-255, then we need to allocate a byte buffer and copy the
+        # latin-1 encoding in it manually.
+        for i in range(length):
+            if ord(ll_s.chars[i]) > 0xFF:
+                # no GC operation from here!
+                addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
+                length *= rffi.sizeof(rstr.UNICODE.chars.OF)
+                break
+        else:
+            p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+            i = 0
+            while i < length:
+                p[i] = chr(ord(ll_s.chars[i]))
+                i += 1
+            x = siphash24(llmemory.cast_ptr_to_adr(p), length)
+            lltype.free(p, flavor='raw')
+            return intmask(x)
+    x = siphash24(addr, length)
+    keepalive_until_here(ll_s)
+    return intmask(x)
+ll_hash_string_siphash24._jit_look_inside_ = False
+
+
+@not_rpython
+def _hash_string(s):
+    """The algorithm behind compute_hash() for a string or a unicode.
+    This version is only for untranslated usage, and 's' is a str or unicode.
+    """
+    global HASH_ALGORITHM_FIXED
+    HASH_ALGORITHM_FIXED = True
+    if HASH_ALGORITHM == "rpython":
+        return _hash_string_rpython(s)
+    if HASH_ALGORITHM == "siphash24":
+        return _hash_string_siphash24(s)
+    raise NotImplementedError
+
+def ll_hash_string(ll_s):
+    """The algorithm behind compute_hash() for a string or a unicode.
+    This version is called from lltypesystem/rstr.py, and 'll_s' is a
+    rstr.STR or rstr.UNICODE.
+    """
+    if not we_are_translated():
+        global HASH_ALGORITHM_FIXED
+        HASH_ALGORITHM_FIXED = True
+    if HASH_ALGORITHM == "rpython":
+        return _hash_string_rpython(ll_s.chars)
+    if HASH_ALGORITHM == "siphash24":
+        if we_are_translated():
+            return ll_hash_string_siphash24(ll_s)
+        else:
+            return _hash_string_siphash24(ll_s)
+    raise NotImplementedError
+
+
 def _hash_float(f):
     """The algorithm behind compute_hash() for a float.
     This implementation is identical to the CPython implementation,
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsiphash.py
@@ -0,0 +1,157 @@
+import sys, os, struct
+from contextlib import contextmanager
+from rpython.rlib import rarithmetic
+from rpython.rlib.objectmodel import not_rpython, always_inline
+from rpython.rlib.rgc import no_collect
+from rpython.rlib.rarithmetic import r_uint64
+from rpython.rlib.rawstorage import misaligned_is_fine
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+
+
+if sys.byteorder == 'little':
+    def _le64toh(x):
+        return x
+else:
+    _le64toh = rarithmetic.byteswap
+
+
+# Initialize the values of the secret seed: two 64-bit constants.
+# CPython picks a new seed every time 'python' starts.  PyPy cannot do
+# that as easily because many details may rely on getting the same hash
+# value before and after translation.  We can, however, pick a random
+# seed once per translation, which should already be quite good.
+
+@not_rpython
+def select_random_seed():
+    global k0, k1    # note: the globals k0, k1 are already byte-swapped
+    v0, v1 = struct.unpack("QQ", os.urandom(16))
+    k0 = r_uint64(v0)
+    k1 = r_uint64(v1)
+
+select_random_seed()
+
+@contextmanager
+def choosen_seed(new_k0, new_k1, test_misaligned_path=False):
+    global k0, k1, misaligned_is_fine
+    old = k0, k1, misaligned_is_fine
+    k0 = _le64toh(r_uint64(new_k0))
+    k1 = _le64toh(r_uint64(new_k1))
+    if test_misaligned_path:
+        misaligned_is_fine = False
+    yield
+    k0, k1, misaligned_is_fine = old
+
+def get_current_seed():
+    return _le64toh(k0), _le64toh(k1)
+
+
+magic0 = r_uint64(0x736f6d6570736575)
+magic1 = r_uint64(0x646f72616e646f6d)
+magic2 = r_uint64(0x6c7967656e657261)
+magic3 = r_uint64(0x7465646279746573)
+
+
+@always_inline
+def _rotate(x, b):
+    return (x << b) | (x >> (64 - b))
+
+@always_inline
+def _half_round(a, b, c, d, s, t):
+    a += b
+    c += d
+    b = _rotate(b, s) ^ a
+    d = _rotate(d, t) ^ c
+    a = _rotate(a, 32)
+    return a, b, c, d
+
+@always_inline
+def _double_round(v0, v1, v2, v3):
+    v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16)
+    v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21)
+    v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16)
+    v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21)
+    return v0, v1, v2, v3
+
+
+@no_collect
+def siphash24(addr_in, size):
+    """Takes an address pointer and a size.  Returns the hash as a r_uint64,
+    which can then be casted to the expected type."""
+
+    direct = (misaligned_is_fine or
+                 (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
+
+    b = r_uint64(size) << 56
+    v0 = k0 ^ magic0
+    v1 = k1 ^ magic1
+    v2 = k0 ^ magic2
+    v3 = k1 ^ magic3
+
+    index = 0
+    if direct:
+        while size >= 8:
+            mi = llop.raw_load(rffi.ULONGLONG, addr_in, index)
+            mi = _le64toh(mi)
+            size -= 8
+            index += 8
+            v3 ^= mi
+            v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+            v0 ^= mi
+    else:
+        while size >= 8:
+            mi = (
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index)) |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56
+            )
+            mi = _le64toh(mi)
+            size -= 8
+            index += 8
+            v3 ^= mi
+            v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+            v0 ^= mi
+
+    t = r_uint64(0)
+    if size == 7:
+        t = r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48
+        size = 6
+    if size == 6:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40
+        size = 5
+    if size == 5:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32
+        size = 4
+    if size == 4:
+        if direct:
+            t |= r_uint64(llop.raw_load(rffi.UINT, addr_in, index))
+            size = 0
+        else:
+            t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24
+            size = 3
+    if size == 3:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16
+        size = 2
+    if size == 2:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8
+        size = 1
+    if size == 1:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index))
+        size = 0
+    assert size == 0
+
+    b |= _le64toh(t)
+
+    v3 ^= b
+    v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+    v0 ^= b
+    v2 ^= 0xff
+    v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+    v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+
+    return (v0 ^ v1) ^ (v2 ^ v3)
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -0,0 +1,44 @@
+from rpython.rlib.rsiphash import siphash24, choosen_seed
+from rpython.rtyper.lltypesystem import llmemory, rffi
+
+
+CASES = [
+    (2323638336262702335 , ""),
+    (5150479602681463644 , "h"),
+    (1013213613370725794 , "he"),
+    (7028032310911240238 , "hel"),
+    (9535960132410784494 , "hell"),
+    (3256502711089771242 , "hello"),
+    (2389188832234450176 , "hello "),
+    (13253855839845990393, "hello w"),
+    (7850036019043917323 , "hello wo"),
+    (14283308628425005953, "hello wor"),
+    (9605549962279590084 , "hello worl"),
+    (16371281469632894235, "hello world"),
+    (7298637955795769949 , "hello world\x9a"),
+    (13530878135053370821, "hello world\xf3\x80"),
+    (1643533543579802994 , "\xffhel\x82lo world\xbc"),
+    (14632093238728197380, "hexlylxox rewqw"),
+    (3434253029196696424 , "hexlylxox rewqws"),
+    (9855754545877066788 , "hexlylxox rewqwsv"),
+    (5233065012564472454 , "hexlylxox rewqwkashdw89"),
+    (16768585622569081808, "hexlylxox rewqwkeashdw89"),
+    (17430482483431293463, "HEEExlylxox rewqwkashdw89"),
+    (695783005783737705  , "hello woadwealidewd 3829ez 32ig dxwaebderld"),
+]
+
+def check(s):
+    p = rffi.str2charp(s)
+    q = rffi.str2charp('?' + s)
+    with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f,
+                      test_misaligned_path=True):
+        x = siphash24(llmemory.cast_ptr_to_adr(p), len(s))
+        y = siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s))
+    rffi.free_charp(p)
+    rffi.free_charp(q)
+    assert x == y
+    return x
+
+def test_siphash24():
+    for expected, string in CASES:
+        assert check(string) == expected
diff --git a/rpython/rtyper/lltypesystem/rbytearray.py b/rpython/rtyper/lltypesystem/rbytearray.py
--- a/rpython/rtyper/lltypesystem/rbytearray.py
+++ b/rpython/rtyper/lltypesystem/rbytearray.py
@@ -8,10 +8,10 @@
 def mallocbytearray(size):
     return lltype.malloc(BYTEARRAY, size)
 
-_, _, copy_bytearray_contents = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY,
+_, _, copy_bytearray_contents, _ = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY,
                                                          lltype.Char,
                                                          'bytearray')
-_, _, copy_bytearray_contents_from_str = rstr._new_copy_contents_fun(rstr.STR,
+_, _, copy_bytearray_contents_from_str, _ = rstr._new_copy_contents_fun(rstr.STR,
                                                                   BYTEARRAY,
                                                                   lltype.Char,
                                                                   'bytearray_from_str')
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -1073,8 +1073,9 @@
         if size is None:
             size = llmemory.sizeof(tp)    # a symbolic result in this case
         return size
-    if isinstance(tp, lltype.Ptr) or tp is llmemory.Address:
-        return globals()['r_void*'].BITS/8
+    if (tp is lltype.Signed or isinstance(tp, lltype.Ptr) 
+                            or tp is llmemory.Address):
+        return LONG_BIT/8
     if tp is lltype.Char or tp is lltype.Bool:
         return 1
     if tp is lltype.UniChar:
@@ -1087,8 +1088,6 @@
         # :-/
         return sizeof_c_type("long double")
     assert isinstance(tp, lltype.Number)
-    if tp is lltype.Signed:
-        return LONG_BIT/8
     return tp._type.BITS/8
 sizeof._annspecialcase_ = 'specialize:memo'
 
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -3,7 +3,7 @@
 from rpython.annotator import model as annmodel
 from rpython.rlib import jit, types
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
-    _hash_string, keepalive_until_here, specialize, enforceargs)
+    ll_hash_string, keepalive_until_here, specialize, enforceargs)
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rtyper.error import TyperError
@@ -44,11 +44,13 @@
 mallocstr = new_malloc(STR, 'mallocstr')
 mallocunicode = new_malloc(UNICODE, 'mallocunicode')
 
+@specialize.memo()
 def emptystrfun():
-    return emptystr
+    return string_repr.convert_const("")
 
+@specialize.memo()
 def emptyunicodefun():
-    return emptyunicode
+    return unicode_repr.convert_const(u'')
 
 def _new_copy_contents_fun(SRC_TP, DST_TP, CHAR_TP, name):
     @specialize.arg(0)
@@ -136,15 +138,19 @@
     copy_raw_to_string = func_with_new_name(copy_raw_to_string,
                                               'copy_raw_to_%s' % name)
 
-    return copy_string_to_raw, copy_raw_to_string, copy_string_contents
+    return (copy_string_to_raw, copy_raw_to_string, copy_string_contents,
+            _get_raw_buf)
 
 (copy_string_to_raw,
  copy_raw_to_string,
- copy_string_contents) = _new_copy_contents_fun(STR, STR, Char, 'string')
+ copy_string_contents,
+ _get_raw_buf_string) = _new_copy_contents_fun(STR, STR, Char, 'string')
 
 (copy_unicode_to_raw,
  copy_raw_to_unicode,
- copy_unicode_contents) = _new_copy_contents_fun(UNICODE, UNICODE, UniChar, 'unicode')
+ copy_unicode_contents,
+ _get_raw_buf_unicode) = _new_copy_contents_fun(UNICODE, UNICODE, UniChar,
+                                                'unicode')
 
 CONST_STR_CACHE = WeakValueDictionary()
 CONST_UNICODE_CACHE = WeakValueDictionary()
@@ -382,7 +388,7 @@
         # but our malloc initializes the memory to zero, so we use zero as the
         # special non-computed-yet value.  Also, jit.conditional_call_elidable
         # always checks for zero, for now.
-        x = _hash_string(s.chars)
+        x = ll_hash_string(s)
         if x == 0:
             x = 29872897
         s.hash = x
@@ -1276,8 +1282,6 @@
 char_repr.ll = LLHelpers
 unichar_repr.ll = LLHelpers
 unicode_repr = UnicodeRepr()
-emptystr = string_repr.convert_const("")
-emptyunicode = unicode_repr.convert_const(u'')
 
 StringRepr.repr = string_repr
 UnicodeRepr.repr = unicode_repr
@@ -1336,14 +1340,6 @@
 string_repr.iterator_repr = StringIteratorRepr()
 unicode_repr.iterator_repr = UnicodeIteratorRepr()
 
-# these should be in rclass, but circular imports prevent (also it's
-# not that insane that a string constant is built in this file).
-
-instance_str_prefix = string_repr.convert_const("<")
-instance_str_infix  = string_repr.convert_const(" object at 0x")
-instance_str_suffix = string_repr.convert_const(">")
-
-null_str = string_repr.convert_const("NULL")
-
-unboxed_instance_str_prefix = string_repr.convert_const("<unboxed ")
-unboxed_instance_str_suffix = string_repr.convert_const(">")
+@specialize.memo()
+def conststr(s):
+    return string_repr.convert_const(s)
diff --git a/rpython/rtyper/lltypesystem/rtagged.py b/rpython/rtyper/lltypesystem/rtagged.py
--- a/rpython/rtyper/lltypesystem/rtagged.py
+++ b/rpython/rtyper/lltypesystem/rtagged.py
@@ -117,9 +117,9 @@
             from rpython.rtyper.lltypesystem import rstr
             from rpython.rtyper.rint import signed_repr
             llstr1 = signed_repr.ll_str(ll_unboxed_to_int(i))
-            return rstr.ll_strconcat(rstr.unboxed_instance_str_prefix,
+            return rstr.ll_strconcat(rstr.conststr("<unboxed "),
                       rstr.ll_strconcat(llstr1,
-                                        rstr.unboxed_instance_str_suffix))
+                                        rstr.conststr(">")))
         else:
             return InstanceRepr.ll_str(self, i)
 
diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py
--- a/rpython/rtyper/rclass.py
+++ b/rpython/rtyper/rclass.py
@@ -840,18 +840,18 @@
         from rpython.rtyper.lltypesystem.ll_str import ll_int2hex
         from rpython.rlib.rarithmetic import r_uint
         if not i:
-            return rstr.null_str
+            return rstr.conststr("NULL")
         instance = cast_pointer(OBJECTPTR, i)
         # Two choices: the first gives a fast answer but it can change
         # (typically only once) during the life of the object.
         #uid = r_uint(cast_ptr_to_int(i))
         uid = r_uint(llop.gc_id(lltype.Signed, i))
         #
-        res = rstr.instance_str_prefix
+        res = rstr.conststr("<")
         res = rstr.ll_strconcat(res, instance.typeptr.name)
-        res = rstr.ll_strconcat(res, rstr.instance_str_infix)
+        res = rstr.ll_strconcat(res, rstr.conststr(" object at 0x"))
         res = rstr.ll_strconcat(res, ll_int2hex(uid, False))
-        res = rstr.ll_strconcat(res, rstr.instance_str_suffix)
+        res = rstr.ll_strconcat(res, rstr.conststr(">"))
         return res
 
     def get_ll_eq_function(self):
@@ -1092,7 +1092,6 @@
     except StandardError:
         return None
 
-
 # ____________________________________________________________
 #
 #  Low-level implementation of operations on classes and instances
diff --git a/rpython/translator/c/test/test_typed.py b/rpython/translator/c/test/test_typed.py
--- a/rpython/translator/c/test/test_typed.py
+++ b/rpython/translator/c/test/test_typed.py
@@ -1,8 +1,12 @@
 from __future__ import with_statement
 
 import math
-import sys
+import sys, os
 
+if __name__ == '__main__':
+    # hack for test_hash_string_siphash24()
+    sys.path.insert(0, os.path.join(os.path.dirname(__file__),
+                                    '..', '..', '..', '..'))
 import py
 
 from rpython.rlib.rstackovf import StackOverflow
@@ -597,6 +601,49 @@
         assert res[3] == compute_hash(d)
         assert res[4] == compute_hash(("Hi", None, (7.5, 2, d)))
 
+    def _test_hash_string(self, algo):
+        from rpython.rlib import objectmodel
+        objectmodel.set_hash_algorithm(algo)
+        s = "hello"
+        u = u"world"
+        v = u"\u1234\u2318+\u2bcd\u2102"
+        hash_s = compute_hash(s)
+        hash_u = compute_hash(u)
+        hash_v = compute_hash(v)
+        assert hash_s == compute_hash(u"hello")   # same hash because it's
+        assert hash_u == compute_hash("world")    #    a latin-1 unicode
+        #
+        def fn(length):
+            assert length >= 1
+            return str((compute_hash(s),
+                        compute_hash(u),
+                        compute_hash(v),
+                        compute_hash(s[0] + s[1:length]),
+                        compute_hash(u[0] + u[1:length]),
+                        compute_hash(v[0] + v[1:length]),
+                        ))
+
+        assert fn(5) == str((hash_s, hash_u, hash_v, hash_s, hash_u, hash_v))
+
+        f = self.getcompiled(fn, [int])
+        res = f(5)
+        res = [int(a) for a in res[1:-1].split(",")]
+        assert res[0] == hash_s
+        assert res[1] == hash_u
+        assert res[2] == hash_v
+        assert res[3] == hash_s
+        assert res[4] == hash_u
+        assert res[5] == hash_v
+
+    def test_hash_string_rpython(self):
+        self._test_hash_string("rpython")
+
+    def test_hash_string_siphash24(self):
+        import subprocess
+        subprocess.check_call([sys.executable, __file__, "siphash24",
+                               self.__class__.__module__,
+                               self.__class__.__name__])
+
     def test_list_basic_ops(self):
         def list_basic_ops(i, j):
             l = [1, 2, 3]
@@ -896,3 +943,11 @@
         f = self.getcompiled(func, [int])
         res = f(2)
         assert res == 1     # and not 2
+
+
+if __name__ == '__main__':
+    # for test_hash_string_siphash24()
+    algo, clsmodule, clsname = sys.argv[1:]
+    mod = __import__(clsmodule, None, None, [clsname])
+    cls = getattr(mod, clsname)
+    cls()._test_hash_string(algo)
diff --git a/rpython/translator/goal/translate.py b/rpython/translator/goal/translate.py
--- a/rpython/translator/goal/translate.py
+++ b/rpython/translator/goal/translate.py
@@ -11,7 +11,8 @@
 from rpython.config.config import (to_optparse, OptionDescription, BoolOption,
     ArbitraryOption, StrOption, IntOption, Config, ChoiceOption, OptHelpFormatter)
 from rpython.config.translationoption import (get_combined_translation_config,
-    set_opt_level, OPT_LEVELS, DEFAULT_OPT_LEVEL, set_platform, CACHE_DIR)
+    set_opt_level, OPT_LEVELS, DEFAULT_OPT_LEVEL, set_platform, CACHE_DIR,
+    apply_extra_settings)
 
 # clean up early rpython/_cache
 try:
@@ -177,6 +178,9 @@
     if 'handle_config' in targetspec_dic:
         targetspec_dic['handle_config'](config, translateconfig)
 
+    # apply extra settings
+    apply_extra_settings(config)
+
     return targetspec_dic, translateconfig, config, args
 
 def show_help(translateconfig, opt_parser, targetspec_dic, config):