[pypy-commit] pypy rpython-hash: Write the exact same logic as CPython. Write a test, which doesn't pass
arigo
pypy.commits at gmail.com
Sun Jan 29 12:51:07 EST 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: rpython-hash
Changeset: r89829:6df13d671d16
Date: 2017-01-29 18:50 +0100
http://bitbucket.org/pypy/pypy/changeset/6df13d671d16/
Log: Write the exact same logic as CPython. Write a test, which doesn't
pass so far.
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
--- a/rpython/rlib/rsiphash.py
+++ b/rpython/rlib/rsiphash.py
@@ -5,17 +5,19 @@
of siphash-2-4 on all RPython strings and unicodes in your program
after translation.
"""
-import sys, os
+import sys, os, errno
from contextlib import contextmanager
from rpython.rlib import rarithmetic, rurandom
from rpython.rlib.objectmodel import not_rpython, always_inline
from rpython.rlib.objectmodel import we_are_translated, dont_inline
-from rpython.rlib import rgc, jit
+from rpython.rlib.objectmodel import keepalive_until_here
+from rpython.rlib import rgc, jit, rposix
from rpython.rlib.rarithmetic import r_uint64, r_uint32, r_uint
from rpython.rlib.rawstorage import misaligned_is_fine
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rtyper.extregistry import ExtRegistryEntry
+from rpython.rtyper.annlowlevel import llhelper
if sys.byteorder == 'little':
@@ -47,32 +49,8 @@
random_ctx = rurandom.init_urandom()
-
-def lcg_urandom(value):
- # Quite unsure what the point of this function is, given that a hash
- # seed of the form '%s\x00\x00\x00..' should be just as hard to
- # guess as this one. We copy it anyway from CPython for the case
- # where 'value' is a 32-bit unsigned number, but if it is not, we
- # fall back to the '%s\x00\x00\x00..' form.
- if value == '0':
- value = ''
- try:
- x = r_uint(r_uint32(value))
- except (ValueError, OverflowError):
- x = r_uint(0)
- if str(x) == value:
- s = ''
- for index in range(16):
- x *= 214013
- x += 2531011
- x = r_uint(r_uint32(x))
- s += chr((x >> 16) & 0xff)
- else:
- if len(value) < 16:
- s = value + '\x00' * (16 - len(value))
- else:
- s = value[:16]
- return s
+strtoul = rffi.llexternal("strtoul", [rffi.CCHARP, rffi.CCHARPP, rffi.INT],
+ rffi.ULONG, save_err=rffi.RFFI_SAVE_ERRNO)
env_var_name = "PYTHONHASHSEED"
@@ -83,12 +61,46 @@
# global variable 'env_var_name', or just pass a different init
# function to enable_siphash24().
value = os.environ.get(env_var_name)
- if len(value) > 0 and value != "random":
- s = lcg_urandom(value)
+ if value and value != "random":
+ with rffi.scoped_view_charp(value) as ptr:
+ with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as endptr:
+ endptr[0] = ptr
+ seed = strtoul(ptr, endptr, 10)
+ full = endptr[0][0] == '\x00'
+ seed = lltype.cast_primitive(lltype.Unsigned, seed)
+ if not full or seed > r_uint(4294967295) or (
+ rposix.get_saved_errno() == errno.ERANGE and
+ seed == lltype.cast_primitive(lltype.Unsigned,
+ rffi.cast(rffi.ULONG, -1))):
+ os.write(2,
+ "PYTHONHASHSEED must be \"random\" or an integer "
+ "in range [0; 4294967295]")
+ os._exit(1)
+ if not seed:
+ # disable the randomized hash
+ s = '\x00' * 16
+ else:
+ s = lcg_urandom(seed)
else:
- s = rurandom.urandom(random_ctx, 16)
+ try:
+ s = rurandom.urandom(random_ctx, 16)
+ except Exception as e:
+ os.write(2,
+ "%s: failed to get random numbers to initialize Python\n" %
+ (e.__class__.__name__,))
+ os._exit(1)
+ raise # makes the annotator happy
select_random_seed(s)
+def lcg_urandom(x):
+ s = ''
+ for index in range(16):
+ x *= 214013
+ x += 2531011
+ s += chr((x >> 16) & 0xff)
+ return s
+
+
_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
def enable_siphash24(*init):
@@ -104,7 +116,7 @@
(init_func,) = init
else:
init_func = initialize_from_env
- llop.call_at_startup(lltype.Void, llexternal(_FUNC, init_func))
+ llop.call_at_startup(lltype.Void, llhelper(_FUNC, init_func))
def _internal_enable_siphash24():
pass
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
--- a/rpython/rlib/test/test_rsiphash.py
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -1,7 +1,10 @@
import os
from rpython.rlib.rsiphash import siphash24, _siphash24, choosen_seed
-from rpython.rlib.rsiphash import initialize_from_env, seed
+from rpython.rlib.rsiphash import initialize_from_env, enable_siphash24
+from rpython.rlib.objectmodel import compute_hash
+from rpython.rlib.rarithmetic import intmask
from rpython.rtyper.lltypesystem import llmemory, rffi
+from rpython.translator.c.test.test_genc import compile
CASES = [
@@ -51,9 +54,9 @@
assert siphash24("foo") == 15988776847138518036
# value checked with CPython 3.5
- os.environ['PYTHONHASHSEED'] = '123'
+ os.environ['PYTHONHASHSEED'] = '4000000000'
initialize_from_env()
- assert siphash24("foo") == 12577370453467666022
+ assert siphash24("foo") == 13829150778707464258
# value checked with CPython 3.5
for env in ['', 'random']:
@@ -68,3 +71,44 @@
del os.environ['PYTHONHASHSEED']
else:
os.environ['PYTHONHASHSEED'] = old_val
+
+def test_translated():
+ d1 = {"foo": 123}
+ d2 = {u"foo": 456, u"\u1234": 789}
+
+ def entrypoint():
+ enable_siphash24()
+ return '%d %d %d %d %d %d' % (
+ d1.get("foo", -1), compute_hash("bar"),
+ d2.get(u"foo", -1), compute_hash(u"foo"),
+ d2.get(u"\u1234", -1), compute_hash(u"\u1234"))
+
+ fn = compile(entrypoint, [])
+
+ old_val = os.environ.get('PYTHONHASHSEED', None)
+ try:
+ os.environ['PYTHONHASHSEED'] = '0'
+ s1 = fn()
+ assert map(int, s1.split()) == [
+ 123, intmask(15988776847138518036),
+ 456, intmask(15988776847138518036),
+ 789, intmask(16003099094427356855)]
+
+ os.environ['PYTHONHASHSEED'] = '3987654321'
+ s1 = fn()
+ assert map(int, s1.split()) == [
+ 123, intmask(5890804383681474441),
+ 456, intmask(5890804383681474441),
+ 789, intmask(10331001347733193222)]
+
+ for env in ['', 'random']:
+ os.environ['PYTHONHASHSEED'] = env
+ s1 = fn()
+ s2 = fn()
+ assert s1 != s2
+
+ finally:
+ if old_val is None:
+ del os.environ['PYTHONHASHSEED']
+ else:
+ os.environ['PYTHONHASHSEED'] = old_val
More information about the pypy-commit
mailing list