From noreply at buildbot.pypy.org Sun Jun 1 00:14:49 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 1 Jun 2014 00:14:49 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: frompyfunc needs object dtype Message-ID: <20140531221449.9CECE1D2DD4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r71792:011468df676c Date: 2014-05-26 18:42 +0300 http://bitbucket.org/pypy/pypy/changeset/011468df676c/ Log: frompyfunc needs object dtype diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py --- a/pypy/module/micronumpy/__init__.py +++ b/pypy/module/micronumpy/__init__.py @@ -31,7 +31,9 @@ class UMathModule(MixedModule): appleveldefs = {} - interpleveldefs = {'FLOATING_POINT_SUPPORT': 'space.wrap(1)'} + interpleveldefs = { + 'FLOATING_POINT_SUPPORT': 'space.wrap(1)', + } # ufuncs for exposed, impl in [ ("absolute", "absolute"), diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -97,11 +97,26 @@ raises(TypeError, ufunc) def test_frompyfunc(self): - from numpy import ufunc, frompyfunc, arange - myufunc = frompyfunc(int.__add__, 2, 1) - assert isinstance(add, ufunc) + try: + from numpy import frompyfunc + except ImportError: + skip('frompyfunc not available') + from numpy import ufunc, frompyfunc, arange, dtype + def adder(a, b): + return a+b + myufunc = frompyfunc(adder, 2, 1) + assert isinstance(myufunc, ufunc) res = myufunc(arange(10), arange(10)) + assert res.dtype == dtype(object) assert all(res == arange(10) + arange(10)) + raises(TypeError, frompyfunc, 1, 2, 3) + int_func22 = frompyfunc(int, 2, 2) + raises (ValueError, int_func22, arange(10)) + int_func12 = frompyfunc(int, 1, 2) + res = int_func12(arange(10)) + assert len(res) == 2 + assert isinstance(res, tuple) + assert (res[0] == arange(10)).all() def test_ufunc_attrs(self): from numpy import add, multiply, 
sin diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -780,3 +780,5 @@ def get(space): return space.fromcache(UfuncState) + + From noreply at buildbot.pypy.org Sun Jun 1 00:14:51 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 1 Jun 2014 00:14:51 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: add non-translated test for generic ufunc interface Message-ID: <20140531221451.357D21D2DD4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r71793:06a969a0120f Date: 2014-05-31 21:56 +0300 http://bitbucket.org/pypy/pypy/changeset/06a969a0120f/ Log: add non-translated test for generic ufunc interface diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -4,6 +4,69 @@ from pypy.module.micronumpy.descriptor import get_dtype_cache +try: + import cffi + ffi = cffi.FFI() + if ffi.sizeof('int *') == ffi.sizeof('long'): + intp = 'long' + elif ffi.sizeof('int *') == ffi.sizeof('int'): + intp = 'int' + else: + raise ValueError('unknown size of int *') + ffi.cdef(''' + void double_times2(char **args, {0} *dimensions, + {0} * steps, void* data); + void int_times2(char **args, {0} **dimensions, + {0} **steps, void* data); + '''.format(intp) + ) + cfuncs = ffi.verify(''' + void double_times2(char **args, {0} *dimensions, + {0} * steps, void* data) + {{ + {0} i; + {0} n = dimensions[0]; + char *in = args[0], *out = args[1]; + {0} in_step = steps[0], out_step = steps[1]; + + double tmp; + + for (i = 0; i < n; i++) {{ + /*BEGIN main ufunc computation*/ + tmp = *(double *)in; + tmp *=2.0; + *((double *)out) = tmp; + /*END main ufunc computation*/ + + in += in_step; + out += out_step; + }} + }}; + void int_times2(char **args, {0} *dimensions, + {0} * steps, void* data) + {{ + {0} i; + {0} n = 
dimensions[0]; + char *in = args[0], *out = args[1]; + {0} in_step = steps[0], out_step = steps[1]; + + int tmp; + + for (i = 0; i < n; i++) {{ + /*BEGIN main ufunc computation*/ + tmp = *(int *)in; + tmp *=2.0; + *((int *)out) = tmp; + /*END main ufunc computation*/ + + in += in_step; + out += out_step; + }} + }} + '''.format(intp)) +except ImportError: + cfuncs = None + class TestUfuncCoercion(object): def test_binops(self, space): bool_dtype = get_dtype_cache(space).w_booldtype @@ -81,6 +144,25 @@ # promote bools, happens with sign ufunc assert find_unaryop_result_dtype(space, bool_dtype, promote_bools=True) is int8_dtype +class TestUfuncFromCFunc(object): + def test_fromcfunc(self,space): + if not cfuncs: + skip('no cffi available') + from pypy.module.micronumpy.ufuncs import ufunc_from_func_and_data_and_signature as from_cfunc + from pypy.module.micronumpy.ctors import array + int32_dtype = get_dtype_cache(space).w_int32dtype + float64_dtype = get_dtype_cache(space).w_float64dtype + func = from_cfunc([cfuncs.double_times2, cfuncs.int_times2], None, + [float64_dtype, float64_dtype, int32_dtype, int32_dtype], + 1, 1, 0, 'times2', 'times2_doc', 0, '()->()', + ) + def get(i): + return w_result.getitem(space, [i]).value + for d in [int32_dtype, float64_dtype]: + w_array = array(space, range(10), dtype=d) + w_result = func(w_array) + for i in 10: + assert get(i) == 2*i class AppTestUfuncs(BaseNumpyAppTest): def test_constants(self): diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -781,4 +781,7 @@ def get(space): return space.fromcache(UfuncState) - + at unwrap_spec(nin=int, nout=int, name=str, doc=str, check_return=int, signature=str) +def ufunc_from_func_and_data_and_signature(w_funcs, w_data, w_types, nin, nout, + w_identity, name, doc, check_return, signature): + pass From noreply at buildbot.pypy.org Sun Jun 1 00:14:52 2014 From: noreply at 
buildbot.pypy.org (mattip) Date: Sun, 1 Jun 2014 00:14:52 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: s/argcount/nin/, add nout, signature, nargs attributes, start W_UfuncGeneric Message-ID: <20140531221452.CBDCF1D2DD6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r71794:0262b16a53cf Date: 2014-06-01 01:09 +0300 http://bitbucket.org/pypy/pypy/changeset/0262b16a53cf/ Log: s/argcount/nin/, add nout, signature, nargs attributes, start W_UfuncGeneric diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -3,7 +3,8 @@ from rpython.rlib.rstring import strip_spaces from rpython.rtyper.lltypesystem import lltype, rffi from pypy.module.micronumpy import descriptor, loop -from pypy.module.micronumpy.base import W_NDimArray, convert_to_array +from pypy.module.micronumpy.base import convert_to_array +from pypy.module.micronumpy.ndarray import W_NDimArray # use extended class from pypy.module.micronumpy.converters import shape_converter diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -159,8 +159,8 @@ def get(i): return w_result.getitem(space, [i]).value for d in [int32_dtype, float64_dtype]: - w_array = array(space, range(10), dtype=d) - w_result = func(w_array) + w_array = array(space, space.wrap(range(10)), w_dtype=d) + w_result = func.call(space, [w_array]) for i in 10: assert get(i) == 2*i @@ -208,8 +208,17 @@ assert sin.identity is None assert add.nin == 2 + assert add.nout == 1 + assert add.nargs == 3 + assert add.signature == None assert multiply.nin == 2 + assert multiply.nout == 1 + assert multiply.nargs == 3 + assert multiply.signature == None assert sin.nin == 1 + assert sin.nout == 1 + assert sin.nargs == 2 + assert sin.signature == None def test_wrong_arguments(self): from numpy 
import add, sin diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -62,17 +62,17 @@ if (w_subok is not None and space.is_true(w_subok)): raise OperationError(space.w_NotImplementedError, space.wrap("parameters unsupported")) - if kwds_w or len(args_w) < self.argcount: + if kwds_w or len(args_w) < self.nin: raise OperationError(space.w_ValueError, space.wrap("invalid number of arguments") ) - elif (len(args_w) > self.argcount and out is not None) or \ - (len(args_w) > self.argcount + 1): + elif (len(args_w) > self.nin and out is not None) or \ + (len(args_w) > self.nin + 1): raise OperationError(space.w_TypeError, space.wrap("invalid number of arguments") ) # Override the default out value, if it has been provided in w_wargs - if len(args_w) > self.argcount: + if len(args_w) > self.nin: out = args_w[-1] else: args_w = args_w + [out] @@ -163,7 +163,7 @@ def reduce(self, space, w_obj, w_axis, keepdims=False, out=None, dtype=None, cumulative=False): - if self.argcount != 2: + if self.nin != 2: raise oefmt(space.w_ValueError, "reduce only supported for binary functions") assert isinstance(self, W_Ufunc2) @@ -287,7 +287,10 @@ class W_Ufunc1(W_Ufunc): _immutable_fields_ = ["func", "bool_result"] - argcount = 1 + nin = 1 + nout = 1 + nargs = 2 + signature = None def __init__(self, func, name, promote_to_largest=False, promote_to_float=False, promote_bools=False, identity=None, bool_result=False, int_only=False, @@ -353,7 +356,10 @@ class W_Ufunc2(W_Ufunc): _immutable_fields_ = ["func", "comparison_func", "done_func"] - argcount = 2 + nin = 2 + nout = 1 + nargs = 3 + signature = None def __init__(self, func, name, promote_to_largest=False, promote_to_float=False, promote_bools=False, identity=None, comparison_func=False, int_only=False, @@ -457,6 +463,62 @@ res_dtype, w_lhs, w_rhs, out) +class W_UfuncGeneric(W_Ufunc): + _immutable_fields_ = ["funcs", "signature", 
"nin", "nout", "nargs", + "dtypes"] + + def __init__(self, funcs, name, identity, nin, nout, dtypes, signature): + # XXX make sure funcs, signature, dtypes, nin, nout are consistent + + # These don't matter, we use the signature and dtypes for determining + # output dtype + promote_to_largest = promote_to_float = promote_bools = False + int_only = allow_bool = allow_complex = complex_to_float = False + W_Ufunc.__init__(self, name, promote_to_largest, promote_to_float, promote_bools, + identity, int_only, allow_bool, allow_complex, complex_to_float) + self.funcs = funcs + self.dtypes = dtypes + self.nin = nin + self.nout = nout + self.nargs = nin + max(nout, 1) # ufuncs can always be called with an out=<> kwarg + self.signature = signature + + def reduce(self, space, w_obj, w_axis, keepdims=False, out=None, dtype=None, + cumulative=False): + raise oefmt(space.w_NotImplementedError, 'not implemented yet') + + def call(self, space, args_w): + out = None + inargs = [] + for i in range(self.nin): + inargs.append(convert_to_array(space, args_w[i])) + outargs = [] + for i in range(min(self.nout, len(args_w)-self.nin)): + out = args_w[i+self.nin] + if space.is_w(out, space.w_None): + outargs.append(None) + else: + if not isinstance(out, W_NDimArray): + raise oefmt(space.w_TypeError, 'output must be an array') + outargs.append(out) + index = self.alloc_outargs(space, inargs, outargs) + func, dims, steps = self.prep_call(space, index, inargs, outargs) + data = [] # allocate a ll array of pointers and + # initialize to [x.data for x in inargs+outargs] + func(data, dims, steps, None) + + def alloc_outargs(self, space, inargs, outargs): + # Find a match for the inargs.dtype in self.dtypes, + # then use the result dtype to verify/allocate outargs + return 0 + + def prep_call(self, space, index, inargs, outargs): + # Use the index and signature to determine + # dims and steps for function call + return self.funcs[index], [inargs[0].get_shape()[0]], \ + 
[inargs[0].get_strides()[0], outargs[0].get_strides()[0]] + + W_Ufunc.typedef = TypeDef("numpy.ufunc", __call__ = interp2app(W_Ufunc.descr_call), __repr__ = interp2app(W_Ufunc.descr_repr), @@ -464,7 +526,10 @@ identity = GetSetProperty(W_Ufunc.descr_get_identity), accumulate = interp2app(W_Ufunc.descr_accumulate), - nin = interp_attrproperty("argcount", cls=W_Ufunc), + nin = interp_attrproperty("nin", cls=W_Ufunc), + nout = interp_attrproperty("nout", cls=W_Ufunc), + nargs = interp_attrproperty("nargs", cls=W_Ufunc), + signature = interp_attrproperty("signature", cls=W_Ufunc), reduce = interp2app(W_Ufunc.descr_reduce), outer = interp2app(W_Ufunc.descr_outer), @@ -626,7 +691,7 @@ 'supported', w_obj) -def ufunc_dtype_caller(space, ufunc_name, op_name, argcount, comparison_func, +def ufunc_dtype_caller(space, ufunc_name, op_name, nin, comparison_func, bool_result): def get_op(dtype): try: @@ -636,13 +701,13 @@ "%s not implemented for %s", ufunc_name, dtype.get_name()) dtype_cache = descriptor.get_dtype_cache(space) - if argcount == 1: + if nin == 1: def impl(res_dtype, value): res = get_op(res_dtype)(value) if bool_result: return dtype_cache.w_booldtype.box(res) return res - elif argcount == 2: + elif nin == 2: def impl(res_dtype, lvalue, rvalue): res = get_op(res_dtype)(lvalue, rvalue) if comparison_func: @@ -757,7 +822,7 @@ ]: self.add_ufunc(space, *ufunc_def) - def add_ufunc(self, space, ufunc_name, op_name, argcount, extra_kwargs=None): + def add_ufunc(self, space, ufunc_name, op_name, nin, extra_kwargs=None): if extra_kwargs is None: extra_kwargs = {} @@ -767,13 +832,13 @@ descriptor.get_dtype_cache(space).w_longdtype.box(identity) extra_kwargs["identity"] = identity - func = ufunc_dtype_caller(space, ufunc_name, op_name, argcount, + func = ufunc_dtype_caller(space, ufunc_name, op_name, nin, comparison_func=extra_kwargs.get("comparison_func", False), bool_result=extra_kwargs.get("bool_result", False), ) - if argcount == 1: + if nin == 1: ufunc = W_Ufunc1(func, 
ufunc_name, **extra_kwargs) - elif argcount == 2: + elif nin == 2: ufunc = W_Ufunc2(func, ufunc_name, **extra_kwargs) setattr(self, ufunc_name, ufunc) @@ -781,7 +846,7 @@ def get(space): return space.fromcache(UfuncState) - at unwrap_spec(nin=int, nout=int, name=str, doc=str, check_return=int, signature=str) -def ufunc_from_func_and_data_and_signature(w_funcs, w_data, w_types, nin, nout, - w_identity, name, doc, check_return, signature): +def ufunc_from_func_and_data_and_signature(funcs, data, dtypes, nin, nout, + identity, name, doc, check_return, signature): + return W_UfuncGeneric(funcs, name, identity, nin, nout, dtypes, signature) pass From noreply at buildbot.pypy.org Sun Jun 1 03:03:15 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Sun, 1 Jun 2014 03:03:15 +0200 (CEST) Subject: [pypy-commit] pypy default: move MethodsPBCRepr out of lltypesystem/ Message-ID: <20140601010315.D669A1C02AF@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: Changeset: r71795:09a955eb512d Date: 2014-06-01 02:02 +0100 http://bitbucket.org/pypy/pypy/changeset/09a955eb512d/ Log: move MethodsPBCRepr out of lltypesystem/ diff --git a/rpython/rtyper/lltypesystem/rpbc.py b/rpython/rtyper/lltypesystem/rpbc.py --- a/rpython/rtyper/lltypesystem/rpbc.py +++ b/rpython/rtyper/lltypesystem/rpbc.py @@ -1,6 +1,6 @@ import types -from rpython.annotator import description, model as annmodel +from rpython.annotator import description from rpython.rlib.debug import ll_assert from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper import callparse @@ -8,8 +8,8 @@ from rpython.rtyper.lltypesystem.lltype import (typeOf, Void, ForwardReference, Struct, Bool, Char, Ptr, malloc, nullptr, Array, Signed) from rpython.rtyper.rmodel import Repr, inputconst -from rpython.rtyper.rpbc import (AbstractClassesPBCRepr, AbstractMethodsPBCRepr, - OverriddenFunctionPBCRepr, AbstractMultipleFrozenPBCRepr, +from rpython.rtyper.rpbc import ( + AbstractClassesPBCRepr, 
AbstractMultipleFrozenPBCRepr, AbstractFunctionsPBCRepr, AbstractMultipleUnrelatedFrozenPBCRepr, SingleFrozenPBCRepr, get_concrete_calltable) from rpython.rtyper.typesystem import getfunctionptr @@ -302,47 +302,8 @@ else: return v - -class MethodsPBCRepr(AbstractMethodsPBCRepr): - """Representation selected for a PBC of the form {func: classdef...}. - It assumes that all the methods come from the same name in a base - classdef.""" - - def rtype_simple_call(self, hop): - return self.redispatch_call(hop, call_args=False) - - def rtype_call_args(self, hop): - return self.redispatch_call(hop, call_args=True) - - def redispatch_call(self, hop, call_args): - r_class = self.r_im_self.rclass - mangled_name, r_func = r_class.clsfields[self.methodname] - assert isinstance(r_func, (FunctionsPBCRepr, - OverriddenFunctionPBCRepr, - SmallFunctionSetPBCRepr)) - # s_func = r_func.s_pbc -- not precise enough, see - # test_precise_method_call_1. Build a more precise one... - funcdescs = [desc.funcdesc for desc in hop.args_s[0].descriptions] - s_func = annmodel.SomePBC(funcdescs, subset_of=r_func.s_pbc) - v_im_self = hop.inputarg(self, arg=0) - v_cls = self.r_im_self.getfield(v_im_self, '__class__', hop.llops) - v_func = r_class.getclsfield(v_cls, self.methodname, hop.llops) - - hop2 = self.add_instance_arg_to_hop(hop, call_args) - hop2.v_s_insertfirstarg(v_func, s_func) # insert 'function' - - if type(hop2.args_r[0]) is SmallFunctionSetPBCRepr and type(r_func) is FunctionsPBCRepr: - hop2.args_r[0] = FunctionsPBCRepr(self.rtyper, s_func) - else: - hop2.args_v[0] = hop2.llops.convertvar(hop2.args_v[0], r_func, hop2.args_r[0]) - - # now hop2 looks like simple_call(function, self, args...) 
- return hop2.dispatch() - - # ____________________________________________________________ - class ClassesPBCRepr(AbstractClassesPBCRepr): """Representation selected for a PBC of class(es).""" @@ -359,10 +320,10 @@ # "my_instantiate = typeptr.instantiate" c_name = hop.inputconst(Void, 'instantiate') v_instantiate = hop.genop('getfield', [vtypeptr, c_name], - resulttype = rclass.OBJECT_VTABLE.instantiate) + resulttype=rclass.OBJECT_VTABLE.instantiate) # "my_instantiate()" v_inst = hop.genop('indirect_call', [v_instantiate, c_graphs], - resulttype = rclass.OBJECTPTR) + resulttype=rclass.OBJECTPTR) return hop.genop('cast_pointer', [v_inst], resulttype=r_instance) def getlowleveltype(self): diff --git a/rpython/rtyper/rpbc.py b/rpython/rtyper/rpbc.py --- a/rpython/rtyper/rpbc.py +++ b/rpython/rtyper/rpbc.py @@ -21,8 +21,8 @@ class __extend__(annmodel.SomePBC): def rtyper_makerepr(self, rtyper): - from rpython.rtyper.lltypesystem.rpbc import (FunctionsPBCRepr, - SmallFunctionSetPBCRepr, ClassesPBCRepr, MethodsPBCRepr) + from rpython.rtyper.lltypesystem.rpbc import ( + FunctionsPBCRepr, SmallFunctionSetPBCRepr, ClassesPBCRepr) kind = self.getKind() if issubclass(kind, description.FunctionDesc): sample = self.any_description() @@ -748,7 +748,7 @@ s_shape = hop2.rtyper.annotator.bookkeeper.immutablevalue(new_shape) hop2.v_s_insertfirstarg(c_shape, s_shape) # reinsert adjusted shape -class AbstractMethodsPBCRepr(Repr): +class MethodsPBCRepr(Repr): """Representation selected for a PBC of MethodDescs. 
It assumes that all the methods come from the same name and have been read from instances with a common base.""" @@ -808,6 +808,42 @@ _, s_shape = hop2.r_s_popfirstarg() adjust_shape(hop2, s_shape) return hop2 + + def rtype_simple_call(self, hop): + return self.redispatch_call(hop, call_args=False) + + def rtype_call_args(self, hop): + return self.redispatch_call(hop, call_args=True) + + def redispatch_call(self, hop, call_args): + from rpython.rtyper.lltypesystem.rpbc import ( + FunctionsPBCRepr, SmallFunctionSetPBCRepr) + r_class = self.r_im_self.rclass + mangled_name, r_func = r_class.clsfields[self.methodname] + assert isinstance(r_func, (FunctionsPBCRepr, + OverriddenFunctionPBCRepr, + SmallFunctionSetPBCRepr)) + # s_func = r_func.s_pbc -- not precise enough, see + # test_precise_method_call_1. Build a more precise one... + funcdescs = [desc.funcdesc for desc in hop.args_s[0].descriptions] + s_func = annmodel.SomePBC(funcdescs, subset_of=r_func.s_pbc) + v_im_self = hop.inputarg(self, arg=0) + v_cls = self.r_im_self.getfield(v_im_self, '__class__', hop.llops) + v_func = r_class.getclsfield(v_cls, self.methodname, hop.llops) + + hop2 = self.add_instance_arg_to_hop(hop, call_args) + hop2.v_s_insertfirstarg(v_func, s_func) # insert 'function' + + if (type(hop2.args_r[0]) is SmallFunctionSetPBCRepr and + type(r_func) is FunctionsPBCRepr): + hop2.args_r[0] = FunctionsPBCRepr(self.rtyper, s_func) + else: + hop2.args_v[0] = hop2.llops.convertvar( + hop2.args_v[0], r_func, hop2.args_r[0]) + + # now hop2 looks like simple_call(function, self, args...) + return hop2.dispatch() + # ____________________________________________________________ def samesig(funcs): From noreply at buildbot.pypy.org Sun Jun 1 18:55:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 1 Jun 2014 18:55:15 +0200 (CEST) Subject: [pypy-commit] pypy default: The variable 'uid' was originally unsigned. 
Message-ID: <20140601165515.BC9251C0542@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r71796:7b30281727ad Date: 2014-06-01 18:53 +0200 http://bitbucket.org/pypy/pypy/changeset/7b30281727ad/ Log: The variable 'uid' was originally unsigned. diff --git a/rpython/rtyper/lltypesystem/rclass.py b/rpython/rtyper/lltypesystem/rclass.py --- a/rpython/rtyper/lltypesystem/rclass.py +++ b/rpython/rtyper/lltypesystem/rclass.py @@ -588,7 +588,7 @@ # Two choices: the first gives a fast answer but it can change # (typically only once) during the life of the object. #uid = r_uint(cast_ptr_to_int(i)) - uid = llop.gc_id(lltype.Signed, i) + uid = r_uint(llop.gc_id(lltype.Signed, i)) # nameLen = len(instance.typeptr.name) nameString = rstr.mallocstr(nameLen-1) From noreply at buildbot.pypy.org Sun Jun 1 21:54:39 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Sun, 1 Jun 2014 21:54:39 +0200 (CEST) Subject: [pypy-commit] pypy default: remove misleading _freeze_() Message-ID: <20140601195439.135311C015E@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: Changeset: r71797:04434aa772aa Date: 2014-06-01 20:53 +0100 http://bitbucket.org/pypy/pypy/changeset/04434aa772aa/ Log: remove misleading _freeze_() diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -28,15 +28,15 @@ def __repr__(self): return '' % getattr(self, 'TYPE', '?') - def _freeze_(self): - return True class ImmutableRanking(object): def __init__(self, name, is_immutable): self.name = name self.is_immutable = is_immutable + def __nonzero__(self): return self.is_immutable + def __repr__(self): return '<%s>' % self.name From noreply at buildbot.pypy.org Mon Jun 2 02:05:17 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Mon, 2 Jun 2014 02:05:17 +0200 (CEST) Subject: [pypy-commit] pypy py3k: _winreg -> winreg Message-ID: <20140602000517.C5E161D2CC8@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k 
Changeset: r71798:28d0b1ba4a91 Date: 2014-05-31 11:11 -0700 http://bitbucket.org/pypy/pypy/changeset/28d0b1ba4a91/ Log: _winreg -> winreg diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -760,10 +760,10 @@ toencode = u'caf\xe9', b'caf\xe9' try: # test for non-latin1 codepage, more general test needed - import _winreg - key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, + import winreg + key = winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'System\CurrentControlSet\Control\Nls\CodePage') - if _winreg.QueryValueEx(key, 'ACP')[0] == u'1255': # non-latin1 + if winreg.QueryValueEx(key, 'ACP')[0] == u'1255': # non-latin1 toencode = u'caf\xbf',b'caf\xbf' except: assert False, 'cannot test mbcs on this windows system, check code page' diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py --- a/pypy/module/_winreg/interp_winreg.py +++ b/pypy/module/_winreg/interp_winreg.py @@ -68,7 +68,7 @@ descr_HKEY_new = interp2app(new_HKEY) W_HKEY.typedef = TypeDef( - "_winreg.HKEYType", + "winreg.HKEYType", __doc__ = """\ PyHKEY Object - A Python object, representing a win32 registry key. 
@@ -212,7 +212,7 @@ The key identified by the key parameter must have been opened with KEY_SET_VALUE access.""" if typ != rwinreg.REG_SZ: - errstring = space.wrap("Type must be _winreg.REG_SZ") + errstring = space.wrap("Type must be winreg.REG_SZ") raise OperationError(space.w_ValueError, errstring) hkey = hkey_w(w_hkey, space) if space.is_w(w_subkey, space.w_None): diff --git a/pypy/module/_winreg/test/test_winreg.py b/pypy/module/_winreg/test/test_winreg.py --- a/pypy/module/_winreg/test/test_winreg.py +++ b/pypy/module/_winreg/test/test_winreg.py @@ -3,7 +3,7 @@ import os, sys, py if sys.platform != 'win32': - py.test.skip("_winreg is a win32 module") + py.test.skip("winreg is a win32 module") try: # To call SaveKey, the process must have Backup Privileges From noreply at buildbot.pypy.org Mon Jun 2 02:05:19 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Mon, 2 Jun 2014 02:05:19 +0200 (CEST) Subject: [pypy-commit] pypy py3k: make our import tag prefix 'pypy3' to avoid clashing w/ pypy2 cffi's compiled Message-ID: <20140602000519.A00DE1D2CE3@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r71799:c9e1b4b9f29c Date: 2014-06-01 17:02 -0700 http://bitbucket.org/pypy/pypy/changeset/c9e1b4b9f29c/ Log: make our import tag prefix 'pypy3' to avoid clashing w/ pypy2 cffi's compiled libs within __pycache__ diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -32,9 +32,10 @@ IMP_HOOK = 9 SO = '.pyd' if _WIN32 else '.so' -DEFAULT_SOABI = 'pypy-%d%d' % PYPY_VERSION[:2] +PREFIX = 'pypy3-' +DEFAULT_SOABI = '%s%d%d' % ((PREFIX,) + PYPY_VERSION[:2]) -PYC_TAG = 'pypy-%d%d' % PYPY_VERSION[:2] +PYC_TAG = '%s%d%d' % ((PREFIX,) + PYPY_VERSION[:2]) @specialize.memo() def get_so_extension(space): diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ b/pypy/module/imp/test/test_app.py @@ 
-264,4 +264,4 @@ def test_get_tag(self): import imp import sys - assert imp.get_tag() == 'pypy-%d%d' % sys.pypy_version_info[0:2] + assert imp.get_tag() == 'pypy3-%d%d' % sys.pypy_version_info[0:2] diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -697,9 +697,9 @@ def test_cache_from_source(self): import imp pycfile = imp.cache_from_source('a/b/c.py') - assert pycfile.startswith('a/b/__pycache__/c.pypy-') + assert pycfile.startswith('a/b/__pycache__/c.pypy3-') assert pycfile.endswith('.pyc') - assert imp.source_from_cache('a/b/__pycache__/c.pypy-17.pyc' + assert imp.source_from_cache('a/b/__pycache__/c.pypy3-17.pyc' ) == 'a/b/c.py' raises(ValueError, imp.source_from_cache, 'a/b/c.py') From noreply at buildbot.pypy.org Mon Jun 2 09:36:16 2014 From: noreply at buildbot.pypy.org (fijal) Date: Mon, 2 Jun 2014 09:36:16 +0200 (CEST) Subject: [pypy-commit] pypy default: A way to get an address from str Message-ID: <20140602073616.0F91B1D241C@cobra.cs.uni-duesseldorf.de> Author: Maciej Fijalkowski Branch: Changeset: r71800:ac52eb7bbbb0 Date: 2014-06-02 09:35 +0200 http://bitbucket.org/pypy/pypy/changeset/ac52eb7bbbb0/ Log: A way to get an address from str diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1136,3 +1136,12 @@ lltype.Void, releasegil=False ) + +from rpython.rtyper.annlowlevel import llstr +from rpython.rtyper.lltypesystem.rstr import STR + +def get_buffer_from_str(data): + lldata = llstr(data) + data_start = cast_ptr_to_adr(lldata) + \ + offsetof(STR, 'chars') + itemoffsetof(STR.chars, 0) + return cast(CCHARP, data_start) From noreply at buildbot.pypy.org Mon Jun 2 10:03:01 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Mon, 2 Jun 2014 10:03:01 +0200 (CEST) Subject: [pypy-commit] pypy gc-pinning: merge 
default Message-ID: <20140602080301.B748A1C0542@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-pinning Changeset: r71801:1e82e87cb90a Date: 2014-05-29 14:21 -0400 http://bitbucket.org/pypy/pypy/changeset/1e82e87cb90a/ Log: merge default diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -160,8 +160,6 @@ def consider_call_site_for_pbc(self, s_callable, args, s_result, call_op): descs = list(s_callable.descriptions) - if not descs: - return family = descs[0].getcallfamily() s_callable.getKind().consider_call_site(self, family, descs, args, s_result, call_op) diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -561,8 +561,10 @@ return [SpaceOperation('-live-', [], None), op1, None] if hints.get('force_virtualizable'): return SpaceOperation('hint_force_virtualizable', [op.args[0]], None) - else: - log.WARNING('ignoring hint %r at %r' % (hints, self.graph)) + if hints.get('force_no_const'): # for tests only + assert getkind(op.args[0].concretetype) == 'int' + return SpaceOperation('int_same_as', [op.args[0]], op.result) + log.WARNING('ignoring hint %r at %r' % (hints, self.graph)) def _rewrite_raw_malloc(self, op, name, args): d = op.args[1].value.copy() diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py --- a/rpython/jit/metainterp/blackhole.py +++ b/rpython/jit/metainterp/blackhole.py @@ -380,6 +380,10 @@ # ---------- + @arguments("i", returns="i") + def bhimpl_int_same_as(a): + return a + @arguments("i", "i", returns="i") def bhimpl_int_add(a, b): return intmask(a + b) diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -231,6 +231,14 @@ ''' % (_opimpl, _opimpl.upper())).compile() 
@arguments("box") + def opimpl_int_same_as(self, box): + # for tests only: emits a same_as, forcing the result to be in a Box + resbox = history.BoxInt(box.getint()) + self.metainterp._record_helper_nonpure_varargs( + rop.SAME_AS, resbox, None, [box]) + return resbox + + @arguments("box") def opimpl_ptr_nonzero(self, box): return self.execute(rop.PTR_NE, box, history.CONST_NULL) diff --git a/rpython/jit/metainterp/test/support.py b/rpython/jit/metainterp/test/support.py --- a/rpython/jit/metainterp/test/support.py +++ b/rpython/jit/metainterp/test/support.py @@ -273,6 +273,5 @@ def noConst(x): """Helper function for tests, returning 'x' as a BoxInt/BoxPtr even if it is a ConstInt/ConstPtr.""" - f1 = _Foo(); f2 = _Foo() - f1.x = x; f2.x = 0 - return f1.x + from rpython.rlib import jit + return jit.hint(x, force_no_const=True) diff --git a/rpython/rtyper/annlowlevel.py b/rpython/rtyper/annlowlevel.py --- a/rpython/rtyper/annlowlevel.py +++ b/rpython/rtyper/annlowlevel.py @@ -486,7 +486,7 @@ assert False def specialize_call(self, hop): - from rpython.rtyper import rpbc + from rpython.rtyper.rnone import NoneRepr PTR = hop.r_result.lowleveltype if isinstance(PTR, lltype.Ptr): T = lltype.Ptr @@ -496,7 +496,7 @@ assert False hop.exception_cannot_occur() - if isinstance(hop.args_r[1], rpbc.NoneFrozenPBCRepr): + if isinstance(hop.args_r[1], NoneRepr): return hop.inputconst(PTR, null) v_arg = hop.inputarg(hop.args_r[1], arg=1) assert isinstance(v_arg.concretetype, T) diff --git a/rpython/rtyper/lltypesystem/rpbc.py b/rpython/rtyper/lltypesystem/rpbc.py --- a/rpython/rtyper/lltypesystem/rpbc.py +++ b/rpython/rtyper/lltypesystem/rpbc.py @@ -7,39 +7,15 @@ from rpython.rtyper.lltypesystem import rclass, llmemory from rpython.rtyper.lltypesystem.lltype import (typeOf, Void, ForwardReference, Struct, Bool, Char, Ptr, malloc, nullptr, Array, Signed) -from rpython.rtyper.rmodel import Repr, TyperError, inputconst +from rpython.rtyper.rmodel import Repr, inputconst from 
rpython.rtyper.rpbc import (AbstractClassesPBCRepr, AbstractMethodsPBCRepr, OverriddenFunctionPBCRepr, AbstractMultipleFrozenPBCRepr, AbstractFunctionsPBCRepr, AbstractMultipleUnrelatedFrozenPBCRepr, - SingleFrozenPBCRepr, MethodOfFrozenPBCRepr, none_frozen_pbc_repr, - get_concrete_calltable) + SingleFrozenPBCRepr, get_concrete_calltable) from rpython.rtyper.typesystem import getfunctionptr from rpython.tool.pairtype import pairtype -def rtype_is_None(robj1, rnone2, hop, pos=0): - if isinstance(robj1.lowleveltype, Ptr): - v1 = hop.inputarg(robj1, pos) - return hop.genop('ptr_iszero', [v1], resulttype=Bool) - elif robj1.lowleveltype == llmemory.Address: - v1 = hop.inputarg(robj1, pos) - cnull = hop.inputconst(llmemory.Address, robj1.null_instance()) - return hop.genop('adr_eq', [v1, cnull], resulttype=Bool) - elif robj1 == none_frozen_pbc_repr: - return hop.inputconst(Bool, True) - elif isinstance(robj1, SmallFunctionSetPBCRepr): - if robj1.s_pbc.can_be_None: - v1 = hop.inputarg(robj1, pos) - return hop.genop('char_eq', [v1, inputconst(Char, '\000')], - resulttype=Bool) - else: - return inputconst(Bool, False) - else: - raise TyperError('rtype_is_None of %r' % (robj1)) - - -# ____________________________________________________________ - class MultipleFrozenPBCRepr(AbstractMultipleFrozenPBCRepr): """Representation selected for multiple non-callable pre-built constants.""" def __init__(self, rtyper, access_set): diff --git a/rpython/rtyper/rnone.py b/rpython/rtyper/rnone.py new file mode 100644 --- /dev/null +++ b/rpython/rtyper/rnone.py @@ -0,0 +1,82 @@ +from rpython.flowspace.model import Constant +from rpython.annotator.model import SomeNone +from rpython.rtyper.rmodel import Repr, TyperError, inputconst +from rpython.rtyper.lltypesystem.lltype import Void, Bool, Ptr, Char +from rpython.rtyper.lltypesystem.llmemory import Address +from rpython.rtyper.lltypesystem.rpbc import SmallFunctionSetPBCRepr +from rpython.rtyper.annlowlevel import llstr +from 
rpython.tool.pairtype import pairtype + +class NoneRepr(Repr): + lowleveltype = Void + + def rtype_bool(self, hop): + return Constant(False, Bool) + + def none_call(self, hop): + raise TyperError("attempt to call constant None") + + def ll_str(self, none): + return llstr("None") + + def get_ll_eq_function(self): + return None + + def get_ll_hash_function(self): + return ll_none_hash + + rtype_simple_call = none_call + rtype_call_args = none_call + +none_repr = NoneRepr() + +class __extend__(SomeNone): + def rtyper_makerepr(self, rtyper): + return none_repr + + def rtyper_makekey(self): + return self.__class__, + +def ll_none_hash(_): + return 0 + + +class __extend__(pairtype(Repr, NoneRepr)): + + def convert_from_to((r_from, _), v, llops): + return inputconst(Void, None) + + def rtype_is_((robj1, rnone2), hop): + if hop.s_result.is_constant(): + return hop.inputconst(Bool, hop.s_result.const) + return rtype_is_None(robj1, rnone2, hop) + +class __extend__(pairtype(NoneRepr, Repr)): + + def convert_from_to((_, r_to), v, llops): + return inputconst(r_to, None) + + def rtype_is_((rnone1, robj2), hop): + if hop.s_result.is_constant(): + return hop.inputconst(Bool, hop.s_result.const) + return rtype_is_None(robj2, rnone1, hop, pos=1) + +def rtype_is_None(robj1, rnone2, hop, pos=0): + if isinstance(robj1.lowleveltype, Ptr): + v1 = hop.inputarg(robj1, pos) + return hop.genop('ptr_iszero', [v1], resulttype=Bool) + elif robj1.lowleveltype == Address: + v1 = hop.inputarg(robj1, pos) + cnull = hop.inputconst(Address, robj1.null_instance()) + return hop.genop('adr_eq', [v1, cnull], resulttype=Bool) + elif robj1 == none_repr: + return hop.inputconst(Bool, True) + elif isinstance(robj1, SmallFunctionSetPBCRepr): + if robj1.s_pbc.can_be_None: + v1 = hop.inputarg(robj1, pos) + return hop.genop('char_eq', [v1, inputconst(Char, '\000')], + resulttype=Bool) + else: + return inputconst(Bool, False) + else: + raise TyperError('rtype_is_None of %r' % (robj1)) diff --git 
a/rpython/rtyper/rpbc.py b/rpython/rtyper/rpbc.py --- a/rpython/rtyper/rpbc.py +++ b/rpython/rtyper/rpbc.py @@ -4,9 +4,8 @@ from rpython.flowspace.model import Constant from rpython.annotator.argument import simple_args from rpython.rtyper import rclass, callparse -from rpython.rtyper.annlowlevel import llstr from rpython.rtyper.error import TyperError -from rpython.rtyper.lltypesystem.lltype import typeOf, Void, Bool +from rpython.rtyper.lltypesystem.lltype import typeOf, Void from rpython.rtyper.rmodel import (Repr, inputconst, CanBeNull, mangle, inputdesc, warning, impossible_repr) from rpython.tool.pairtype import pair, pairtype @@ -23,8 +22,7 @@ class __extend__(annmodel.SomePBC): def rtyper_makerepr(self, rtyper): from rpython.rtyper.lltypesystem.rpbc import (FunctionsPBCRepr, - SmallFunctionSetPBCRepr, ClassesPBCRepr, MethodsPBCRepr, - MethodOfFrozenPBCRepr) + SmallFunctionSetPBCRepr, ClassesPBCRepr, MethodsPBCRepr) kind = self.getKind() if issubclass(kind, description.FunctionDesc): sample = self.any_description() @@ -61,13 +59,6 @@ t = () return tuple([self.__class__, self.can_be_None]+lst)+t -class __extend__(annmodel.SomeNone): - def rtyper_makerepr(self, rtyper): - return none_frozen_pbc_repr - - def rtyper_makekey(self): - return self.__class__, - # ____________________________________________________________ class ConcreteCallTableRow(dict): @@ -589,56 +580,6 @@ def convert_from_to((r_from, r_to), v, llops): return pair(r_from.r_im_self, r_to.r_im_self).convert_from_to(v, llops) -# __ None ____________________________________________________ -class NoneFrozenPBCRepr(Repr): - lowleveltype = Void - - def rtype_bool(self, hop): - return Constant(False, Bool) - - def none_call(self, hop): - raise TyperError("attempt to call constant None") - - def ll_str(self, none): - return llstr("None") - - def get_ll_eq_function(self): - return None - - def get_ll_hash_function(self): - return ll_none_hash - - rtype_simple_call = none_call - rtype_call_args = 
none_call - -none_frozen_pbc_repr = NoneFrozenPBCRepr() - -def ll_none_hash(_): - return 0 - - -class __extend__(pairtype(Repr, NoneFrozenPBCRepr)): - - def convert_from_to((r_from, _), v, llops): - return inputconst(Void, None) - - def rtype_is_((robj1, rnone2), hop): - from rpython.rtyper.lltypesystem.rpbc import rtype_is_None - if hop.s_result.is_constant(): - return hop.inputconst(Bool, hop.s_result.const) - return rtype_is_None(robj1, rnone2, hop) - -class __extend__(pairtype(NoneFrozenPBCRepr, Repr)): - - def convert_from_to((_, r_to), v, llops): - return inputconst(r_to, None) - - def rtype_is_((rnone1, robj2), hop): - from rpython.rtyper.lltypesystem.rpbc import rtype_is_None - if hop.s_result.is_constant(): - return hop.inputconst(Bool, hop.s_result.const) - return rtype_is_None(robj2, rnone1, hop, pos=1) - # ____________________________________________________________ class AbstractClassesPBCRepr(Repr): diff --git a/rpython/rtyper/rtyper.py b/rpython/rtyper/rtyper.py --- a/rpython/rtyper/rtyper.py +++ b/rpython/rtyper/rtyper.py @@ -936,7 +936,7 @@ # _______________________________________________________________________ # this has the side-effect of registering the unary and binary operations # and the rtyper_chooserepr() methods -from rpython.rtyper import rint, rbool, rfloat +from rpython.rtyper import rint, rbool, rfloat, rnone from rpython.rtyper import rrange from rpython.rtyper import rstr, rdict, rlist, rbytearray from rpython.rtyper import rclass, rbuiltin, rpbc From noreply at buildbot.pypy.org Mon Jun 2 10:03:03 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Mon, 2 Jun 2014 10:03:03 +0200 (CEST) Subject: [pypy-commit] pypy gc-pinning: merge default Message-ID: <20140602080303.3D85D1C0542@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-pinning Changeset: r71802:1f4aa19a1f22 Date: 2014-06-01 03:25 -0400 http://bitbucket.org/pypy/pypy/changeset/1f4aa19a1f22/ Log: merge default diff --git a/lib-python/2.7/imputil.py 
b/lib-python/2.7/imputil.py --- a/lib-python/2.7/imputil.py +++ b/lib-python/2.7/imputil.py @@ -422,7 +422,8 @@ saved back to the filesystem for future imports. The source file's modification timestamp must be provided as a Long value. """ - codestring = open(pathname, 'rU').read() + with open(pathname, 'rU') as fp: + codestring = fp.read() if codestring and codestring[-1] != '\n': codestring = codestring + '\n' code = __builtin__.compile(codestring, pathname, 'exec') @@ -603,8 +604,8 @@ self.desc = desc def import_file(self, filename, finfo, fqname): - fp = open(filename, self.desc[1]) - module = imp.load_module(fqname, fp, filename, self.desc) + with open(filename, self.desc[1]) as fp: + module = imp.load_module(fqname, fp, filename, self.desc) module.__file__ = filename return 0, module, { } diff --git a/lib-python/2.7/modulefinder.py b/lib-python/2.7/modulefinder.py --- a/lib-python/2.7/modulefinder.py +++ b/lib-python/2.7/modulefinder.py @@ -109,16 +109,16 @@ def run_script(self, pathname): self.msg(2, "run_script", pathname) - fp = open(pathname, READ_MODE) - stuff = ("", "r", imp.PY_SOURCE) - self.load_module('__main__', fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) def load_file(self, pathname): dir, name = os.path.split(pathname) name, ext = os.path.splitext(name) - fp = open(pathname, READ_MODE) - stuff = (ext, "r", imp.PY_SOURCE) - self.load_module(name, fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) def import_hook(self, name, caller=None, fromlist=None, level=-1): self.msg(3, "import_hook", name, caller, fromlist, level) @@ -461,6 +461,8 @@ fp, buf, stuff = self.find_module("__init__", m.__path__) self.load_module(fqname, fp, buf, stuff) self.msgout(2, "load_package ->", m) + if fp: + fp.close() return m def add_module(self, fqname): diff --git 
a/lib-python/2.7/test/test_argparse.py b/lib-python/2.7/test/test_argparse.py --- a/lib-python/2.7/test/test_argparse.py +++ b/lib-python/2.7/test/test_argparse.py @@ -48,6 +48,9 @@ def tearDown(self): os.chdir(self.old_dir) + import gc + # Force a collection which should close FileType() options + gc.collect() for root, dirs, files in os.walk(self.temp_dir, topdown=False): for name in files: os.chmod(os.path.join(self.temp_dir, name), stat.S_IWRITE) diff --git a/rpython/rtyper/lltypesystem/rclass.py b/rpython/rtyper/lltypesystem/rclass.py --- a/rpython/rtyper/lltypesystem/rclass.py +++ b/rpython/rtyper/lltypesystem/rclass.py @@ -585,7 +585,11 @@ if not i: return rstr.null_str instance = cast_pointer(OBJECTPTR, i) - uid = r_uint(cast_ptr_to_int(i)) + # Two choices: the first gives a fast answer but it can change + # (typically only once) during the life of the object. + #uid = r_uint(cast_ptr_to_int(i)) + uid = llop.gc_id(lltype.Signed, i) + # nameLen = len(instance.typeptr.name) nameString = rstr.mallocstr(nameLen-1) i = 0 diff --git a/rpython/rtyper/lltypesystem/rpbc.py b/rpython/rtyper/lltypesystem/rpbc.py --- a/rpython/rtyper/lltypesystem/rpbc.py +++ b/rpython/rtyper/lltypesystem/rpbc.py @@ -1,6 +1,6 @@ import types -from rpython.annotator import description, model as annmodel +from rpython.annotator import description from rpython.rlib.debug import ll_assert from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper import callparse @@ -8,8 +8,8 @@ from rpython.rtyper.lltypesystem.lltype import (typeOf, Void, ForwardReference, Struct, Bool, Char, Ptr, malloc, nullptr, Array, Signed) from rpython.rtyper.rmodel import Repr, inputconst -from rpython.rtyper.rpbc import (AbstractClassesPBCRepr, AbstractMethodsPBCRepr, - OverriddenFunctionPBCRepr, AbstractMultipleFrozenPBCRepr, +from rpython.rtyper.rpbc import ( + AbstractClassesPBCRepr, AbstractMultipleFrozenPBCRepr, AbstractFunctionsPBCRepr, AbstractMultipleUnrelatedFrozenPBCRepr, SingleFrozenPBCRepr, 
get_concrete_calltable) from rpython.rtyper.typesystem import getfunctionptr @@ -302,47 +302,8 @@ else: return v - -class MethodsPBCRepr(AbstractMethodsPBCRepr): - """Representation selected for a PBC of the form {func: classdef...}. - It assumes that all the methods come from the same name in a base - classdef.""" - - def rtype_simple_call(self, hop): - return self.redispatch_call(hop, call_args=False) - - def rtype_call_args(self, hop): - return self.redispatch_call(hop, call_args=True) - - def redispatch_call(self, hop, call_args): - r_class = self.r_im_self.rclass - mangled_name, r_func = r_class.clsfields[self.methodname] - assert isinstance(r_func, (FunctionsPBCRepr, - OverriddenFunctionPBCRepr, - SmallFunctionSetPBCRepr)) - # s_func = r_func.s_pbc -- not precise enough, see - # test_precise_method_call_1. Build a more precise one... - funcdescs = [desc.funcdesc for desc in hop.args_s[0].descriptions] - s_func = annmodel.SomePBC(funcdescs, subset_of=r_func.s_pbc) - v_im_self = hop.inputarg(self, arg=0) - v_cls = self.r_im_self.getfield(v_im_self, '__class__', hop.llops) - v_func = r_class.getclsfield(v_cls, self.methodname, hop.llops) - - hop2 = self.add_instance_arg_to_hop(hop, call_args) - hop2.v_s_insertfirstarg(v_func, s_func) # insert 'function' - - if type(hop2.args_r[0]) is SmallFunctionSetPBCRepr and type(r_func) is FunctionsPBCRepr: - hop2.args_r[0] = FunctionsPBCRepr(self.rtyper, s_func) - else: - hop2.args_v[0] = hop2.llops.convertvar(hop2.args_v[0], r_func, hop2.args_r[0]) - - # now hop2 looks like simple_call(function, self, args...) 
- return hop2.dispatch() - - # ____________________________________________________________ - class ClassesPBCRepr(AbstractClassesPBCRepr): """Representation selected for a PBC of class(es).""" @@ -359,10 +320,10 @@ # "my_instantiate = typeptr.instantiate" c_name = hop.inputconst(Void, 'instantiate') v_instantiate = hop.genop('getfield', [vtypeptr, c_name], - resulttype = rclass.OBJECT_VTABLE.instantiate) + resulttype=rclass.OBJECT_VTABLE.instantiate) # "my_instantiate()" v_inst = hop.genop('indirect_call', [v_instantiate, c_graphs], - resulttype = rclass.OBJECTPTR) + resulttype=rclass.OBJECTPTR) return hop.genop('cast_pointer', [v_inst], resulttype=r_instance) def getlowleveltype(self): diff --git a/rpython/rtyper/rmodel.py b/rpython/rtyper/rmodel.py --- a/rpython/rtyper/rmodel.py +++ b/rpython/rtyper/rmodel.py @@ -345,17 +345,6 @@ # ____________________________________________________________ -def inputdesc(reqtype, desc): - """Return a Constant for the given desc, of the requested type, - which can only be a Repr. - """ - assert isinstance(reqtype, Repr) - value = reqtype.convert_desc(desc) - lltype = reqtype.lowleveltype - c = Constant(value) - c.concretetype = lltype - return c - def inputconst(reqtype, value): """Return a Constant with the given value, of the requested type, which can be a Repr instance or a low-level type. 
diff --git a/rpython/rtyper/rpbc.py b/rpython/rtyper/rpbc.py --- a/rpython/rtyper/rpbc.py +++ b/rpython/rtyper/rpbc.py @@ -7,7 +7,7 @@ from rpython.rtyper.error import TyperError from rpython.rtyper.lltypesystem.lltype import typeOf, Void from rpython.rtyper.rmodel import (Repr, inputconst, CanBeNull, mangle, - inputdesc, warning, impossible_repr) + warning, impossible_repr) from rpython.tool.pairtype import pair, pairtype @@ -21,8 +21,8 @@ class __extend__(annmodel.SomePBC): def rtyper_makerepr(self, rtyper): - from rpython.rtyper.lltypesystem.rpbc import (FunctionsPBCRepr, - SmallFunctionSetPBCRepr, ClassesPBCRepr, MethodsPBCRepr) + from rpython.rtyper.lltypesystem.rpbc import ( + FunctionsPBCRepr, SmallFunctionSetPBCRepr, ClassesPBCRepr) kind = self.getKind() if issubclass(kind, description.FunctionDesc): sample = self.any_description() @@ -495,7 +495,9 @@ frozendesc1 = r_pbc1.frozendesc access = frozendesc1.queryattrfamily() if access is r_pbc2.access_set: - return inputdesc(r_pbc2, frozendesc1) + value = r_pbc2.convert_desc(frozendesc1) + lltype = r_pbc2.lowleveltype + return Constant(value, lltype) return NotImplemented class __extend__(pairtype(AbstractMultipleUnrelatedFrozenPBCRepr, @@ -746,7 +748,7 @@ s_shape = hop2.rtyper.annotator.bookkeeper.immutablevalue(new_shape) hop2.v_s_insertfirstarg(c_shape, s_shape) # reinsert adjusted shape -class AbstractMethodsPBCRepr(Repr): +class MethodsPBCRepr(Repr): """Representation selected for a PBC of MethodDescs. 
It assumes that all the methods come from the same name and have been read from instances with a common base.""" @@ -806,6 +808,42 @@ _, s_shape = hop2.r_s_popfirstarg() adjust_shape(hop2, s_shape) return hop2 + + def rtype_simple_call(self, hop): + return self.redispatch_call(hop, call_args=False) + + def rtype_call_args(self, hop): + return self.redispatch_call(hop, call_args=True) + + def redispatch_call(self, hop, call_args): + from rpython.rtyper.lltypesystem.rpbc import ( + FunctionsPBCRepr, SmallFunctionSetPBCRepr) + r_class = self.r_im_self.rclass + mangled_name, r_func = r_class.clsfields[self.methodname] + assert isinstance(r_func, (FunctionsPBCRepr, + OverriddenFunctionPBCRepr, + SmallFunctionSetPBCRepr)) + # s_func = r_func.s_pbc -- not precise enough, see + # test_precise_method_call_1. Build a more precise one... + funcdescs = [desc.funcdesc for desc in hop.args_s[0].descriptions] + s_func = annmodel.SomePBC(funcdescs, subset_of=r_func.s_pbc) + v_im_self = hop.inputarg(self, arg=0) + v_cls = self.r_im_self.getfield(v_im_self, '__class__', hop.llops) + v_func = r_class.getclsfield(v_cls, self.methodname, hop.llops) + + hop2 = self.add_instance_arg_to_hop(hop, call_args) + hop2.v_s_insertfirstarg(v_func, s_func) # insert 'function' + + if (type(hop2.args_r[0]) is SmallFunctionSetPBCRepr and + type(r_func) is FunctionsPBCRepr): + hop2.args_r[0] = FunctionsPBCRepr(self.rtyper, s_func) + else: + hop2.args_v[0] = hop2.llops.convertvar( + hop2.args_v[0], r_func, hop2.args_r[0]) + + # now hop2 looks like simple_call(function, self, args...) 
+ return hop2.dispatch() + # ____________________________________________________________ def samesig(funcs): From noreply at buildbot.pypy.org Mon Jun 2 10:03:04 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Mon, 2 Jun 2014 10:03:04 +0200 (CEST) Subject: [pypy-commit] pypy gc-pinning: add can_pin_objects flag , pin and unpin in GC base Message-ID: <20140602080304.8D7C91C0542@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-pinning Changeset: r71803:a0902525aa1c Date: 2014-06-02 03:59 -0400 http://bitbucket.org/pypy/pypy/changeset/a0902525aa1c/ Log: add can_pin_objects flag , pin and unpin in GC base diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -15,6 +15,7 @@ class GCBase(object): _alloc_flavor_ = "raw" moving_gc = False + can_pin_objects = False needs_write_barrier = False malloc_zero_filled = False prebuilt_gc_objects_are_static_roots = True @@ -174,6 +175,12 @@ def can_move(self, addr): return False + + def pin(self, addr): + pass + + def unpin(self, addr): + pass def set_max_heap_size(self, size): raise NotImplementedError From noreply at buildbot.pypy.org Mon Jun 2 10:03:06 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Mon, 2 Jun 2014 10:03:06 +0200 (CEST) Subject: [pypy-commit] pypy gc-pinning: modify can_move() and add pin() and unpin() Message-ID: <20140602080306.044721C0542@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-pinning Changeset: r71804:6f41a99edd76 Date: 2014-06-02 04:01 -0400 http://bitbucket.org/pypy/pypy/changeset/6f41a99edd76/ Log: modify can_move() and add pin() and unpin() diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -165,6 +165,7 @@ class IncrementalMiniMarkGC(MovingGCBase): _alloc_flavor_ = "raw" + can_pin_objects = True inline_simple_malloc = True inline_simple_malloc_varsize = True 
needs_write_barrier = True @@ -884,7 +885,13 @@ def can_move(self, obj): """Overrides the parent can_move().""" - return self.is_in_nursery(obj) + return (self.is_in_nursery(obj) & not self.header(obj).tid & GCFLAG_PINNED) + + def pin(self, obj): + self.header(obj).tid |= GCFLAG_PINNED + + def unpin(self, obj): + self.header(obj).tid &= ~GCFLAG_PINNED def shrink_array(self, obj, smallerlength): From noreply at buildbot.pypy.org Mon Jun 2 11:03:01 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Mon, 2 Jun 2014 11:03:01 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: Some work on the introduction, background Message-ID: <20140602090301.E27991D2D70@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5283:de5478830ead Date: 2014-06-02 11:03 +0200 http://bitbucket.org/pypy/extradoc/changeset/de5478830ead/ Log: Some work on the introduction, background diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex --- a/talk/dls2014/paper/paper.tex +++ b/talk/dls2014/paper/paper.tex @@ -139,7 +139,8 @@ Dynamic languages like Python, PHP, Ruby, and JavaScript are usually regarded as very expressive but also very slow. In recent years, the introduction of just-in-time compilers (JIT) for these languages (e.g. -PyPy, V8, Tracemonkey) started to change this perception by delivering +PyPy~\cite{cfbolz09}, V8~\cite{kevin10}, IonMonkey~\cite{ionmonkey}) +started to change this perception by delivering good performance that enables new applications. However, a parallel programming model was not part of the design of those languages. Thus, the reference implementations of e.g. Python and Ruby use a single, @@ -151,22 +152,29 @@ while executing bytecode instructions and it may only be released in-between such instructions, it provides perfect isolation and atomicity between multiple threads for a series of -instructions. Another technology that can provide the same guarantees -is transactional memory (TM). 
\remi{cite our position paper} +instructions. Additionally, it provides the application with a +sequential consistency model~\cite{lamport79}. Another technology that +can provide the same guarantees is transactional memory +(TM). \remi{cite our position paper} -There have been several attempts at replacing the GIL with TM. Using -transactions to enclose multiple bytecode instructions, we can get the -very same semantics as the GIL while possibly executing several -transactions in parallel. Furthermore, by exposing these -interpreter-level transactions to the application in the form of -\emph{atomic blocks}, we give dynamic languages a new synchronisation -mechanism that avoids several of the problems of locks as they are -used now. +There have been several attempts at replacing the GIL with +TM~\cite{nicholas06,odaira14,fuad10}. Using transactions to enclose +multiple bytecode instructions, we can get the very same semantics as +the GIL while possibly executing several transactions in +parallel. Furthermore, by exposing these interpreter-level +transactions to the application in the form of \emph{atomic +blocks}~\cite{tim03,tim05}, we give dynamic languages a new +synchronisation mechanism that avoids several of the problems of locks +as they are used now. -\remi{cite and extract from (our pos. paper):} TM systems come in can be broadly categorised as hardware based (HTM), software based (STM), or hybrid systems (HyTM). HTM systems are limited -by hardware constraints, while STM systems have a lot of overhead. +by hardware constraints~\cite{odaira14,fuad10}, while STM systems have +a lot of overhead~\cite{cascaval08,drago11}. In \cite{wayforward14}, +we argue that STM is still the best way forward, especially since it +supports large atomic blocks as a new way for synchronising multiple +threads. There have been several attempts at lowering the overhead +of STM~\cite{warmhoff13,spear09} - sometimes at the cost of scalability. 
In this paper, we describe how we manage to lower the overhead of our STM system so that it can be seen as a viable replacement for the GIL. @@ -174,9 +182,7 @@ \begin{itemize}[noitemsep] \item We introduce a new software transactional memory (STM) system that performs well even on low numbers of CPUs. It uses a novel - combination of hardware features\arigo{"OS-level feature" maybe. - "Hardware feature" implies it only works on custom chips} - and garbage collector (GC) + combination of long-existing CPU features and garbage collector (GC) integration in order to keep the overhead of STM very low. \item This new STM system is used to replace the GIL in Python and is then evaluated extensively. @@ -189,36 +195,46 @@ \section{Background} +\subsection{Global Interpreter Lock} + +The GIL is a very simple synchronisation mechanism for supporting +multithreading in an interpreter. The basic guarantee is that the GIL +may only be released in between bytecode instructions. Thus, these +instructions are always executed atomically and in complete isolation +from others running in other threads. \emph{Atomicity} means that each +instruction and its effects seem to happen at one, indivisible point +in time. Other instructions never see inconsistent state of a +partially executed instruction (\emph{isolation}). + +In addition to these guarantees, instructions are executed in a +sequential consistency model~\cite{lamport79}. This means that +the outcome of any execution of instructions in multiple threads is +equal to \emph{some} sequential execution of them. + \subsection{Transactional Memory} Transactional memory (TM) is a concurrency control mechanism that comes from database systems. Using transactions, we can group a series of instructions performing operations on memory and make them happen -atomically and in complete isolation from other -transactions. 
\emph{Atomicity} means that all these instructions in -the transaction and their effects seem to happen at one, indivisible -point in time. Other transactions never see inconsistent state of a -partially executed transaction which is called \emph{isolation}. +atomically and in complete isolation from other transactions. +Atomicity and isolation are basic properties of transactions. If we start multiple such transactions in multiple threads, the TM system guarantees that the outcome of running the transactions is \emph{serialisable}. Meaning, the outcome is equal to some sequential -execution of these transactions. This means that the approach provides the same -semantics as using the GIL -while still allowing the TM system to -run transactions in parallel as an optimisation. -\remi{maybe some more explanation of how exactly TM replaces the GIL} +execution of these transactions. By that, we can again provide a +sequentially consistent model for programming in multiple threads. We +can therefore use TM to directly replace the GIL. Instead of releasing +and acquiring the GIL between bytecode instructions, we commit and +start the transactions our instructions are running in. \subsection{Python} -\cfbolz{a pypy introduction needs to go somewhere, a paragraph or so. maybe in the evaluation section} - -We implement and evaluate our system for the Python language. For the -actual implementation, we chose the PyPy interpreter because replacing -the GIL there with a TM system is just a matter of adding a new -transformation to the translation process of the interpreter. - +We implement and evaluate our system for the Python language. Python +is a dynamic programming language that was designed with GIL semantics +in mind. Its reference implementation, CPython~\cite{cpython}, uses a +GIL to synchronise instructions in multiple threads. Over the years, Python added multiple ways to provide concurrency and parallelism to its applications. 
We want to highlight two of them, namely \emph{threading} and \emph{multiprocessing}. @@ -240,37 +256,36 @@ We focus on the \emph{threading} approach. This requires us to remove the GIL from our interpreter in order to run code in parallel on multiple threads. One approach to this is fine-grained locking instead -of a single global lock. Jython and IronPython are implementations of -this. It requires great care in order to avoid deadlocks, which is why -we follow the TM approach that provides a \emph{direct} replacement -for the GIL. It does not require careful placing of locks in the right -spots. We will compare our work with Jython for evaluation. +of a single global lock. Jython~\cite{webjython} and +IronPython~\cite{ironpython} are implementations of this. It requires +great care in order to avoid deadlocks, which is why we follow the TM +approach that provides a \emph{direct} replacement for the GIL. It +does not require careful placing of locks in the right spots. We will +compare our work with Jython for evaluation. \subsection{Synchronisation} -\cfbolz{citation again needed for the whole subsection} - -It is well known that using locks to synchronise multiple threads is -hard. They are non-composable, have overhead, may deadlock, limit -scalability, and overall add a lot of complexity. For a better -parallel programming model for dynamic languages, we want to implement -another, well-known synchronisation mechanism: \emph{atomic blocks}. +In Python, since the GIL is not directly exposed to the interpreter, +applications still need to synchronise memory accesses from multiple +threads using locks. Locks can be very hard to get +right~\cite{christopher10,victor11,shan08}. They are non-composable, +have overhead, may deadlock, limit scalability, and add to the overall +complexity of the program logic. We think that \emph{atomic +blocks}~\cite{tim03,tim05} provide a better way for synchronisation. 
Atomic blocks are composable, deadlock-free, higher-level and expose useful atomicity and isolation guarantees to the application for a -series of instructions. An implementation using a GIL would simply -guarantee that the GIL is not released during the execution of the -atomic block. Using TM, we have the same effect by guaranteeing that -all instructions in an atomic block are executed inside a single -transaction. - - -\remi{STM, how atomicity \& isolation; reasons for overhead} +series of instructions. This is why we think that the introduction +of atomic blocks to Python is a valuable contribution. Since atomicity +is a property of transactions, TM and atomic blocks are a natural fit. \section{Method} +We now take a closer look at how our TM system that we use to replace +the GIL works, what properties it has, and how it is implemented. + \subsection{Transactional Memory Model} In this section, we characterise the model of our TM system and its @@ -1132,7 +1147,7 @@ \paragraph{Non-JIT benchmarks:} First we run our benchmarks on four different interpreters: Jython (fine-grained locking), CPython (GIL), and PyPy with STM and with the GIL (both without the JIT). The results -are shown in \ref{fig:performance-nojit}. +are shown in figure \ref{fig:performance-nojit}. As expected, all interpreters with a GIL do not scale with the number of threads. They even become slower because of the overhead of @@ -1162,13 +1177,17 @@ in the plots. Also, in order to get more stable results, we increased the input size of all benchmarks to get reasonable execution times. -The results are shown in \ref{fig:performance-nojit}. We see that the -performance is much less stable. There is certainly more work required -in this area. In general, we see that the group of non-locked -benchmarks certainly scales best. The other three scale barely or not -at all with the number of threads. 
The slowdown factor from GIL to STM -ranges around \remi{$1-2.4\times$} and we beat GIL performance in half -of the benchmarks. +The results are presented in figure \ref{fig:performance-nojit}. We +see that the performance is much less stable. There is certainly more +work required in this area. In general, we see that the group of +non-locked benchmarks certainly scales best. The other three scale +barely or not at all with the number of threads. The slowdown factor +from GIL to STM ranges around \remi{$1-2.4\times$} and we beat GIL +performance in half of the benchmarks. + +\remi{Reason for bad scaling: acceleration of code that produces +conflicts $-->$ more iterations $-->$ more conflicts. The overhead +doesn't get accelerated by the JIT.} \begin{figure}[h] @@ -1208,14 +1227,34 @@ \begin{thebibliography}{} \softraggedright +\bibitem{cfbolz09} Carl Friedrich Bolz, Antonio Cuni, Maciej + Fijalkowski, and Armin Rigo. 2009. Tracing the meta-level: PyPy's + tracing JIT compiler. \emph{In Proceedings of the 4th workshop on the + Implementation, Compilation, Optimization of Object-Oriented Languages + and Programming Systems} (ICOOOLPS '09). + +\bibitem{kevin10} Kevin Millikin, Florian Schneider. 2010. A New + Crankshaft for V8. + \url{http://blog.chromium.org/2010/12/new-crankshaft-for-v8.html} + +\bibitem{ionmonkey} IonMonkey from Mozilla. 2014. + \url{https://wiki.mozilla.org/IonMonkey/Overview} + +\bibitem{wayforward14} Remigius Meier, Armin Rigo. 2014. A Way Forward + in Parallelising Dynamic Languages. Under review in ICOOOLPS'14. + +\bibitem{cpython} CPython. \url{www.python.org} +\bibitem{webjython} The Jython Project, \url{www.jython.org} +\bibitem{ironpython} IronPython. \url{www.ironpython.net} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \bibitem{dan07} Dan Grossman. 2007. The transactional memory / garbage collection analogy. 
\emph{In Proceedings of the 22nd annual ACM SIGPLAN conference on Object-oriented programming systems and applications} (OOPSLA '07). -\bibitem{webjython} - The Jython Project, \url{www.jython.org} \bibitem{odaira14} Odaira, Rei, Jose G. Castanos, and Hisanobu Tomari. "Eliminating From noreply at buildbot.pypy.org Mon Jun 2 12:57:32 2014 From: noreply at buildbot.pypy.org (fijal) Date: Mon, 2 Jun 2014 12:57:32 +0200 (CEST) Subject: [pypy-commit] pypy default: help the annotator a little Message-ID: <20140602105732.E039B1C02AF@cobra.cs.uni-duesseldorf.de> Author: Maciej Fijalkowski Branch: Changeset: r71805:de7fbdacc07c Date: 2014-06-02 12:56 +0200 http://bitbucket.org/pypy/pypy/changeset/de7fbdacc07c/ Log: help the annotator a little diff --git a/rpython/rlib/rStringIO.py b/rpython/rlib/rStringIO.py --- a/rpython/rlib/rStringIO.py +++ b/rpython/rlib/rStringIO.py @@ -73,6 +73,7 @@ self.__strings.append(buffer) def __slow_write(self, buffer): + assert buffer is not None # help annotator p = self.__pos assert p >= 0 endp = p + len(buffer) From noreply at buildbot.pypy.org Mon Jun 2 15:59:10 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Mon, 2 Jun 2014 15:59:10 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: add some refs Message-ID: <20140602135910.7128A1C0109@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5284:4347238f4a63 Date: 2014-06-02 15:59 +0200 http://bitbucket.org/pypy/extradoc/changeset/4347238f4a63/ Log: add some refs diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex --- a/talk/dls2014/paper/paper.tex +++ b/talk/dls2014/paper/paper.tex @@ -291,7 +291,7 @@ In this section, we characterise the model of our TM system and its guarantees as well as some of the design choices we made. This should clarify the general semantics in commonly used terms from the -literature.\remi{cite Transactional Memory 2nd edition} +literature~\cite{harris10}. Our TM system is fully implemented in software. 
However, we do exploit some more advanced features of current CPUs, particularly \emph{memory @@ -328,14 +328,14 @@ \emph{atomicity} for transactions at all times. Our method of choice is \emph{lazy version management}. Modifications by a transaction are not visible to another transaction before the former commits. -Furthermore, the isolation provides full \emph{opacity} to always -guarantee a consistent read set even for non-committed transactions. -\remi{cite On the Correctness of Transactional Memory} +Furthermore, the isolation provides full +\emph{opacity}~\cite{guerraoui08} to always guarantee a consistent +read set even for non-committed transactions. To also support these properties for irreversible operations that cannot be undone when we abort a transaction (e.g. I/O, syscalls, and non-transactional code in general), we use \emph{irrevocable} or -\emph{inevitable transactions}. These transactions are always +\emph{inevitable transactions}~\cite{blundell06,spear08}. These transactions are always guaranteed to commit, which is why they always have to win in case there is a conflict with another, normal transaction. There is always at most one such transaction running in the system, thus their @@ -373,7 +373,7 @@ translate this $SO$ to a real virtual memory address when used inside a thread, we need to add the thread's segment start address to the $SO$. The result of this operation is called a \emph{Linear Address - (LA)}. This is illustrated in Figure \ref{fig:Segment-Addressing}. + (LA)}. This is illustrated in figure \ref{fig:Segment-Addressing}. x86-CPUs provide a feature called \emph{memory segmentation}. It performs this translation from a $SO$ to a LA directly in hardware. We @@ -404,7 +404,7 @@ In order to eliminate the prohibitive memory requirements of keeping around $N$ segment copies, we share memory between them. The segments are initially allocated in a single range of virtual memory by a call -to \lstinline!mmap()!. 
As illustrated in Figure +to \lstinline!mmap()!. As illustrated in figure \ref{fig:mmap()-Page-Mapping}, \lstinline!mmap()! creates a mapping between a range of virtual memory pages and virtual file pages. The virtual file pages are then mapped lazily by the kernel to real @@ -423,7 +423,7 @@ \end{figure} -As illustrated in Figure \ref{fig:Page-Remapping}, in our initial +As illustrated in figure \ref{fig:Page-Remapping}, in our initial configuration (I) all segments are backed by their own range of virtual file pages. This is the share-nothing configuration where all threads have private versions of all objects. @@ -470,7 +470,7 @@ We now use these mechanisms to provide isolation for transactions. Using write barriers, we implement a \emph{Copy-On-Write (COW)} on the level of pages. Starting from the initial fully-shared configuration -(Figure \ref{fig:Page-Remapping}, (II)), when we need to modify an +(figure \ref{fig:Page-Remapping}, (II)), when we need to modify an object without other threads seeing the changes immediately, we ensure that all pages belonging to the object are private to our segment. @@ -538,7 +538,6 @@ resetting should be faster than re-sharing. \end{description} -\cfbolz{random question: did we investigate the extra memory requirements? we should characterise memory overhead somewhere, eg at least one byte per object for the read markers} \subsubsection{Summary} @@ -600,7 +599,7 @@ and pop objects on the shadow stack~\footnote{A stack for pointers to GC objects that allows for precise garbage collection. All objects on that stack are never seen as garbage and are thus always kept - alive.}. Objects have to be saved using this stack around calls + alive.~\cite{fergus02}}. Objects have to be saved using this stack around calls that may cause a GC cycle to happen, and also while there is no transaction running. In this simplified API, only \lstinline!stm_allocate()! and \lstinline!stm_commit_transaction()! 
@@ -627,7 +626,7 @@ However, the layout of a segment is not uniform and we actually privatise a few areas again right away. These areas are illustrated in -Figure \ref{fig:Segment-Layout} and explained here: +figure \ref{fig:Segment-Layout} and explained here: \begin{description}[noitemsep] \item [{NULL~page:}] This page is unmapped and will produce a segmentation violation when accessed. We use this to detect @@ -714,14 +713,13 @@ anymore. As seen in the API (section~\ref{sub:Application-Programming-Interfac}), -we use a \emph{shadow stack} in order to provide precise garbage +we use a \emph{shadow stack}~\cite{fergus02} in order to provide precise garbage collection. Any time we call a function that possibly triggers a collection, we need to save the objects that we need afterwards on the shadow stack using \lstinline!STM_PUSH_ROOT()!. That way, they will not be freed. And in case they were young, we get their new location in the old object space when getting them back from the stack using -\lstinline!STM_POP_ROOT()!. \remi{cite something which explains -shadowstacks in more detail} +\lstinline!STM_POP_ROOT()!. @@ -930,7 +928,7 @@ \begin{itemize}[noitemsep] \item prefer transactions that started earlier to younger transactions \item to support \emph{inevitable} transactions, we always prefer them - to others since they cannot abort + to others since they cannot abort (similar to \cite{blundell06}) \end{itemize} We can either simply abort a transaction to let the other one succeed, or we can also wait until the other transaction committed. The latter @@ -1151,9 +1149,10 @@ As expected, all interpreters with a GIL do not scale with the number of threads. They even become slower because of the overhead of -thread-switching and GIL handling. We also see Jython scale when we -expect it to (mandelbrot, raytrace, richards), and behave similar to -the GIL interpreters in the other cases. +thread-switching and GIL handling (see \cite{beazley10} for a detailed +analysis). 
We also see Jython scale when we expect it to (mandelbrot, +raytrace, richards), and behave similar to the GIL interpreters in the +other cases. PyPy using our STM system (pypy-stm-nojit) scales in all benchmarks to a certain degree. We see that the average overhead from switching from @@ -1206,6 +1205,27 @@ \section{Related Work} +Eliminate GIL: +\begin{itemize} +\item Previous attempts with HTM: \cite{nicholas06,odaira14,fuad10} +\item Previous attempts with STM: \cite{stmupdate13} +\end{itemize} + +Similar STMs: +\begin{itemize} +\item FastLane: \cite{warmhoff13} +\item TML: \cite{spear09} +\item Virtualizing HTM: \cite{rajwar05} +\item Page-based virtualizing HyTM: \cite{chung06} (XTM can be + implemented either in the OS as part of the virtual memory manager or + between underlying TM systems and the OS, like virtual machines; + Conflicts for overflowed transactions are tracked at page granularity; + XTM-e allows conflict detection at cache line granu- + larity, even for overflowed data in virtual memory) +\item using mmap(): Memory-Mapped Transactions +\item mem-protected conflict detection: \cite{martin09} +\end{itemize} + \section{Conclusions} @@ -1247,6 +1267,47 @@ \bibitem{webjython} The Jython Project, \url{www.jython.org} \bibitem{ironpython} IronPython. \url{www.ironpython.net} +\bibitem{beazley10} Beazley, David. "Understanding the python gil." + \emph{PyCON Python Conference}. Atlanta, Georgia. 2010. + +\bibitem{harris10} Harris, Tim, James Larus, and Ravi + Rajwar. "Transactional memory." \emph{Synthesis Lectures on Computer + Architecture 5.1} (2010): 1-263. + +\bibitem{guerraoui08} Guerraoui, Rachid, and Michal Kapalka. "On the + correctness of transactional memory." \emph{Proceedings of the 13th + ACM SIGPLAN Symposium on Principles and practice of parallel + programming.} ACM, 2008. + +\bibitem{blundell06} Blundell, Colin, E. Christopher Lewis, and Milo + Martin. 
"Unrestricted transactional memory: Supporting I/O and system + calls within transactions." (2006). +\bibitem{spear08} Spear, Michael F., et al. "Implementing and + exploiting inevitability in software transactional memory." + \emph{Parallel Processing, 2008}. ICPP'08. 37th International + Conference on. IEEE, 2008. + +\bibitem{fergus02} Fergus Henderson. 2002. Accurate garbage collection + in an uncooperative environment. \emph{In Proceedings of the 3rd + international symposium on Memory management} (ISMM '02). + +\bibitem{stmupdate13} Armin Rigo, Remigius Meier. Update on + STM. \url{morepypy.blogspot.ch/2013/10/update-on-stm.html} + +\bibitem{rajwar05} Rajwar, Ravi, Maurice Herlihy, and Konrad + Lai. "Virtualizing transactional memory." \emph{Computer + Architecture}, 2005. ISCA'05. Proceedings. 32nd International + Symposium on. IEEE, 2005. + +\bibitem{chung06} Chung, JaeWoong, et al. "Tradeoffs in transactional + memory virtualization." \emph{ACM SIGARCH Computer Architecture + News}. Vol. 34. No. 5. ACM, 2006. + +\bibitem{martin09} Martín Abadi, Tim Harris, and Mojtaba + Mehrara. 2009. Transactional memory with strong atomicity using + off-the-shelf memory protection hardware. SIGPLAN Not. 44, 4 (February + 2009), 185-196. 
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \bibitem{dan07} From noreply at buildbot.pypy.org Mon Jun 2 16:45:56 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 2 Jun 2014 16:45:56 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: Tweaks Message-ID: <20140602144556.512041C0026@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r5285:67e9c71705a4 Date: 2014-06-02 16:45 +0200 http://bitbucket.org/pypy/extradoc/changeset/67e9c71705a4/ Log: Tweaks diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex --- a/talk/dls2014/paper/paper.tex +++ b/talk/dls2014/paper/paper.tex @@ -167,7 +167,7 @@ synchronisation mechanism that avoids several of the problems of locks as they are used now. -TM systems come in can be broadly categorised as hardware based (HTM), +TM systems come in\arigo{typo?} can be broadly categorised as hardware based (HTM), software based (STM), or hybrid systems (HyTM). HTM systems are limited by hardware constraints~\cite{odaira14,fuad10}, while STM systems have a lot of overhead~\cite{cascaval08,drago11}. In \cite{wayforward14}, @@ -184,8 +184,8 @@ that performs well even on low numbers of CPUs. It uses a novel combination of long-existing CPU features and garbage collector (GC) integration in order to keep the overhead of STM very low. -\item This new STM system is used to replace the GIL in Python and is - then evaluated extensively. +\item This new STM system is used to replace the GIL in one implementation + of Python and is then evaluated extensively. \item We introduce atomic blocks to the Python language to provide a backwards compatible, composable synchronisation mechanism for threads. @@ -445,9 +445,8 @@ CPU. Then, depending on the current mapping of virtual pages to file pages, these LAs can map to a single file page in the sharing-segment, or to privatised file pages in the corresponding segments. 
This -mapping is also performed efficiently by CPUs that have a Memory -Management Unit (MMU) and can easily be done on every access to an -object. +mapping is cost-free: it is part of the normal operations done by the +Memory Management Unit (MMU) of the CPU on every memory access. In summary, $\%gs{::}SO$ is translated efficiently by the CPU to either a physical memory location which is shared between several @@ -469,15 +468,18 @@ We now use these mechanisms to provide isolation for transactions. Using write barriers, we implement a \emph{Copy-On-Write (COW)} on the -level of pages. Starting from the initial fully-shared configuration +level of pages~\footnote{Conflict detection still occurs on the level +of objects.}. Starting from the initial fully-shared configuration (figure \ref{fig:Page-Remapping}, (II)), when we need to modify an object without other threads seeing the changes immediately, we ensure that all pages belonging to the object are private to our segment. -To detect when to privatise pages, we use write barriers before every -write to an object. When the barrier detects that the object is not in -a private page (or any pages that belong to the object), we remap and -copy the pages to the thread's segment. From now on, the translation +More precisely, this is done by a write barrier that detects that we are +about to write to an old (pre-transaction) object that we did not record +in the write-set yet. When this occurs, the slow-path of the write barrier +will also check if the page (or pages) containing the object is still +shared, and if so, privatise it. This is done by remapping and copying +the page to the thread's own segment. From now on, the translation of $\%gs{::}SO$ in this particular thread will resolve to a private version of the object automatically. Note that the $SO$ used to reference the object does not change during that process. 
From noreply at buildbot.pypy.org Mon Jun 2 17:23:41 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:41 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: new branch for incminimark object pinning Message-ID: <20140602152341.2B2421C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71806:201bcd3becf9 Date: 2014-05-08 12:53 +0200 http://bitbucket.org/pypy/pypy/changeset/201bcd3becf9/ Log: new branch for incminimark object pinning From noreply at buildbot.pypy.org Mon Jun 2 17:23:42 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:42 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added pinning functions to rgc, rfile, rffi and ll_os Message-ID: <20140602152342.C638C1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71807:d6c6003d1ae9 Date: 2014-05-08 13:34 +0200 http://bitbucket.org/pypy/pypy/changeset/d6c6003d1ae9/ Log: added pinning functions to rgc, rfile, rffi and ll_os work in progress (for a long time). RFile example compiles and runs. 
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -476,6 +476,11 @@ self.set_major_threshold_from(0.0) ll_assert(self.extra_threshold == 0, "extra_threshold set too early") self.initial_cleanup = self.nursery_size + + # XXX remove (groggi) + debug_print("nursery start ", self.nursery) + debug_print("nursery top ", self.nursery_top) + debug_stop("gc-set-nursery-size") diff --git a/rpython/rlib/rfile.py b/rpython/rlib/rfile.py --- a/rpython/rlib/rfile.py +++ b/rpython/rlib/rfile.py @@ -133,7 +133,7 @@ if not ll_file: raise ValueError("I/O operation on closed file") assert value is not None - ll_value = rffi.get_nonmovingbuffer(value) + ll_value, is_pinned, is_raw = rffi.get_nonmovingbuffer(value) try: # note that since we got a nonmoving buffer, it is either raw # or already cannot move, so the arithmetics below are fine @@ -143,7 +143,7 @@ errno = rposix.get_errno() raise OSError(errno, os.strerror(errno)) finally: - rffi.free_nonmovingbuffer(value, ll_value) + rffi.free_nonmovingbuffer(value, ll_value, is_pinned, is_raw) def close(self): """Closes the described file. diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -18,6 +18,34 @@ """ pass +def pin(obj): + """If 'obj' can move, then attempt to temporarily fix it. This + function returns True if and only if 'obj' could be pinned; this is + a special state in the GC. Note that can_move(obj) still returns + True even on pinned objects, because once unpinned it will indeed be + able to move again. In other words, the code that succeeded in + pinning 'obj' can assume that it won't move until the corresponding + call to unpin(obj), despite can_move(obj) still being True. (This + is important if multiple threads try to os.write() the same string: + only one of them will succeed in pinning the string.) 
+ + Note that this can return False for any reason, e.g. if the 'obj' is + already non-movable or already pinned, if the GC doesn't support + pinning, or if there are too many pinned objects. + """ + # XXX doc string based on gc-minimark-pinning branch + # XXX use doc string a basis for implementation behavior + # XXX update doc string to match actual behavior + return False + +def unpin(obj): + """Unpin 'obj', allowing it to move again. + Must only be called after a call to pin(obj) returned True. + """ + # XXX update doc string to match actual behavior + raise AssertionError("pin() always returns False, " + "so unpin() should not be called") + # ____________________________________________________________ # Annotation and specialization diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -23,6 +23,9 @@ from rpython.translator.platform import CompilationError import os, sys +# XXX remove (groggi) +from rpython.rlib.debug import debug_print, debug_start, debug_stop + class CConstant(Symbolic): """ A C-level constant, maybe #define, rendered directly. """ @@ -735,7 +738,7 @@ i += 1 return assert_str0(b.build()) - # str -> char* + # str -> char*, bool, bool # Can't inline this because of the raw address manipulation. @jit.dont_look_inside def get_nonmovingbuffer(data): @@ -744,23 +747,52 @@ arithmetic to return a pointer to the characters of a string if the string is already nonmovable. Must be followed by a free_nonmovingbuffer call. + + First bool returned indicates if 'data' was pinned. Second bool returned + indicates if we did a raw alloc because pinning didn't work. Both bools + should never be true at the same time.
""" + # XXX update doc string + + debug_start("groggi-get_nonmovingbuffer") + debug_print("data address ", cast_ptr_to_adr(data)) + lldata = llstrtype(data) + count = len(data) + + pinned = False if rgc.can_move(data): - count = len(data) - buf = lltype.malloc(TYPEP.TO, count, flavor='raw') - copy_string_to_raw(lldata, buf, 0, count) - return buf + if rgc.pin(data): + debug_print("raw_and_pinned: len = %s" % count) + pinned = True + else: + debug_print("allocating_raw_and_copying: len = %s" % count) + + buf = lltype.malloc(TYPEP.TO, count, flavor='raw') + copy_string_to_raw(lldata, buf, 0, count) + + debug_stop("groggi-get_nonmovingbuffer") + return buf, pinned, True + # ^^^ raw malloc used to get a nonmovable copy else: - data_start = cast_ptr_to_adr(lldata) + \ - offsetof(STRTYPE, 'chars') + itemoffsetof(STRTYPE.chars, 0) - return cast(TYPEP, data_start) + debug_print("raw_and_nonmovable: len = %s" % count) + + # following code is executed if: + # - rgc.can_move(data) and rgc.pin(data) both returned true + # - rgc.can_move(data) returned false + data_start = cast_ptr_to_adr(lldata) + \ + offsetof(STRTYPE, 'chars') + itemoffsetof(STRTYPE.chars, 0) + + debug_stop("groggi-get_nonmovingbuffer") + return cast(TYPEP, data_start), pinned, False + # ^^^ already nonmovable. Therefore it's not raw allocated nor + # pinned. get_nonmovingbuffer._annenforceargs_ = [strtype] - # (str, char*) -> None + # (str, char*, bool, bool) -> None # Can't inline this because of the raw address manipulation. @jit.dont_look_inside - def free_nonmovingbuffer(data, buf): + def free_nonmovingbuffer(data, buf, is_pinned, is_raw): """ Either free a non-moving buffer or keep the original storage alive. """ @@ -769,14 +801,23 @@ # if 'buf' points inside 'data'. This is only possible if we # followed the 2nd case in get_nonmovingbuffer(); in the first case, # 'buf' points to its own raw-malloced memory. 
- data = llstrtype(data) - data_start = cast_ptr_to_adr(data) + \ - offsetof(STRTYPE, 'chars') + itemoffsetof(STRTYPE.chars, 0) - followed_2nd_path = (buf == cast(TYPEP, data_start)) + + debug_start("groggi-free_nonmovingbuffer") + debug_print("data address ", cast_ptr_to_adr(data)) + + assert not (is_pinned and is_raw) + + if is_pinned: + rgc.unpin(data) + elif is_raw: + lltype.free(buf, flavor='raw') + # if is_pinned and is_raw are false: data was already nonmovable, + # we have nothing to clean up + keepalive_until_here(data) - if not followed_2nd_path: - lltype.free(buf, flavor='raw') - free_nonmovingbuffer._annenforceargs_ = [strtype, None] + + debug_stop("groggi-free_nonmovingbuffer") + free_nonmovingbuffer._annenforceargs_ = [strtype, None, bool, bool] # int -> (char*, str) def alloc_buffer(count): diff --git a/rpython/rtyper/module/ll_os.py b/rpython/rtyper/module/ll_os.py --- a/rpython/rtyper/module/ll_os.py +++ b/rpython/rtyper/module/ll_os.py @@ -1028,7 +1028,7 @@ def os_write_llimpl(fd, data): count = len(data) rposix.validate_fd(fd) - buf = rffi.get_nonmovingbuffer(data) + buf, is_pinned, is_raw = rffi.get_nonmovingbuffer(data) try: written = rffi.cast(lltype.Signed, os_write( rffi.cast(rffi.INT, fd), @@ -1036,7 +1036,7 @@ if written < 0: raise OSError(rposix.get_errno(), "os_write failed") finally: - rffi.free_nonmovingbuffer(data, buf) + rffi.free_nonmovingbuffer(data, buf, is_pinned, is_raw) return written return extdef([int, str], SomeInteger(nonneg=True), From noreply at buildbot.pypy.org Mon Jun 2 17:23:44 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:44 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: GC transformation now replaces rgc.pin/rgc.unpin Message-ID: <20140602152344.0775E1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71808:267d6a45eb2a Date: 2014-05-08 14:35 +0200 http://bitbucket.org/pypy/pypy/changeset/267d6a45eb2a/ Log: GC 
transformation now replaces rgc.pin/rgc.unpin with actual GC pin function. RFile example compiles and runs. diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -175,6 +175,12 @@ def can_move(self, addr): return False + def pin(self, addr): + return False + + def unpin(self, addr): + pass + def set_max_heap_size(self, size): raise NotImplementedError diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -65,6 +65,9 @@ from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop from rpython.rlib.objectmodel import specialize +# XXX remove (groggi) +from rpython.rlib.debug import debug_print, debug_start, debug_stop + # # Handles the objects in 2 generations: @@ -891,6 +894,16 @@ """Overrides the parent can_move().""" return self.is_in_nursery(obj) + def pin(self, obj): + debug_start("groggi-incminimark-pin") + debug_stop("groggi-incminimark-pin") + return False + + + def unpin(self, obj): + debug_start("groggi-incminimark-unpin") + debug_stop("groggi-incminimark-unpin") + def shrink_array(self, obj, smallerlength): # diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -456,6 +456,16 @@ annmodel.SomeInteger(nonneg=True)], annmodel.s_None) + # XXX understand this, correct? (groggi) + self.pin_ptr = getfn(GCClass.pin, + [s_gc, s_gcref], + annmodel.SomeBool()) + + # XXX understand this, correct? 
(groggi) + self.unpin_ptr = getfn(GCClass.unpin, + [s_gc, s_gcref], + annmodel.s_None) + self.write_barrier_ptr = None self.write_barrier_from_array_ptr = None if GCClass.needs_write_barrier: @@ -967,6 +977,15 @@ self.c_const_gc, v_size]) + def gct_gc_pin(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.pin_ptr, self.c_const_gc, op.args[0]], + resultvar=op.result) + + def gct_gc_unpin(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.unpin_ptr, self.c_const_gc, op.args[0]]) + def gct_gc_thread_run(self, hop): assert self.translator.config.translation.thread if hasattr(self.root_walker, 'thread_run_ptr'): diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py --- a/rpython/memory/gcwrapper.py +++ b/rpython/memory/gcwrapper.py @@ -5,6 +5,9 @@ from rpython.memory import gctypelayout from rpython.flowspace.model import Constant +# XXX remove (groggi) +from rpython.rlib.debug import debug_print, debug_start, debug_stop + class GCManagedHeap(object): @@ -122,6 +125,16 @@ def can_move(self, addr): return self.gc.can_move(addr) + def pin(self, addr): + debug_start("groggi-gcwrapper-pin") + debug_stop("groggi-gcwrapper-pin") + return self.gc.pin(addr) + + def unpin(self, addr): + debug_start("groggi-gcwrapper-unpin") + self.gc.unpin(addr) + debug_stop("groggi-gcwrapper-unpin") + def weakref_create_getlazy(self, objgetter): # we have to be lazy in reading the llinterp variable containing # the 'obj' pointer, because the gc.malloc() call below could diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -8,6 +8,9 @@ from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.rtyper.lltypesystem import lltype, llmemory +# XXX remove (groggi) +from rpython.rlib.debug import debug_print, debug_start, debug_stop + # ____________________________________________________________ # General GC features @@ -36,6 +39,8 @@ # XXX doc string based on gc-minimark-pinning branch # XXX 
use doc string a basis for implementation behavior # XXX update doc string to match actual behavior + debug_start("groggi-rgc-pin") + debug_stop("groggi-rgc-pin") return False def unpin(obj): @@ -602,5 +607,32 @@ hop.exception_cannot_occur() return hop.genop('gc_gcflag_extra', vlist, resulttype = hop.r_result) +class Entry(ExtRegistryEntry): # XXX understand this, is it correct? (groggi) + _about_ = pin + + def compute_result_annotation(self, s_arg): + from rpython.annotator.model import SomeBool + return SomeBool() + + def specialize_call(self, hop): + hop.exception_cannot_occur() + v_obj, = hop.inputargs(hop.args_r[0]) + v_addr = hop.genop('cast_ptr_to_adr', [v_obj], + resulttype=llmemory.Address) + return hop.genop('gc_pin', [v_addr], resulttype=lltype.Bool) + +class Entry(ExtRegistryEntry): # XXX understand this, is it correct? (groggi) + _about_ = unpin + + def compute_result_annotation(self, s_arg): + pass + + def specialize_call(self, hop): + hop.exception_cannot_occur() + v_obj, = hop.inputargs(hop.args_r[0]) + v_addr = hop.genop('cast_ptr_to_adr', [v_obj], + resulttype=llmemory.Address) + hop.genop('gc_unpin', [v_addr]) + def lltype_is_gc(TP): return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc' diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -859,6 +859,12 @@ def op_gc_stack_bottom(self): pass # marker for trackgcroot.py + def op_gc_pin(self, obj): + return self.heap.pin(obj) + + def op_gc_unpin(self, obj): + self.heap.unpin(obj) + def op_gc_detach_callback_pieces(self): raise NotImplementedError("gc_detach_callback_pieces") def op_gc_reattach_callback_pieces(self): diff --git a/rpython/rtyper/lltypesystem/llheap.py b/rpython/rtyper/lltypesystem/llheap.py --- a/rpython/rtyper/lltypesystem/llheap.py +++ b/rpython/rtyper/lltypesystem/llheap.py @@ -32,3 +32,10 @@ def thread_die(): pass + +def pin(obj): + return False + +def unpin(obj): + raise 
AssertionError("pin() always returns False, " + "so unpin() should not be called") diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -480,6 +480,8 @@ 'gc_writebarrier': LLOp(canrun=True), 'gc_writebarrier_before_copy': LLOp(canrun=True), 'gc_heap_stats' : LLOp(canmallocgc=True), + 'gc_pin' : LLOp(canrun=True), # XXX understand this, correct? (groggi) + 'gc_unpin' : LLOp(canrun=True), # XXX understand this, correct? (groggi) 'gc_get_rpy_roots' : LLOp(), 'gc_get_rpy_referents': LLOp(), From noreply at buildbot.pypy.org Mon Jun 2 17:23:45 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:45 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: started writing tests for object pinning GCs Message-ID: <20140602152345.4E2951C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71809:3fc50a1fcf86 Date: 2014-05-08 21:13 +0200 http://bitbucket.org/pypy/pypy/changeset/3fc50a1fcf86/ Log: started writing tests for object pinning GCs diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -0,0 +1,19 @@ +import py +from rpython.rtyper.lltypesystem import lltype +from rpython.memory.gc.incminimark import IncrementalMiniMarkGC +from test_direct import BaseDirectGCTest + +S = lltype.GcForwardReference() +S.become(lltype.GcStruct('S', ('someInt', lltype.Signed))) + +class PinningGCTest(BaseDirectGCTest): + def test_simple(self): + someIntValue = 100 + obj = self.malloc(S) + obj.someInt = someIntValue + self.gc.pin(obj) + self.gc.collect() # obj should still live + assert obj.someInt == someIntValue + +class TestIncminimark(PinningGCTest): + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as 
GCClass \ No newline at end of file From noreply at buildbot.pypy.org Mon Jun 2 17:23:46 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:46 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: Merge release-2.3.x into gc-incminimark-pinning Message-ID: <20140602152346.B5A631C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71810:e45d765f3c2d Date: 2014-05-09 10:51 +0200 http://bitbucket.org/pypy/pypy/changeset/e45d765f3c2d/ Log: Merge release-2.3.x into gc-incminimark-pinning diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -9,3 +9,4 @@ 20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 0000000000000000000000000000000000000000 release-2.3.0 +394146e9bb673514c61f0150ab2013ccf78e8de7 release-2.3 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -44,31 +44,33 @@ Alex Gaynor Michael Hudson David Schneider + Matti Picus + Brian Kearns + Philip Jenvey Holger Krekel Christian Tismer Hakan Ardo Benjamin Peterson - Matti Picus - Philip Jenvey + Manuel Jacob Anders Chrigstrom - Brian Kearns Eric van Riet Paap + Wim Lavrijsen + Ronan Lamy Richard Emslie Alexander Schremmer - Wim Lavrijsen Dan Villiom Podlaski Christiansen - Manuel Jacob Lukas Diekmann Sven Hager Anders Lehmann Aurelien Campeas Niklaus Haldimann - Ronan Lamy Camillo Bruni Laura Creighton Toon Verwaest + Remi Meier Leonardo Santagada Seo Sanghyeon + Romain Guillebert Justin Peel Ronny Pfannschmidt David Edelsohn @@ -80,52 +82,61 @@ Daniel Roberts Niko Matsakis Adrien Di Mascio + Alexander Hesse Ludovic Aubry - Alexander Hesse Jacob Hallen - Romain Guillebert Jason Creighton Alex Martelli Michal Bendowski Jan de Mooij + stian Michael Foord Stephan Diehl Stefan Schwarzer Valentino Volonghi Tomek Meka Patrick Maupin - stian Bob Ippolito Bruno Gola Jean-Paul Calderone Timo Paulssen + Squeaky Alexandre Fayolle Simon Burton Marius Gedminas John 
Witulski + Konstantin Lopuhin Greg Price Dario Bertini Mark Pearse Simon Cross - Konstantin Lopuhin Andreas Stührk Jean-Philippe St. Pierre Guido van Rossum Pavel Vinogradov + Paweł Piotr Przeradowski Paul deGrandis Ilya Osadchiy + Tobias Oberstein Adrian Kuhn Boris Feigin + Stefano Rivera tav + Taavi Burns Georg Brandl Bert Freudenberg Stian Andreassen - Stefano Rivera + Laurence Tratt Wanja Saatkamp Gerald Klix Mike Blume - Taavi Burns Oscar Nierstrasz + Stefan H. Muller + Jeremy Thurgood + Gregor Wegberg + Rami Chowdhury + Tobias Pape + Edd Barrett David Malcolm Eugene Oden Henry Mason @@ -135,18 +146,16 @@ Dusty Phillips Lukas Renggli Guenter Jantzen - Tobias Oberstein - Remi Meier Ned Batchelder Amit Regmi Ben Young Nicolas Chauvat Andrew Durdin + Andrew Chambers Michael Schneider Nicholas Riley Jason Chu Igor Trindade Oliveira - Jeremy Thurgood Rocco Moretti Gintautas Miliauskas Michael Twomey @@ -159,18 +168,19 @@ Karl Bartel Brian Dorsey Victor Stinner + Andrews Medina Stuart Williams Jasper Schulz + Christian Hudon Toby Watson Antoine Pitrou Aaron Iles Michael Cheng Justas Sadzevicius + Mikael Schönenberg Gasper Zejn Neil Shepperd - Mikael Schönenberg Elmo Mäntynen - Tobias Pape Jonathan David Riehl Stanislaw Halik Anders Qvist @@ -182,19 +192,18 @@ Alexander Sedov Corbin Simpson Christopher Pope - Laurence Tratt - Guillebert Romain + wenzhuman Christian Tismer + Marc Abramowitz Dan Stromberg Stefano Parmesan - Christian Hudon Alexis Daboville Jens-Uwe Mager Carl Meyer Karl Ramm Pieter Zieschang Gabriel - Paweł Piotr Przeradowski + Lukas Vacek Andrew Dalke Sylvain Thenault Nathan Taylor @@ -205,6 +214,7 @@ Travis Francis Athougies Kristjan Valur Jonsson Neil Blakey-Milner + anatoly techtonik Lutz Paelike Lucio Torre Lars Wassermann @@ -218,13 +228,14 @@ Martin Blais Lene Wagner Tomo Cocoa - Andrews Medina roberto at goyle + Yury V. 
Zaytsev + Anna Katrina Dominguez William Leslie Bobby Impollonia timo at eistee.fritz.box Andrew Thompson - Yusei Tahara + Ben Darnell Roberto De Ioris Juan Francisco Cantero Hurtado Godefroid Chappelle @@ -235,27 +246,35 @@ Anders Sigfridsson Yasir Suhail Floris Bruynooghe + Laurens Van Houtven Akira Li Gustavo Niemeyer Stephan Busemann - Anna Katrina Dominguez + Rafał Gałczyński + Yusei Tahara Christian Muirhead James Lan shoma hosaka - Daniel Neuhäuser + Daniel Neuh?user + Matthew Miller Buck Golemon Konrad Delong Dinu Gherman Chris Lambacher coolbutuseless at gmail.com + Rodrigo Araújo + w31rd0 Jim Baker - Rodrigo Araújo + James Robert Armin Ronacher Brett Cannon yrttyr + aliceinwire + OlivierBlanvillain Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + jiaaro opassembler.py Antony Lee Jim Hunziker @@ -263,12 +282,13 @@ Even Wiik Thomassen jbs soareschen + Kurt Griffiths + Mike Bayer Flavio Percoco Kristoffer Kleine yasirs Michael Chermside Anna Ravencroft - Andrew Chambers Julien Phalip Dan Loewenherz diff --git a/lib_pypy/_tkinter/tklib.py b/lib_pypy/_tkinter/tklib.py --- a/lib_pypy/_tkinter/tklib.py +++ b/lib_pypy/_tkinter/tklib.py @@ -121,6 +121,10 @@ incdirs = [] linklibs = ['tcl85', 'tk85'] libdirs = [] +elif sys.platform == 'darwin': + incdirs = ['/System/Library/Frameworks/Tk.framework/Versions/Current/Headers/'] + linklibs = ['tcl', 'tk'] + libdirs = [] else: incdirs=['/usr/include/tcl'] linklibs=['tcl', 'tk'] diff --git a/pypy/doc/whatsnew-2.3.0.rst b/pypy/doc/whatsnew-2.3.0.rst --- a/pypy/doc/whatsnew-2.3.0.rst +++ b/pypy/doc/whatsnew-2.3.0.rst @@ -167,3 +167,6 @@ .. branch: fix-tpname Changes hacks surrounding W_TypeObject.name to match CPython's tp_name + +.. branch: tkinter_osx_packaging +OS/X specific header path diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,4 +3,4 @@ ======================= .. this is a revision shortly after release-2.3.x -.. 
startrev: ec864bd08d50 +.. startrev: b2cc67adbaad diff --git a/pypy/module/fcntl/interp_fcntl.py b/pypy/module/fcntl/interp_fcntl.py --- a/pypy/module/fcntl/interp_fcntl.py +++ b/pypy/module/fcntl/interp_fcntl.py @@ -62,8 +62,8 @@ fcntl_int = external('fcntl', [rffi.INT, rffi.INT, rffi.INT], rffi.INT) fcntl_str = external('fcntl', [rffi.INT, rffi.INT, rffi.CCHARP], rffi.INT) fcntl_flock = external('fcntl', [rffi.INT, rffi.INT, _flock], rffi.INT) -ioctl_int = external('ioctl', [rffi.INT, rffi.INT, rffi.INT], rffi.INT) -ioctl_str = external('ioctl', [rffi.INT, rffi.INT, rffi.CCHARP], rffi.INT) +ioctl_int = external('ioctl', [rffi.INT, rffi.UINT, rffi.INT], rffi.INT) +ioctl_str = external('ioctl', [rffi.INT, rffi.UINT, rffi.CCHARP], rffi.INT) has_flock = cConfig.has_flock if has_flock: diff --git a/pypy/module/fcntl/test/test_fcntl.py b/pypy/module/fcntl/test/test_fcntl.py --- a/pypy/module/fcntl/test/test_fcntl.py +++ b/pypy/module/fcntl/test/test_fcntl.py @@ -11,7 +11,9 @@ os.unlink(i) class AppTestFcntl: - spaceconfig = dict(usemodules=('fcntl', 'array', 'struct', 'termios', 'select', 'rctime')) + spaceconfig = dict(usemodules=('fcntl', 'array', 'struct', 'termios', + 'select', 'rctime')) + def setup_class(cls): tmpprefix = str(udir.ensure('test_fcntl', dir=1).join('tmp_')) cls.w_tmp = cls.space.wrap(tmpprefix) @@ -267,6 +269,31 @@ os.close(mfd) os.close(sfd) + def test_ioctl_signed_unsigned_code_param(self): + import fcntl + import os + import pty + import struct + import termios + + mfd, sfd = pty.openpty() + try: + if termios.TIOCSWINSZ < 0: + set_winsz_opcode_maybe_neg = termios.TIOCSWINSZ + set_winsz_opcode_pos = termios.TIOCSWINSZ & 0xffffffffL + else: + set_winsz_opcode_pos = termios.TIOCSWINSZ + set_winsz_opcode_maybe_neg, = struct.unpack("i", + struct.pack("I", termios.TIOCSWINSZ)) + + our_winsz = struct.pack("HHHH",80,25,0,0) + # test both with a positive and potentially negative ioctl code + new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_pos, our_winsz) + 
new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, our_winsz) + finally: + os.close(mfd) + os.close(sfd) + def test_large_flag(self): import sys if any(plat in sys.platform From noreply at buildbot.pypy.org Mon Jun 2 17:23:48 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:48 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added more test cases for gc object pinning. Message-ID: <20140602152348.201411C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71811:ec36db23050d Date: 2014-05-12 11:50 +0200 http://bitbucket.org/pypy/pypy/changeset/ec36db23050d/ Log: added more test cases for gc object pinning. The main test case (test_simple_pin) still fails because of missing implementation. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -1,5 +1,5 @@ import py -from rpython.rtyper.lltypesystem import lltype +from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from test_direct import BaseDirectGCTest @@ -7,13 +7,52 @@ S.become(lltype.GcStruct('S', ('someInt', lltype.Signed))) class PinningGCTest(BaseDirectGCTest): - def test_simple(self): - someIntValue = 100 - obj = self.malloc(S) - obj.someInt = someIntValue - self.gc.pin(obj) - self.gc.collect() # obj should still live - assert obj.someInt == someIntValue + + def test_simple_pin(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 + assert self.gc.pin(adr) + self.gc.collect() # ptr should still live + assert ptr.someInt == 100 + + def test_pin_can_move(self): + # even a pinned ptrect is considered to be movable. Only the code + # that called pin() knows if it is currently movable or not. + # Additionally it could be unpinned anytime. 
+ ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + assert self.gc.can_move(adr) + assert self.gc.pin(adr) + assert self.gc.can_move(adr) + + def test_pin_twice(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + assert self.gc.pin(adr) + assert not self.gc.pin(adr) + + # XXX test with multiple mallocs, and only part of them is pinned + class TestIncminimark(PinningGCTest): - from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass \ No newline at end of file + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass + + def test_pin_old(self): + ptr = self.malloc(S) + ptr.someInt = 100 + self.stackroots.append(ptr) + self.gc.collect() + ptr = self.stackroots[0] + adr = llmemory.cast_ptr_to_adr(ptr) + assert ptr.someInt == 100 + assert not self.gc.is_in_nursery(adr) + assert not self.gc.pin(adr) + # ^^^ should not be possible, struct is already old and won't + # move. + + # XXX test/define what happens if we try to pin an object that is too + # big for the nursery and will be raw-malloc'ed. + + # XXX test/define what happens if pinned object already has a shadow + # => shadow handling. From noreply at buildbot.pypy.org Mon Jun 2 17:23:49 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:49 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: fixed comment inside test Message-ID: <20140602152349.5B6701C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71812:5290b0ad0c35 Date: 2014-05-12 12:05 +0200 http://bitbucket.org/pypy/pypy/changeset/5290b0ad0c35/ Log: fixed comment inside test diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -17,9 +17,8 @@ assert ptr.someInt == 100 def test_pin_can_move(self): - # even a pinned ptrect is considered to be movable. 
Only the code - # that called pin() knows if it is currently movable or not. - # Additionally it could be unpinned anytime. + # even a pinned object is considered to be movable. Only the caller + # of pin() knows if it is currently movable or not. ptr = self.malloc(S) adr = llmemory.cast_ptr_to_adr(ptr) assert self.gc.can_move(adr) From noreply at buildbot.pypy.org Mon Jun 2 17:23:50 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:50 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: started with object pinning implementation inside incminimark. Message-ID: <20140602152350.980B11C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71813:f5617eea321d Date: 2014-05-12 12:12 +0200 http://bitbucket.org/pypy/pypy/changeset/f5617eea321d/ Log: started with object pinning implementation inside incminimark. Implemented the pin()/unpin() methods. This work is based on the `gc-minimark-pinning` branch. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -83,6 +83,7 @@ # minimarkpage.py (if they are small), or raw-malloced (if they are not # small). Collected by regular mark-n-sweep during major collections. # +# XXX update doc string to contain object pinning (groggi) WORD = LONG_BIT // 8 NULL = llmemory.NULL @@ -136,7 +137,13 @@ # a minor collection. GCFLAG_VISITED_RMY = first_gcflag << 8 -_GCFLAG_FIRST_UNUSED = first_gcflag << 9 # the first unused bit +# The following flag is set on nursery objects of which we expect not to +# move. This means that a young object with this flag is not moved out +# of the nursery during a minor collection. See pin()/unpin() for further +# details. +GCFLAG_PINNED = first_gcflag << 9 + +_GCFLAG_FIRST_UNUSED = first_gcflag << 10 # the first unused bit # States for the incremental GC @@ -361,6 +368,15 @@ # minor collection. 
self.nursery_objects_shadows = self.AddressDict() # + # A sorted deque containing all pinned objects *before* the last + # minor collection. This deque must be consulted when considering + # next nursery ceiling. + self.nursery_barriers = self.AddressDeque() + # + # Counter tracking how many pinned objects currently reside inside + # the nursery. + self.pinned_objects_in_nursery = 0 + # # Allocate a nursery. In case of auto_nursery_size, start by # allocating a very small nursery, enough to do things like look # up the env var, which requires the GC; and then really @@ -896,12 +912,42 @@ def pin(self, obj): debug_start("groggi-incminimark-pin") + # Tries to pin the given 'obj'. On success this method returns True, + # otherwise False. There are multiple reasons why a call returns False + # and it should be always expected that pinning is likely to fail + # (return False). + + # XXX what happens if nursery is full of pinned objects? (groggi) + # XXX what happens if pinned object references movable data? (groggi) + + if not self.is_in_nursery(obj): + # Old objects are already non-moving, therefore pinning + # makes no sense. If you run into this case, you may forgot + # to check if can_move(obj) already returns True in which + # case a call to pin() is unnecessary. + return False + if self.header(obj).tid & GCFLAG_PINNED: + # Already pinned, we do not allow to pin it again. + # Reason: It would be possible that the first caller unpins + # while the second caller thinks it's still pinned. + return False + + self.header(obj).tid |= GCFLAG_PINNED + self.pinned_objects_in_nursery += 1 + debug_print("pinned_objects_in_nursery: ", self.pinned_objects_in_nursery) debug_stop("groggi-incminimark-pin") - return False + return True def unpin(self, obj): + # Unpins a previously pinned 'obj'. This should only be called + # after a pin(obj). 
debug_start("groggi-incminimark-unpin") + ll_assert(self.header(obj) & GCFLAG_PINNED != 0, + "unpin: object is already not pinned") + self.header(obj).tid &= ~GCFLAG_PINNED + self.pinned_objects_in_nursery -= 1 + debug_print("pinned_objects_in_nursery: ", self.pinned_objects_in_nursery) debug_stop("groggi-incminimark-unpin") From noreply at buildbot.pypy.org Mon Jun 2 17:23:51 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:51 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: fix/extended object pinning test. Message-ID: <20140602152351.E76D31C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71814:c38c9596e3d1 Date: 2014-05-12 15:59 +0200 http://bitbucket.org/pypy/pypy/changeset/c38c9596e3d1/ Log: fix/extended object pinning test. Forgot to add the object to stackroots. Using now a Linked List like data structure for testing. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -4,17 +4,25 @@ from test_direct import BaseDirectGCTest S = lltype.GcForwardReference() -S.become(lltype.GcStruct('S', ('someInt', lltype.Signed))) +S.become(lltype.GcStruct('S', + ('someInt', lltype.Signed), + ('next', lltype.Ptr(S)))) class PinningGCTest(BaseDirectGCTest): def test_simple_pin(self): - ptr = self.malloc(S) - adr = llmemory.cast_ptr_to_adr(ptr) - ptr.someInt = 100 - assert self.gc.pin(adr) - self.gc.collect() # ptr should still live - assert ptr.someInt == 100 + ptrRoot = self.malloc(S) + self.stackroots.append(ptrRoot) + + ptrNext = self.malloc(S) + adrNext = llmemory.cast_ptr_to_adr(ptrNext) + + self.write(ptrRoot, 'next', ptrNext) + ptrNext.someInt = 100 + + assert self.gc.pin(adrNext) + self.gc.collect() # ptrNext should still live + assert ptrNext.someInt == 100 def test_pin_can_move(self): # even a pinned object is 
considered to be movable. Only the caller From noreply at buildbot.pypy.org Mon Jun 2 17:23:53 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:53 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added sort() to AddressStack using fijal's work in gc-minimark-pinning Message-ID: <20140602152353.361DF1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71815:cc3d72cc6edb Date: 2014-05-12 16:04 +0200 http://bitbucket.org/pypy/pypy/changeset/cc3d72cc6edb/ Log: added sort() to AddressStack using fijal's work in gc-minimark-pinning This was implemented by Maciej Fijalkowski in the gc-minimark-pinning branch. See commits: * 3ada686cfd218cac6f08e857c9cb4ec43cd68102 * d4a2fdd9a4ac012675a7a29b10364cb0db079954 diff --git a/rpython/memory/support.py b/rpython/memory/support.py --- a/rpython/memory/support.py +++ b/rpython/memory/support.py @@ -58,7 +58,30 @@ unused_chunks = FreeList() cache[chunk_size] = unused_chunks, null_chunk - return unused_chunks, null_chunk + + def partition(array, left, right): + last_item = array[right] + pivot = last_item + storeindex = left + for i in range(left, right): + if array[i] >= pivot: + array[i], array[storeindex] = array[storeindex], array[i] + storeindex += 1 + # Move pivot to its final place + array[storeindex], array[right] = last_item, array[storeindex] + return storeindex + + def quicksort(array, left, right): + # sort array[left:right+1] (i.e.
bounds included) + if right > left: + pivotnewindex = partition(array, left, right) + quicksort(array, left, pivotnewindex - 1) + quicksort(array, pivotnewindex + 1, right) + + def sort_chunk(chunk, size): + quicksort(chunk.items, 0, size - 1) + + return unused_chunks, null_chunk, sort_chunk def get_address_stack(chunk_size=DEFAULT_CHUNK_SIZE, cache={}): @@ -67,7 +90,7 @@ except KeyError: pass - unused_chunks, null_chunk = get_chunk_manager(chunk_size) + unused_chunks, null_chunk, sort_chunk = get_chunk_manager(chunk_size) class AddressStack(object): _alloc_flavor_ = "raw" @@ -174,6 +197,13 @@ chunk.items[count] = got got = next + def sort(self): + """Sorts the items in the AddressStack. They must not be more + than one chunk of them. This results in a **reverse** order, + so that the first pop()ped items are the smallest ones.""" + ll_assert(self.chunk.next == null_chunk, "too big for sorting") + sort_chunk(self.chunk, self.used_in_last_chunk) + cache[chunk_size] = AddressStack return AddressStack diff --git a/rpython/memory/test/test_support.py b/rpython/memory/test/test_support.py --- a/rpython/memory/test/test_support.py +++ b/rpython/memory/test/test_support.py @@ -3,9 +3,11 @@ from rpython.memory.support import get_address_deque from rpython.rtyper.test.test_llinterp import interpret -from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rtyper.lltypesystem import lltype, llmemory, llarena from rpython.rtyper.lltypesystem.llmemory import raw_malloc, raw_free, NULL +import random + class TestAddressStack(object): def test_simple_access(self): AddressStack = get_address_stack() @@ -106,6 +108,23 @@ assert b == a assert ll.length() == i + def test_sort(self): + AddressStack = get_address_stack(chunk_size=15) + lla = llarena.arena_malloc(10, 2) + addrs = [lla + i for i in range(10)] + for _ in range(13): + ll = AddressStack() + addr_copy = addrs[:] + random.shuffle(addr_copy) + for i in addr_copy: + ll.append(i) + ll.sort() + expected = range(10) 
+ for i in expected: + a = ll.pop() + assert a == addrs[i] + + class TestAddressDeque: def test_big_access(self): From noreply at buildbot.pypy.org Mon Jun 2 17:23:54 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:54 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: another rewrite of the test case for gc object pinning. Message-ID: <20140602152354.870F51C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71816:0e3a70d54595 Date: 2014-05-14 14:13 +0200 http://bitbucket.org/pypy/pypy/changeset/0e3a70d54595/ Log: another rewrite of the test case for gc object pinning. basically we now have two tests which are based on previous commits instead of only one that is not expected to work under current conditions diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -4,25 +4,43 @@ from test_direct import BaseDirectGCTest S = lltype.GcForwardReference() -S.become(lltype.GcStruct('S', +S.become(lltype.GcStruct('pinning_test_struct', ('someInt', lltype.Signed), ('next', lltype.Ptr(S)))) class PinningGCTest(BaseDirectGCTest): def test_simple_pin(self): - ptrRoot = self.malloc(S) - self.stackroots.append(ptrRoot) + ptr = self.malloc(S) + ptr.someInt = 100 + self.stackroots.append(ptr) - ptrNext = self.malloc(S) - adrNext = llmemory.cast_ptr_to_adr(ptrNext) + adr = llmemory.cast_ptr_to_adr(ptr) + assert self.gc.pin(adr) - self.write(ptrRoot, 'next', ptrNext) - ptrNext.someInt = 100 + self.gc.collect() - assert self.gc.pin(adrNext) - self.gc.collect() # ptrNext should still live - assert ptrNext.someInt == 100 + assert self.gc.is_in_nursery(adr) + assert ptr.someInt == 100 + + # XXX not implemented yet + def test_pin_referenced_from_stackroot(self): + root_ptr = self.malloc(S) + next_ptr = self.malloc(S) + self.write(root_ptr,
'next', next_ptr) + self.stackroots.append(root_ptr) + next_ptr.someInt = 100 + + next_adr = llmemory.cast_ptr_to_adr(next_ptr) + assert self.gc.pin(next_adr) + + self.gc.collect() + + assert self.gc.is_in_nursery(adr) + assert next_ptr.someInt == 100 + root_ptr = self.stackroots[0] + assert root_ptr.next == next_ptr + def test_pin_can_move(self): # even a pinned object is considered to be movable. Only the caller From noreply at buildbot.pypy.org Mon Jun 2 17:23:55 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:55 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: first version passing `test_simple_pin` in `test_object_pinning.py`. Message-ID: <20140602152355.E16371C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71817:f804676aaa5e Date: 2014-05-14 14:14 +0200 http://bitbucket.org/pypy/pypy/changeset/f804676aaa5e/ Log: first version passing `test_simple_pin` in `test_object_pinning.py`. does not translate right now. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -495,11 +495,6 @@ self.set_major_threshold_from(0.0) ll_assert(self.extra_threshold == 0, "extra_threshold set too early") self.initial_cleanup = self.nursery_size - - # XXX remove (groggi) - debug_print("nursery start ", self.nursery) - debug_print("nursery top ", self.nursery_top) - debug_stop("gc-set-nursery-size") @@ -706,37 +701,82 @@ and finally reserve 'totalsize' bytes at the start of the now-empty nursery. """ - if self.nursery_top < self.nursery_real_top: - self.move_nursery_top(totalsize) - return prev_result - self.minor_collection() - # - # If the gc_state is not STATE_SCANNING, we're in the middle of - # an incremental major collection. In this case, always progress - # one step. 
If the gc_state is STATE_SCANNING, wait until there - # is too much garbage before starting the next major collection. - if (self.gc_state != STATE_SCANNING or - self.get_total_memory_used() > - self.next_major_collection_threshold): - self.major_collection_step() + # XXX update doc to contain nursery_barrier (groggi) + + # keep track how many iteration we've gone trough + count = 0 + while True: + if self.nursery_barriers.non_empty(): + # we have multiple blocks of free memory in the nursery + # which are divided by pinned object. First thing to do + # is to move to the next free block. + size_gc_header = self.gcheaderbuilder.size_gc_header + pinned_obj_size = size_gc_header + self.get_size( + self.nursery_top + size_gc_header) + # ^^^ nursery_top points to the beginning of the header of + # the next object. To get the right address to call + # get_size(), we need to add the header to the address. + # + # move search area to the next free memory block in the + # nursery. + self.nursery_free = self.nursery_top + pinned_obj_size + self.move_nursery_top(llarena.getfakearenaaddress( + self.nursery_barriers.popleft()) - self.nursery_free) + else: + count += 1 + # + # no barriers (i.e. pinned objects) left. Check if there is + # enough space till we reach the real top of the nursery. + if self.nursery_top < self.nursery_real_top: + self.move_nursery_top(totalsize) + return prev_result + # + self.minor_collection() + if count == 1: + # + # If the gc_state is not STATE_SCANNING, we're in the middle of + # an incremental major collection. In this case, always progress + # one step. If the gc_state is STATE_SCANNING, wait until there + # is too much garbage before starting the next major collection. + if (self.gc_state != STATE_SCANNING or + self.get_total_memory_used() > + self.next_major_collection_threshold): + self.major_collection_step() + # + # The nursery might not be empty now, because of + # execute_finalizers(). 
If it is almost full again, + # we need to fix it with another call to minor_collection(). + if self.nursery_free + totalsize > self.nursery_top: + # + if self.nursery_free + totalsize > self.nursery_real_top: + self.minor_collection() + # then the nursery is empty + # XXX ^^^ not necessarily, update comment (groggi) + else: + # we just need to clean up a bit more of the nursery + #self.move_nursery_top(totalsize) + # do a loop, should take care of finding space + # XXX ^^^ rewrite comment the moment we're sure it's + # the correct way. + pass + else: + ll_assert(count == 2, + "Seeing minor_collection() at least twice. " + "Too many pinned objects?") + # - # The nursery might not be empty now, because of - # execute_finalizers(). If it is almost full again, - # we need to fix it with another call to minor_collection(). - if self.nursery_free + totalsize > self.nursery_top: - # - if self.nursery_free + totalsize > self.nursery_real_top: - self.minor_collection() - # then the nursery is empty - else: - # we just need to clean up a bit more of the nursery - self.move_nursery_top(totalsize) - # - result = self.nursery_free - self.nursery_free = result + totalsize - ll_assert(self.nursery_free <= self.nursery_top, "nursery overflow") + # attempt to get 'totalzise' out of the nursery now. This may + # fail again, and then we loop. Should be the uncommon case. + # XXX measure "uncommon" case (groggi) + result = self.nursery_free + self.nursery_free = result + totalsize + if self.nursery_free <= self.nursery_top: + break # if self.debug_tiny_nursery >= 0: # for debugging + # XXX solution for this assert? 
(groggi) + ll_assert(not self.nursery_barriers.non_empty(), + "no support for nursery debug and pinning") if self.nursery_top - self.nursery_free > self.debug_tiny_nursery: self.nursery_free = self.nursery_top - self.debug_tiny_nursery # @@ -911,7 +951,6 @@ return self.is_in_nursery(obj) def pin(self, obj): - debug_start("groggi-incminimark-pin") # Tries to pin the given 'obj'. On success this method returns True, # otherwise False. There are multiple reasons why a call returns False # and it should be always expected that pinning is likely to fail @@ -934,21 +973,16 @@ self.header(obj).tid |= GCFLAG_PINNED self.pinned_objects_in_nursery += 1 - debug_print("pinned_objects_in_nursery: ", self.pinned_objects_in_nursery) - debug_stop("groggi-incminimark-pin") return True def unpin(self, obj): # Unpins a previously pinned 'obj'. This should only be called # after a pin(obj). - debug_start("groggi-incminimark-unpin") ll_assert(self.header(obj) & GCFLAG_PINNED != 0, "unpin: object is already not pinned") self.header(obj).tid &= ~GCFLAG_PINNED self.pinned_objects_in_nursery -= 1 - debug_print("pinned_objects_in_nursery: ", self.pinned_objects_in_nursery) - debug_stop("groggi-incminimark-unpin") def shrink_array(self, obj, smallerlength): @@ -1119,11 +1153,19 @@ def debug_check_object(self, obj): # We are after a minor collection, and possibly after a major - # collection step. No object should be in the nursery - ll_assert(not self.is_in_nursery(obj), - "object in nursery after collection") - ll_assert(self.header(obj).tid & GCFLAG_VISITED_RMY == 0, - "GCFLAG_VISITED_RMY after collection") + # collection step. 
No object should be in the nursery (except + # pinned ones) + if self.header(obj).tid & GCFLAG_PINNED == 0: + ll_assert(not self.is_in_nursery(obj), + "object in nursery after collection") + ll_assert(self.header(obj).tid & GCFLAG_VISITED_RMY == 0, + "GCFLAG_VISITED_RMY after collection") + else: + # pinned objects are always in the nursery + ll_assert(self.is_in_nursery(obj), + "pinned object not in nursery") + # XXX gc-minimark-pinning checks for GCFLAG_TRACK_YOUNG_POINTER + # (groggi) if self.gc_state == STATE_SCANNING: self._debug_check_object_scanning(obj) @@ -1169,9 +1211,11 @@ # All objects should have this flag, except if they # don't have any GC pointer typeid = self.get_type_id(obj) - if self.has_gcptr(typeid): - ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0, - "missing GCFLAG_TRACK_YOUNG_PTRS") + if not self.header(obj).tid & GCFLAG_PINNED: + # XXX do we need checks if the object is actually pinned? (groggi) + if self.has_gcptr(typeid): + ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0, + "missing GCFLAG_TRACK_YOUNG_PTRS") # the GCFLAG_FINALIZATION_ORDERING should not be set between coll. ll_assert(self.header(obj).tid & GCFLAG_FINALIZATION_ORDERING == 0, "unexpected GCFLAG_FINALIZATION_ORDERING") @@ -1466,6 +1510,18 @@ self.old_objects_pointing_to_young.foreach( self._add_to_more_objects_to_trace, None) # + # Keeps track of surviving pinned objects. See also `_trace_drag_out()` + # where this stack is filled. + self.surviving_pinned_objects = self.AddressStack() + # + # The following counter keeps track of the amount of alive and pinned + # objects inside the nursery. The counter is reset, as we have to + # check which pinned objects are actually still alive. Pinning an + # object does not prevent the removal of an object, if it's not used + # anymore. + # XXX is this true? does it make sense? (groggi) + self.pinned_objects_in_nursery = 0 + # # First, find the roots that point to young objects. 
All nursery # objects found are copied out of the nursery, and the occasional # young raw-malloced object is flagged with GCFLAG_VISITED_RMY. @@ -1505,6 +1561,8 @@ self.deal_with_young_objects_with_finalizers() # # Clear this mapping. + # XXX gc-minimark-pinning contains some additional code + # in regard to pinned object. TODO (groggi) if self.nursery_objects_shadows.length() > 0: self.nursery_objects_shadows.clear() # @@ -1513,17 +1571,64 @@ if self.young_rawmalloced_objects: self.free_young_rawmalloced_objects() # - # All live nursery objects are out, and the rest dies. Fill - # the nursery up to the cleanup point with zeros - llarena.arena_reset(self.nursery, self.nursery_size, 0) - llarena.arena_reset(self.nursery, self.initial_cleanup, 2) - self.debug_rotate_nursery() + # All live nursery objects are out of the nursery or pinned inside + # the nursery. Create nursery barriers to protect the pinned object, + # fill the rest of the nursery with zeros and reset the current nursery + # pointer. 
+ size_gc_header = self.gcheaderbuilder.size_gc_header + nursery_barriers = self.AddressDeque() + prev = self.nursery + self.surviving_pinned_objects.sort() + assert self.pinned_objects_in_nursery == \ + self.surviving_pinned_objects.length() + while self.surviving_pinned_objects.non_empty(): + # + # prepare information about the surviving pinned object + next = self.surviving_pinned_objects.pop() + assert next >= prev + # + # clear the arena between the last pinned object or arena start + # and the pinned object + pinned_obj_size = llarena.getfakearenaaddress(next) - prev + llarena.arena_reset(prev, pinned_obj_size, 2) + # + # clean up object's flags + obj = next + size_gc_header + self.header(obj).tid &= ~GCFLAG_VISITED + # + # create a new nursery barrier for the pinned object + nursery_barriers.append(next) + # + # update 'prev' to the end of the 'next' object + prev = prev + pinned_obj_size + \ + (size_gc_header + self.get_size(obj)) + # + # clear the rest of the arena + llarena.arena_reset(prev, self.nursery_real_top - prev, 2) + # ^^^ calculate the size of the last continuous + # arena block. + # + self.surviving_pinned_objects.delete() + self.nursery_barriers.delete() + self.nursery_barriers = nursery_barriers + # XXX gc-minimark-pinning does a debug_rotate_nursery() here (groggi) self.nursery_free = self.nursery - self.nursery_top = self.nursery + self.initial_cleanup - self.nursery_real_top = self.nursery + self.nursery_size + self.nursery_barriers.append(self.nursery + self.nursery_size) + self.nursery_top = self.nursery_barriers.popleft() + +# All live nursery objects are out, and the rest dies. 
Fill +# the nursery up to the cleanup point with zeros +# llarena.arena_reset(self.nursery, self.nursery_size, 0) +# llarena.arena_reset(self.nursery, self.initial_cleanup, 2) +# self.debug_rotate_nursery() +# self.nursery_free = self.nursery +# self.nursery_top = self.nursery + self.initial_cleanup +# self.nursery_real_top = self.nursery + self.nursery_size # debug_print("minor collect, total memory used:", self.get_total_memory_used()) + debug_print("number of pinned objects:", + self.pinned_objects_in_nursery) if self.DEBUG >= 2: self.debug_check_consistency() # expensive! # @@ -1685,7 +1790,7 @@ return # size_gc_header = self.gcheaderbuilder.size_gc_header - if self.header(obj).tid & GCFLAG_HAS_SHADOW == 0: + if self.header(obj).tid & (GCFLAG_HAS_SHADOW | GCFLAG_PINNED) == 0: # # Common case: 'obj' was not already forwarded (otherwise # tid == -42, containing all flags), and it doesn't have the @@ -1696,12 +1801,25 @@ newhdr = self._malloc_out_of_nursery(totalsize) # elif self.is_forwarded(obj): + # XXX check if GCFLAG_PINNED is relevant for this case (groggi) # # 'obj' was already forwarded. Change the original reference # to point to its forwarding address, and we're done. root.address[0] = self.get_forwarding_address(obj) return # + elif self.header(obj).tid & GCFLAG_PINNED: + hdr = self.header(obj) + if hdr.tid & GCFLAG_VISITED: + # already visited and keeping track of the object + return + hdr.tid |= GCFLAG_VISITED + # XXX check for object flags that are not supported alongside + # GCFLAG_PINNED (groggi) + self.surviving_pinned_objects.append( + llarena.getfakearenaaddress(obj - size_gc_header)) + self.pinned_objects_in_nursery += 1 + return else: # First visit to an object that has already a shadow. 
newobj = self.nursery_objects_shadows.get(obj) @@ -1857,7 +1975,7 @@ # Debugging checks ll_assert(self.nursery_free == self.nursery, "nursery not empty in major_collection_step()") - self.debug_check_consistency() + self.debug_check_consistency() # YYY # XXX currently very course increments, get this working then split @@ -2108,7 +2226,8 @@ # and the GCFLAG_VISITED will be reset at the end of the # collection. hdr = self.header(obj) - if hdr.tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS): + if hdr.tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS | GCFLAG_PINNED): + # XXX ^^^ update doc in any way because of GCFLAG_PINNED addition? (groggi) return 0 # # It's the first time. We set the flag VISITED. The trick is From noreply at buildbot.pypy.org Mon Jun 2 17:23:57 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:57 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove comment left from debugging Message-ID: <20140602152357.309491C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71818:08b887c048f8 Date: 2014-05-14 14:15 +0200 http://bitbucket.org/pypy/pypy/changeset/08b887c048f8/ Log: remove comment left from debugging diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1975,7 +1975,7 @@ # Debugging checks ll_assert(self.nursery_free == self.nursery, "nursery not empty in major_collection_step()") - self.debug_check_consistency() # YYY + self.debug_check_consistency() # XXX currently very course increments, get this working then split From noreply at buildbot.pypy.org Mon Jun 2 17:23:58 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:58 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: fixed translation problem for RFile example. 
Message-ID: <20140602152358.683611C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71819:efb1928f2226 Date: 2014-05-14 14:29 +0200 http://bitbucket.org/pypy/pypy/changeset/efb1928f2226/ Log: fixed translation problem for RFile example. Still not completely understanding the translation process, but getting there step by step. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -979,7 +979,7 @@ def unpin(self, obj): # Unpins a previously pinned 'obj'. This should only be called # after a pin(obj). - ll_assert(self.header(obj) & GCFLAG_PINNED != 0, + ll_assert(self.header(obj).tid & GCFLAG_PINNED != 0, "unpin: object is already not pinned") self.header(obj).tid &= ~GCFLAG_PINNED self.pinned_objects_in_nursery -= 1 diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -58,6 +58,7 @@ assert not self.gc.pin(adr) # XXX test with multiple mallocs, and only part of them is pinned + # XXX test unpin() class TestIncminimark(PinningGCTest): diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -458,12 +458,12 @@ # XXX understand this, correct? (groggi) self.pin_ptr = getfn(GCClass.pin, - [s_gc, s_gcref], + [s_gc, SomeAddress()], annmodel.SomeBool()) # XXX understand this, correct? 
(groggi) self.unpin_ptr = getfn(GCClass.unpin, - [s_gc, s_gcref], + [s_gc, SomeAddress()], annmodel.s_None) self.write_barrier_ptr = None From noreply at buildbot.pypy.org Mon Jun 2 17:23:59 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:23:59 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: add debug output with nursery addresses. Message-ID: <20140602152359.BE3711C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71820:4c3b5fc85ad1 Date: 2014-05-14 14:47 +0200 http://bitbucket.org/pypy/pypy/changeset/4c3b5fc85ad1/ Log: add debug output with nursery addresses. This was once introduced in a previous commit, but removed by mistake. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -495,6 +495,9 @@ self.set_major_threshold_from(0.0) ll_assert(self.extra_threshold == 0, "extra_threshold set too early") self.initial_cleanup = self.nursery_size + debug_print("nursery start: ", self.nursery) # XXX remove (groggi) + debug_print("nursery top: ", self.nursery_top) # XXX remove (groggi) + debug_print("nursery_real_top: ", self.nursery_real_top) # XXX remove (groggi) debug_stop("gc-set-nursery-size") From noreply at buildbot.pypy.org Mon Jun 2 17:24:01 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:01 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: Merged release-2.3.x into gc-incminimark-pinning Message-ID: <20140602152401.1A07F1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71821:7c48f4e1f802 Date: 2014-05-14 14:57 +0200 http://bitbucket.org/pypy/pypy/changeset/7c48f4e1f802/ Log: Merged release-2.3.x into gc-incminimark-pinning diff --git a/pypy/module/struct/__init__.py b/pypy/module/struct/__init__.py --- a/pypy/module/struct/__init__.py +++ 
b/pypy/module/struct/__init__.py @@ -45,6 +45,8 @@ The variable struct.error is an exception raised on errors.""" + applevel_name = "_struct" + interpleveldefs = { 'error': 'interp_struct.get_error(space)', @@ -55,6 +57,7 @@ 'unpack_from': 'interp_struct.unpack_from', 'Struct': 'interp_struct.W_Struct', + '_clearcache': 'interp_struct.clearcache', } appleveldefs = { diff --git a/pypy/module/struct/interp_struct.py b/pypy/module/struct/interp_struct.py --- a/pypy/module/struct/interp_struct.py +++ b/pypy/module/struct/interp_struct.py @@ -138,3 +138,6 @@ pack_into=interp2app(W_Struct.descr_pack_into), unpack_from=interp2app(W_Struct.descr_unpack_from), ) + +def clearcache(space): + """No-op on PyPy""" diff --git a/rpython/translator/c/gcc/instruction.py b/rpython/translator/c/gcc/instruction.py --- a/rpython/translator/c/gcc/instruction.py +++ b/rpython/translator/c/gcc/instruction.py @@ -184,6 +184,9 @@ def __init__(self): self.delta = -7.25 # use this non-integer value as a marker +class InsnPushed(InsnStackAdjust): + pass + class InsnStop(Insn): _args_ = ['reason'] def __init__(self, reason='?'): diff --git a/rpython/translator/c/gcc/test/elf64/track_random_rsp_rbp.s b/rpython/translator/c/gcc/test/elf64/track_random_rsp_rbp.s new file mode 100644 --- /dev/null +++ b/rpython/translator/c/gcc/test/elf64/track_random_rsp_rbp.s @@ -0,0 +1,158 @@ + .type seterror.part.1, @function +seterror.part.1: +.LFB77: + .cfi_startproc + pushq %r14 + .cfi_def_cfa_offset 16 + .cfi_offset 14, -16 + pushq %r13 + .cfi_def_cfa_offset 24 + .cfi_offset 13, -24 + pushq %r12 + .cfi_def_cfa_offset 32 + .cfi_offset 12, -32 + pushq %rbp + .cfi_def_cfa_offset 40 + .cfi_offset 6, -40 + pushq %rbx + .cfi_def_cfa_offset 48 + .cfi_offset 3, -48 + subq $512, %rsp + .cfi_def_cfa_offset 560 + testq %r8, %r8 + je .L30 +.L11: + movq PyPyExc_TypeError at GOTPCREL(%rip), %rax + movq %r8, %rsi + movq (%rax), %rdi + call PyPyErr_SetString at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 
544(%rsp), %r15, 520(%rsp) | } + addq $512, %rsp + .cfi_remember_state + .cfi_def_cfa_offset 48 + popq %rbx + .cfi_def_cfa_offset 40 + popq %rbp + .cfi_def_cfa_offset 32 + popq %r12 + .cfi_def_cfa_offset 24 + popq %r13 + .cfi_def_cfa_offset 16 + popq %r14 + .cfi_def_cfa_offset 8 + ret + .p2align 4,,10 + .p2align 3 +.L30: + .cfi_restore_state + testq %rcx, %rcx + movq %rsi, %r12 + movl %edi, %r14d + movq %rdx, %r13 + movq %rsp, %rbp + movl $512, %esi + movq %rsp, %rbx + je .L13 + leaq .LC6(%rip), %rdx + movl $512, %esi + movq %rsp, %rdi + xorl %eax, %eax + movq %rsp, %rbx + call PyPyOS_snprintf at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } +.L14: + movl (%rbx), %eax + addq $4, %rbx + leal -16843009(%rax), %esi + notl %eax + andl %eax, %esi + andl $-2139062144, %esi + je .L14 + movl %esi, %eax + shrl $16, %eax + testl $32896, %esi + cmove %eax, %esi + leaq 2(%rbx), %rax + cmove %rax, %rbx + addb %sil, %sil + movq %rbp, %rsi + sbbq $3, %rbx + subq %rbx, %rsi + addq $512, %rsi +.L13: + testl %r14d, %r14d + je .L16 + leaq .LC7(%rip), %rdx + movq %rbx, %rdi + movl %r14d, %ecx + xorl %eax, %eax + call PyPyOS_snprintf at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } + movq %rbx, %rdi + call strlen at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } + addq %rax, %rbx + movl 0(%r13), %eax + testl %eax, %eax + jle .L18 + movq %rbx, %rdx + subq %rbp, %rdx + cmpl $219, %edx + jg .L18 + addq $4, %r13 + xorl %r14d, %r14d + .p2align 4,,10 + .p2align 3 +.L21: + movq %rbp, %rsi + leal -1(%rax), %ecx + leaq .LC8(%rip), %rdx + subq %rbx, %rsi + movq %rbx, %rdi + xorl %eax, %eax + addq $512, %rsi + addl $1, %r14d + call PyPyOS_snprintf at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } + movq %rbx, %rdi + call strlen at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), 
%r15, 520(%rsp) | } + addq %rax, %rbx + movl 0(%r13), %eax + testl %eax, %eax + jle .L18 + cmpl $32, %r14d + je .L18 + movq %rbx, %rdx + addq $4, %r13 + subq %rbp, %rdx + cmpl $219, %edx + jle .L21 + jmp .L18 + .p2align 4,,10 + .p2align 3 +.L16: + leaq .LC9(%rip), %rdx + movq %rbx, %rdi + xorl %eax, %eax + call PyPyOS_snprintf at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } + movq %rbx, %rdi + call strlen at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } + addq %rax, %rbx +.L18: + movq %rbp, %rsi + leaq .LC10(%rip), %rdx + movq %r12, %rcx + subq %rbx, %rsi + movq %rbx, %rdi + xorl %eax, %eax + addq $512, %rsi + call PyPyOS_snprintf at PLT + ;; expected {552(%rsp) | 512(%rsp), 528(%rsp), 536(%rsp), 544(%rsp), %r15, 520(%rsp) | } + movq %rbp, %r8 + jmp .L11 + .cfi_endproc +.LFE77: + .size seterror.part.1, .-seterror.part.1 diff --git a/rpython/translator/c/gcc/test/test_trackgcroot.py b/rpython/translator/c/gcc/test/test_trackgcroot.py --- a/rpython/translator/c/gcc/test/test_trackgcroot.py +++ b/rpython/translator/c/gcc/test/test_trackgcroot.py @@ -130,7 +130,7 @@ elif format == 'darwin' or format == 'darwin64': py.test.skip("disabled on OS/X's terribly old gcc") else: - r_globallabel = re.compile(r"([\w]+)=[.]+") + r_globallabel = re.compile(r"([\w.]+)=[.]+") print print path.dirpath().basename + '/' + path.basename lines = path.readlines() diff --git a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -8,7 +8,7 @@ from rpython.translator.c.gcc.instruction import InsnSetLocal, InsnCopyLocal from rpython.translator.c.gcc.instruction import InsnPrologue, InsnEpilogue from rpython.translator.c.gcc.instruction import InsnGCROOT, InsnCondJump -from rpython.translator.c.gcc.instruction import InsnStackAdjust +from rpython.translator.c.gcc.instruction 
import InsnStackAdjust, InsnPushed from rpython.translator.c.gcc.instruction import InsnCannotFollowEsp from rpython.translator.c.gcc.instruction import LocalVar, somenewvalue from rpython.translator.c.gcc.instruction import frameloc_esp, frameloc_ebp @@ -665,14 +665,22 @@ match = self.r_unaryinsn.match(line) source = match.group(1) return self.insns_for_copy(source, self.TOP_OF_STACK_MINUS_WORD) + \ - [InsnStackAdjust(-self.WORD)] + [InsnPushed(-self.WORD)] def _visit_pop(self, target): return [InsnStackAdjust(+self.WORD)] + \ self.insns_for_copy(self.TOP_OF_STACK_MINUS_WORD, target) def _visit_prologue(self): - # for the prologue of functions that use %ebp as frame pointer + # For the prologue of functions that use %ebp as frame pointer. + # First, find the latest InsnStackAdjust; if it's not a PUSH, + # then consider that this 'mov %rsp, %rbp' is actually unrelated + i = -1 + while not isinstance(self.insns[i], InsnStackAdjust): + i -= 1 + if not isinstance(self.insns[i], InsnPushed): + return [] + # self.uses_frame_pointer = True self.r_localvar = self.r_localvarfp return [InsnPrologue(self.WORD)] From noreply at buildbot.pypy.org Mon Jun 2 17:24:02 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:02 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: another test rewrite... Message-ID: <20140602152402.54F6B1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71822:1d6f4c0637ba Date: 2014-05-14 16:15 +0200 http://bitbucket.org/pypy/pypy/changeset/1d6f4c0637ba/ Log: another test rewrite... 
diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -1,5 +1,5 @@ import py -from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rtyper.lltypesystem import lltype, llmemory, llarena from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from test_direct import BaseDirectGCTest @@ -10,6 +10,32 @@ class PinningGCTest(BaseDirectGCTest): + def test_pin_can_move(self): + # even a pinned object is considered to be movable. Only the caller + # of pin() knows if it is currently movable or not. + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + assert self.gc.can_move(adr) + assert self.gc.pin(adr) + assert self.gc.can_move(adr) + + def test_pin_twice(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + assert self.gc.pin(adr) + assert not self.gc.pin(adr) + + def test_unpin_not_pinned(self): + # this test checks a requirement of the unpin() interface + ptr = self.malloc(S) + py.test.raises(Exception, + self.gc.unpin, llmemory.cast_ptr_to_adr(ptr)) + + # XXX test with multiple mallocs, and only part of them is pinned + +class TestIncminimark(PinningGCTest): + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass + def test_simple_pin(self): ptr = self.malloc(S) ptr.someInt = 100 @@ -23,7 +49,24 @@ assert self.gc.is_in_nursery(adr) assert ptr.someInt == 100 - # XXX not implemented yet + def test_simple_pin_unpin(self): + ptr = self.malloc(S) + ptr.someInt = 100 + self.stackroots.append(ptr) + adr = llmemory.cast_ptr_to_adr(ptr) + # check if pin worked + assert self.gc.pin(adr) + self.gc.collect() + assert self.gc.is_in_nursery(adr) + assert ptr.someInt == 100 + # unpin and check if object is gone from nursery + self.gc.unpin(adr) + self.gc.collect() + py.test.raises(RuntimeError, 'ptr.someInt') + ptr_old = self.stackroots[0] + assert 
ptr_old.someInt == 100 + + @py.test.mark.xfail(reason="Not implemented yet", run=False) def test_pin_referenced_from_stackroot(self): root_ptr = self.malloc(S) next_ptr = self.malloc(S) @@ -40,29 +83,6 @@ assert next_ptr.someInt == 100 root_ptr = self.stackroots[0] assert root_ptr.next == next_ptr - - - def test_pin_can_move(self): - # even a pinned object is considered to be movable. Only the caller - # of pin() knows if it is currently movable or not. - ptr = self.malloc(S) - adr = llmemory.cast_ptr_to_adr(ptr) - assert self.gc.can_move(adr) - assert self.gc.pin(adr) - assert self.gc.can_move(adr) - - def test_pin_twice(self): - ptr = self.malloc(S) - adr = llmemory.cast_ptr_to_adr(ptr) - assert self.gc.pin(adr) - assert not self.gc.pin(adr) - - # XXX test with multiple mallocs, and only part of them is pinned - # XXX test unpin() - - -class TestIncminimark(PinningGCTest): - from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass def test_pin_old(self): ptr = self.malloc(S) From noreply at buildbot.pypy.org Mon Jun 2 17:24:03 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:03 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added failing test that shouldn't fail. Message-ID: <20140602152403.845001C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71823:da5833a62ffa Date: 2014-05-14 16:16 +0200 http://bitbucket.org/pypy/pypy/changeset/da5833a62ffa/ Log: added failing test that shouldn't fail. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -97,6 +97,24 @@ # ^^^ should not be possible, struct is already old and won't # move. 
+ def test_pin_malloc_pin(self): + first_ptr = self.malloc(S) + first_ptr.someInt = 101 + self.stackroots.append(first_ptr) + assert self.gc.pin(llmemory.cast_ptr_to_adr(first_ptr)) + + self.gc.collect() + assert first_ptr.someInt == 101 + + second_ptr = self.malloc(S) + second_ptr.someInt = 102 + self.stackroots.append(second_ptr) + assert self.gc.pin(llmemory.cast_ptr_to_adr(second_ptr)) + + self.gc.collect() + assert first_ptr.someInt == 101 + assert second_ptr.someInt == 102 + # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. From noreply at buildbot.pypy.org Mon Jun 2 17:24:04 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:04 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: fix for `test_pin_malloc_pin` testcase. Message-ID: <20140602152404.BB5391C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71824:d0a8ba4741c0 Date: 2014-05-14 16:52 +0200 http://bitbucket.org/pypy/pypy/changeset/d0a8ba4741c0/ Log: fix for `test_pin_malloc_pin` testcase. This seems to fix also the problem running the RFile example with 10'000 iterations and reading 512 Bytes each time. The fix feels correct but I must investigate more to be sure, added a # XXX therefore. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -723,8 +723,9 @@ # move search area to the next free memory block in the # nursery. self.nursery_free = self.nursery_top + pinned_obj_size - self.move_nursery_top(llarena.getfakearenaaddress( - self.nursery_barriers.popleft()) - self.nursery_free) + # XXX should be, but check if the new area + # (nursery_free to nursery_top) was reset (arena_reset()). 
(groggi) + self.nursery_top = self.nursery_barriers.popleft() else: count += 1 # @@ -766,7 +767,6 @@ ll_assert(count == 2, "Seeing minor_collection() at least twice. " "Too many pinned objects?") - # # attempt to get 'totalzise' out of the nursery now. This may # fail again, and then we loop. Should be the uncommon case. From noreply at buildbot.pypy.org Mon Jun 2 17:24:06 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:06 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added new test that succeed but must be investigated to know that this Message-ID: <20140602152406.069C61C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71825:50ae1687bc68 Date: 2014-05-16 13:09 +0200 http://bitbucket.org/pypy/pypy/changeset/50ae1687bc68/ Log: added new test that succeed but must be investigated to know that this is not happening by chance. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -31,6 +31,30 @@ py.test.raises(Exception, self.gc.unpin, llmemory.cast_ptr_to_adr(ptr)) + def test_pin_id(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + self.stackroots.append(ptr) + ptr.someInt = 100 + assert self.gc.pin(adr) + # XXX incminimark: leads to a shadow. + # Check if this really works. (groggi) + self.gc.id(ptr) + self.gc.collect() + assert ptr.someInt == 100 + + def test_pin_hash(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + self.stackroots.append(ptr) + ptr.someInt = 100 + assert self.gc.pin(adr) + # XXX incminimark: leads to a shadow. + # Check if this really works. 
(groggi) + self.gc.identityhash(ptr) + self.gc.collect() + assert ptr.someInt == 100 + # XXX test with multiple mallocs, and only part of them is pinned class TestIncminimark(PinningGCTest): From noreply at buildbot.pypy.org Mon Jun 2 17:24:07 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:07 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: minor tweaks Message-ID: <20140602152407.50B4A1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71826:5cb4decd574d Date: 2014-05-18 13:08 +0200 http://bitbucket.org/pypy/pypy/changeset/5cb4decd574d/ Log: minor tweaks diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -707,7 +707,7 @@ # XXX update doc to contain nursery_barrier (groggi) # keep track how many iteration we've gone trough - count = 0 + minor_collection_count = 0 while True: if self.nursery_barriers.non_empty(): # we have multiple blocks of free memory in the nursery @@ -726,8 +726,10 @@ # XXX should be, but check if the new area # (nursery_free to nursery_top) was reset (arena_reset()). (groggi) self.nursery_top = self.nursery_barriers.popleft() + # XXX should we progress a step in the major collection? + # in original version this is always done if we call this + # function. (groggi) else: - count += 1 # # no barriers (i.e. pinned objects) left. Check if there is # enough space till we reach the real top of the nursery. @@ -736,7 +738,8 @@ return prev_result # self.minor_collection() - if count == 1: + if minor_collection_count == 0: + minor_collection_count += 1 # # If the gc_state is not STATE_SCANNING, we're in the middle of # an incremental major collection. 
In this case, always progress @@ -754,17 +757,14 @@ # if self.nursery_free + totalsize > self.nursery_real_top: self.minor_collection() - # then the nursery is empty - # XXX ^^^ not necessarily, update comment (groggi) + # then the nursery is empty (except pinned objects) else: - # we just need to clean up a bit more of the nursery - #self.move_nursery_top(totalsize) - # do a loop, should take care of finding space - # XXX ^^^ rewrite comment the moment we're sure it's - # the correct way. + # execute loop one more time. This should find + # enough space in most cases to allocate the + # object pass else: - ll_assert(count == 2, + ll_assert(minor_collection_count >= 1, "Seeing minor_collection() at least twice. " "Too many pinned objects?") # From noreply at buildbot.pypy.org Mon Jun 2 17:24:08 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:08 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: another XXX todo for our test set Message-ID: <20140602152408.7FB091C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71827:aeb7a384e60a Date: 2014-05-18 13:08 +0200 http://bitbucket.org/pypy/pypy/changeset/aeb7a384e60a/ Log: another XXX todo for our test set diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -144,3 +144,6 @@ # XXX test/define what happens if pinned object already has a shadow # => shadow handling. 
+ + # XXX fill nursery with pinned objects -> + define behavior for such a + # case From noreply at buildbot.pypy.org Mon Jun 2 17:24:09 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:09 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: move tests around Message-ID: <20140602152409.C6E681C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71828:1bc4c3b0bd3a Date: 2014-05-19 21:24 +0200 http://bitbucket.org/pypy/pypy/changeset/1bc4c3b0bd3a/ Log: move tests around shadowing is a GC specific feature, test on incminimark only for now diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -31,30 +31,6 @@ py.test.raises(Exception, self.gc.unpin, llmemory.cast_ptr_to_adr(ptr)) - def test_pin_id(self): - ptr = self.malloc(S) - adr = llmemory.cast_ptr_to_adr(ptr) - self.stackroots.append(ptr) - ptr.someInt = 100 - assert self.gc.pin(adr) - # XXX incminimark: leads to a shadow. - # Check if this really works. (groggi) - self.gc.id(ptr) - self.gc.collect() - assert ptr.someInt == 100 - - def test_pin_hash(self): - ptr = self.malloc(S) - adr = llmemory.cast_ptr_to_adr(ptr) - self.stackroots.append(ptr) - ptr.someInt = 100 - assert self.gc.pin(adr) - # XXX incminimark: leads to a shadow. - # Check if this really works. 
(groggi) - self.gc.identityhash(ptr) - self.gc.collect() - assert ptr.someInt == 100 - # XXX test with multiple mallocs, and only part of them is pinned class TestIncminimark(PinningGCTest): @@ -139,6 +115,26 @@ assert first_ptr.someInt == 101 assert second_ptr.someInt == 102 + def test_pin_shadow_1(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + self.stackroots.append(ptr) + ptr.someInt = 100 + assert self.gc.pin(adr) + self.gc.id(ptr) # allocate shadow + self.gc.collect() + assert ptr.someInt == 100 + + def test_pin_shadow_2(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + self.stackroots.append(ptr) + ptr.someInt = 100 + assert self.gc.pin(adr) + self.gc.identityhash(ptr) # allocate shadow + self.gc.collect() + assert ptr.someInt == 100 + # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. From noreply at buildbot.pypy.org Mon Jun 2 17:24:11 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:11 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: imported test from gc-minimark-pinning branch Message-ID: <20140602152411.0427C1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71829:8785c1ae62ff Date: 2014-05-19 21:25 +0200 http://bitbucket.org/pypy/pypy/changeset/8785c1ae62ff/ Log: imported test from gc-minimark-pinning branch test_pin_1 from that older branch not importated as yet unclear what the exact use-case is or if it even adds something to the test set diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -135,6 +135,27 @@ self.gc.collect() assert ptr.someInt == 100 + def test_pin_shadow_3(self): + s = self.malloc(S) + self.stackroots.append(s) + self.gc.id(s) # allocate shadow + 
self.gc.pin(llmemory.cast_ptr_to_adr(s)) + + print("free: %s" % self.gc.nursery_free) + print("top: %s" % self.gc.nursery_top) + self.gc.minor_collection() + # XXX it seems like we adjust nursery_free wrong after the minor + # collection or there is some other bug. (groggi) + print("free: %s" % self.gc.nursery_free) + print("top: %s" % self.gc.nursery_top) + + self.gc.unpin(llmemory.cast_ptr_to_adr(s)) + assert self.gc.nursery_free != self.gc.nursery + # we still have a pinned object + self.gc.minor_collection() + assert self.gc.nursery_free == self.gc.nursery + # we don't have a pinned object any more + # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. From noreply at buildbot.pypy.org Mon Jun 2 17:24:12 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:12 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: extended and corrected pinning tests to represent the current approach Message-ID: <20140602152412.462671C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71830:ca3298f5878c Date: 2014-05-20 13:36 +0200 http://bitbucket.org/pypy/pypy/changeset/ca3298f5878c/ Log: extended and corrected pinning tests to represent the current approach diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -122,8 +122,13 @@ ptr.someInt = 100 assert self.gc.pin(adr) self.gc.id(ptr) # allocate shadow - self.gc.collect() + self.gc.minor_collection() + assert self.gc.is_in_nursery(adr) assert ptr.someInt == 100 + self.gc.unpin(adr) + self.gc.minor_collection() # move to shadow + adr = llmemory.cast_ptr_to_adr(self.stackroots[0]) + assert not self.gc.is_in_nursery(adr) def test_pin_shadow_2(self): ptr = self.malloc(S) @@ -132,29 +137,39 @@ ptr.someInt = 100 assert 
self.gc.pin(adr) self.gc.identityhash(ptr) # allocate shadow - self.gc.collect() + self.gc.minor_collection() + assert self.gc.is_in_nursery(adr) assert ptr.someInt == 100 + self.gc.unpin(adr) + self.gc.minor_collection() # move to shadow + adr = llmemory.cast_ptr_to_adr(self.stackroots[0]) + assert not self.gc.is_in_nursery(adr) def test_pin_shadow_3(self): - s = self.malloc(S) - self.stackroots.append(s) - self.gc.id(s) # allocate shadow - self.gc.pin(llmemory.cast_ptr_to_adr(s)) - - print("free: %s" % self.gc.nursery_free) - print("top: %s" % self.gc.nursery_top) + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 # not used, just nice to have for identification + self.stackroots.append(ptr) + self.gc.id(ptr) # allocate shadow + + assert self.gc.pin(adr) + self.gc.minor_collection() # object stays in nursery + assert self.gc.is_in_nursery(adr) + + self.gc.unpin(adr) + # we still have a pinned object at the beginning. There is no space left + # to malloc an object before the pinned one. + assert self.gc.is_in_nursery(adr) + assert self.gc.nursery_free == self.gc.nursery + assert self.gc.nursery_top == self.gc.nursery + self.gc.minor_collection() - # XXX it seems like we adjust nursery_free wrong after the minor - # collection or there is some other bug. (groggi) - print("free: %s" % self.gc.nursery_free) - print("top: %s" % self.gc.nursery_top) - - self.gc.unpin(llmemory.cast_ptr_to_adr(s)) - assert self.gc.nursery_free != self.gc.nursery - # we still have a pinned object - self.gc.minor_collection() + # we don't have a pinned object any more. There is now space left at + # the beginning of our nursery for new objects. 
+ adr = llmemory.cast_ptr_to_adr(self.stackroots[0]) + assert not self.gc.is_in_nursery(adr) assert self.gc.nursery_free == self.gc.nursery - # we don't have a pinned object any more + assert self.gc.nursery_top > self.gc.nursery # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. From noreply at buildbot.pypy.org Mon Jun 2 17:24:13 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:13 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: implemented shadow handling, passing all three available tests. Message-ID: <20140602152413.7BC941C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71831:cb3c392ce68b Date: 2014-05-20 13:38 +0200 http://bitbucket.org/pypy/pypy/changeset/cb3c392ce68b/ Log: implemented shadow handling, passing all three available tests. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -723,8 +723,6 @@ # move search area to the next free memory block in the # nursery. self.nursery_free = self.nursery_top + pinned_obj_size - # XXX should be, but check if the new area - # (nursery_free to nursery_top) was reset (arena_reset()). (groggi) self.nursery_top = self.nursery_barriers.popleft() # XXX should we progress a step in the major collection? # in original version this is always done if we call this @@ -1490,6 +1488,14 @@ self.old_objects_with_cards_set.append(dest_addr) dest_hdr.tid |= GCFLAG_CARDS_SET + def record_pinned_object_with_shadow(self, obj, new_shadow_object_dict): + # checks if the pinned object has a shadow and if so add it to the + # dict of shadows. 
+ obj = obj + self.gcheaderbuilder.size_gc_header + shadow = self.nursery_objects_shadows.get(obj) + if shadow != NULL: + new_shadow_object_dict.setitem(obj, shadow) + # ---------- # Nursery collection @@ -1499,6 +1505,22 @@ # debug_start("gc-minor") # + # All nursery barriers right now are invalid from this point on. They + # are evaluated anew as part of a minor collection. + self.nursery_barriers.delete() + # + # Keeps track of surviving pinned objects. See also `_trace_drag_out()` + # where this stack is filled. + self.surviving_pinned_objects = self.AddressStack() + # + # The following counter keeps track of the amount of alive and pinned + # objects inside the nursery. The counter is reset, as we have to + # check which pinned objects are actually still alive. Pinning an + # object does not prevent the removal of an object, if it's not used + # anymore. + # XXX is this true? does it make sense? (groggi) + self.pinned_objects_in_nursery = 0 + # # Before everything else, remove from 'old_objects_pointing_to_young' # the young arrays. if self.young_rawmalloced_objects: @@ -1513,18 +1535,6 @@ self.old_objects_pointing_to_young.foreach( self._add_to_more_objects_to_trace, None) # - # Keeps track of surviving pinned objects. See also `_trace_drag_out()` - # where this stack is filled. - self.surviving_pinned_objects = self.AddressStack() - # - # The following counter keeps track of the amount of alive and pinned - # objects inside the nursery. The counter is reset, as we have to - # check which pinned objects are actually still alive. Pinning an - # object does not prevent the removal of an object, if it's not used - # anymore. - # XXX is this true? does it make sense? (groggi) - self.pinned_objects_in_nursery = 0 - # # First, find the roots that point to young objects. All nursery # objects found are copied out of the nursery, and the occasional # young raw-malloced object is flagged with GCFLAG_VISITED_RMY. 
@@ -1563,11 +1573,20 @@ if self.young_objects_with_light_finalizers.non_empty(): self.deal_with_young_objects_with_finalizers() # - # Clear this mapping. - # XXX gc-minimark-pinning contains some additional code - # in regard to pinned object. TODO (groggi) + # Clear this mapping. Without pinned objects we just clear the dict + # as all objects in the nursery are dragged out of the nursery and, if + # needed, into their shadow. However, if we have pinned objects we have + # to check if those pinned object have a shadow and keep a dictionary + # filled with shadow information for them as they stay in the nursery. if self.nursery_objects_shadows.length() > 0: - self.nursery_objects_shadows.clear() + if self.surviving_pinned_objects.non_empty(): + new_shadows = self.AddressDict() + self.surviving_pinned_objects.foreach( + self.record_pinned_object_with_shadow, new_shadows) + self.nursery_objects_shadows.delete() + self.nursery_objects_shadows = new_shadows + else: + self.nursery_objects_shadows.clear() # # Walk the list of young raw-malloced objects, and either free # them or make them old. @@ -1607,27 +1626,20 @@ (size_gc_header + self.get_size(obj)) # # clear the rest of the arena + # XXX resetting just to self.nursery_top may be enough? (groggi) llarena.arena_reset(prev, self.nursery_real_top - prev, 2) # ^^^ calculate the size of the last continuous # arena block. # + debug_print("last: resetting arena from %s for size: %s" % + (prev, self.nursery_real_top - prev)) self.surviving_pinned_objects.delete() - self.nursery_barriers.delete() self.nursery_barriers = nursery_barriers # XXX gc-minimark-pinning does a debug_rotate_nursery() here (groggi) self.nursery_free = self.nursery - self.nursery_barriers.append(self.nursery + self.nursery_size) + self.nursery_barriers.append(self.nursery_real_top) self.nursery_top = self.nursery_barriers.popleft() -# All live nursery objects are out, and the rest dies. 
Fill -# the nursery up to the cleanup point with zeros -# llarena.arena_reset(self.nursery, self.nursery_size, 0) -# llarena.arena_reset(self.nursery, self.initial_cleanup, 2) -# self.debug_rotate_nursery() -# self.nursery_free = self.nursery -# self.nursery_top = self.nursery + self.initial_cleanup -# self.nursery_real_top = self.nursery + self.nursery_size - # debug_print("minor collect, total memory used:", self.get_total_memory_used()) debug_print("number of pinned objects:", @@ -1804,7 +1816,6 @@ newhdr = self._malloc_out_of_nursery(totalsize) # elif self.is_forwarded(obj): - # XXX check if GCFLAG_PINNED is relevant for this case (groggi) # # 'obj' was already forwarded. Change the original reference # to point to its forwarding address, and we're done. From noreply at buildbot.pypy.org Mon Jun 2 17:24:15 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:15 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove solved XXX and unused imports Message-ID: <20140602152415.75CDE1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71832:796ec55adced Date: 2014-05-20 13:41 +0200 http://bitbucket.org/pypy/pypy/changeset/796ec55adced/ Log: remove solved XXX and unused imports diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -456,12 +456,10 @@ annmodel.SomeInteger(nonneg=True)], annmodel.s_None) - # XXX understand this, correct? (groggi) self.pin_ptr = getfn(GCClass.pin, [s_gc, SomeAddress()], annmodel.SomeBool()) - # XXX understand this, correct? 
(groggi) self.unpin_ptr = getfn(GCClass.unpin, [s_gc, SomeAddress()], annmodel.s_None) diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py --- a/rpython/memory/gcwrapper.py +++ b/rpython/memory/gcwrapper.py @@ -5,9 +5,6 @@ from rpython.memory import gctypelayout from rpython.flowspace.model import Constant -# XXX remove (groggi) -from rpython.rlib.debug import debug_print, debug_start, debug_stop - class GCManagedHeap(object): From noreply at buildbot.pypy.org Mon Jun 2 17:24:16 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:16 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove solved XXX Message-ID: <20140602152416.B2D391C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71833:f080d6ebf00e Date: 2014-05-20 13:42 +0200 http://bitbucket.org/pypy/pypy/changeset/f080d6ebf00e/ Log: remove solved XXX diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -174,8 +174,5 @@ # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. - # XXX test/define what happens if pinned object already has a shadow - # => shadow handling. 
- # XXX fill nursery with pinned objects -> + define behavior for such a # case From noreply at buildbot.pypy.org Mon Jun 2 17:24:17 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:17 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: add XXX with possible solution hint for not implemented feature Message-ID: <20140602152417.EF6881C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71834:1bade3ea1e85 Date: 2014-05-20 13:47 +0200 http://bitbucket.org/pypy/pypy/changeset/1bade3ea1e85/ Log: add XXX with possible solution hint for not implemented feature diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -68,6 +68,8 @@ @py.test.mark.xfail(reason="Not implemented yet", run=False) def test_pin_referenced_from_stackroot(self): + # XXX most likely somehow connected with `old_objects_pointing_to_young` + # (groggi) root_ptr = self.malloc(S) next_ptr = self.malloc(S) self.write(root_ptr, 'next', next_ptr) From noreply at buildbot.pypy.org Mon Jun 2 17:24:19 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:19 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: introduce `can_usually_pin_objects` to gc base class Message-ID: <20140602152419.433F01C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71835:f51dc9dc5677 Date: 2014-05-20 14:31 +0200 http://bitbucket.org/pypy/pypy/changeset/f51dc9dc5677/ Log: introduce `can_usually_pin_objects` to gc base class This is based on the same introduction in gc-minimark-pinning. It's useful for tests right now. 
diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -18,6 +18,7 @@ needs_write_barrier = False malloc_zero_filled = False prebuilt_gc_objects_are_static_roots = True + can_usually_pin_objects = False object_minimal_size = 0 gcflag_extra = 0 # or a real GC flag that is always 0 when not collecting diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -180,6 +180,7 @@ needs_write_barrier = True prebuilt_gc_objects_are_static_roots = False malloc_zero_filled = True # xxx experiment with False + can_usually_pin_objects = True gcflag_extra = GCFLAG_EXTRA # All objects start with a HDR, i.e. with a field 'tid' which contains From noreply at buildbot.pypy.org Mon Jun 2 17:24:20 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:20 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove forgotten debug_start/debug_stop Message-ID: <20140602152420.7CC2F1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71836:3d936370c8aa Date: 2014-05-20 14:31 +0200 http://bitbucket.org/pypy/pypy/changeset/3d936370c8aa/ Log: remove forgotten debug_start/debug_stop diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py --- a/rpython/memory/gcwrapper.py +++ b/rpython/memory/gcwrapper.py @@ -123,14 +123,10 @@ return self.gc.can_move(addr) def pin(self, addr): - debug_start("groggi-gcwrapper-pin") - debug_stop("groggi-gcwrapper-pin") return self.gc.pin(addr) def unpin(self, addr): - debug_start("groggi-gcwrapper-unpin") self.gc.unpin(addr) - debug_stop("groggi-gcwrapper-unpin") def weakref_create_getlazy(self, objgetter): # we have to be lazy in reading the llinterp variable containing From noreply at buildbot.pypy.org Mon Jun 2 17:24:21 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 
Jun 2014 17:24:21 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: import test from gc-minimark-pinning. Message-ID: <20140602152421.B79181C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71837:98f3f8e85506 Date: 2014-05-20 14:32 +0200 http://bitbucket.org/pypy/pypy/changeset/98f3f8e85506/ Log: import test from gc-minimark-pinning. diff --git a/rpython/memory/test/gc_test_base.py b/rpython/memory/test/gc_test_base.py --- a/rpython/memory/test/gc_test_base.py +++ b/rpython/memory/test/gc_test_base.py @@ -792,6 +792,24 @@ assert rgc.get_gcflag_extra(a2) == False self.interpret(fn, []) + def test_pinning(self): + def fn(n): + s = str(n) + if not rgc.can_move(s): + return 13 + res = int(rgc.pin(s)) + if res: + rgc.unpin(s) + return res + + res = self.interpret(fn, [10]) + if not self.GCClass.moving_gc: + assert res == 13 + elif self.GCClass.can_usually_pin_objects: + assert res == 1 + else: + assert res == 0 or res == 13 + from rpython.rlib.objectmodel import UnboxedValue class TaggedBase(object): From noreply at buildbot.pypy.org Mon Jun 2 17:24:22 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:22 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added new rffi.*_nonmovingbuffer API to RSocket Message-ID: <20140602152422.ED8681C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71838:1279f06e2bb1 Date: 2014-05-20 14:43 +0200 http://bitbucket.org/pypy/pypy/changeset/1279f06e2bb1/ Log: added new rffi.*_nonmovingbuffer API to RSocket diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -878,18 +878,18 @@ """Send a data string to the socket. For the optional flags argument, see the Unix manual. 
Return the number of bytes sent; this may be less than len(data) if the network is busy.""" - dataptr = rffi.get_nonmovingbuffer(data) + dataptr, is_pinned, is_raw = rffi.get_nonmovingbuffer(data) try: return self.send_raw(dataptr, len(data), flags) finally: - rffi.free_nonmovingbuffer(data, dataptr) + rffi.free_nonmovingbuffer(data, dataptr, is_pinned, is_raw) def sendall(self, data, flags=0, signal_checker=None): """Send a data string to the socket. For the optional flags argument, see the Unix manual. This calls send() repeatedly until all data is sent. If an error occurs, it's impossible to tell how much data has been sent.""" - dataptr = rffi.get_nonmovingbuffer(data) + dataptr, is_pinned, is_raw = rffi.get_nonmovingbuffer(data) try: remaining = len(data) p = dataptr @@ -904,7 +904,7 @@ if signal_checker is not None: signal_checker() finally: - rffi.free_nonmovingbuffer(data, dataptr) + rffi.free_nonmovingbuffer(data, dataptr, is_pinned, is_raw) def sendto(self, data, flags, address): """Like send(data, flags) but allows specifying the destination @@ -1303,7 +1303,7 @@ raise RSocketError("unknown address family") if len(packed) != srcsize: raise ValueError("packed IP wrong length for inet_ntop") - srcbuf = rffi.get_nonmovingbuffer(packed) + srcbuf = rffi.get_nonmovingbuffer(packed, is_pinned, is_raw) try: dstbuf = mallocbuf(dstsize) try: @@ -1314,7 +1314,7 @@ finally: lltype.free(dstbuf, flavor='raw') finally: - rffi.free_nonmovingbuffer(packed, srcbuf) + rffi.free_nonmovingbuffer(packed, srcbuf, is_pinned, is_raw) def setdefaulttimeout(timeout): if timeout < 0.0: From noreply at buildbot.pypy.org Mon Jun 2 17:24:24 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:24 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove unnecessary debug_start/debug_stop Message-ID: <20140602152424.326C51C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71839:bbbfcfe326e7 Date: 
2014-05-20 21:24 +0200 http://bitbucket.org/pypy/pypy/changeset/bbbfcfe326e7/ Log: remove unnecessary debug_start/debug_stop diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -8,9 +8,6 @@ from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.rtyper.lltypesystem import lltype, llmemory -# XXX remove (groggi) -from rpython.rlib.debug import debug_print, debug_start, debug_stop - # ____________________________________________________________ # General GC features @@ -39,8 +36,6 @@ # XXX doc string based on gc-minimark-pinning branch # XXX use doc string a basis for implementation behavior # XXX update doc string to match actual behavior - debug_start("groggi-rgc-pin") - debug_stop("groggi-rgc-pin") return False def unpin(obj): From noreply at buildbot.pypy.org Mon Jun 2 17:24:25 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:25 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: passing most test_rffi tests again. wip to get all tests running again Message-ID: <20140602152425.6CEDE1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71840:b3af9dfe4046 Date: 2014-05-20 21:46 +0200 http://bitbucket.org/pypy/pypy/changeset/b3af9dfe4046/ Log: passing most test_rffi tests again. 
wip to get all tests running again diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py --- a/rpython/memory/gctransform/transform.py +++ b/rpython/memory/gctransform/transform.py @@ -594,5 +594,11 @@ def gct_gc_can_move(self, hop): return hop.cast_result(rmodel.inputconst(lltype.Bool, False)) + def gct_gc_pin(self, hop): + return hop.cast_result(rmodel.inputconst(lltype.Bool, False)) + + def gct_gc_unpin(self, hop): + pass + def gct_shrink_array(self, hop): return hop.cast_result(rmodel.inputconst(lltype.Bool, False)) From noreply at buildbot.pypy.org Mon Jun 2 17:24:26 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:26 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: introducing scoped_nonmovingbuffer/scoped_nonmoving_unicodebuffer. Message-ID: <20140602152426.C0E8C1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71841:36698a8ec959 Date: 2014-05-20 21:50 +0200 http://bitbucket.org/pypy/pypy/changeset/36698a8ec959/ Log: introducing scoped_nonmovingbuffer/scoped_nonmoving_unicodebuffer. 
This is based on the same construct used in gc-minimark-pinning diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -127,8 +127,7 @@ def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None, ignore_error=0): inleft = len(stringdata) - inbuf = rffi.get_nonmovingbuffer(stringdata) - try: + with rffi.scoped_nonmovingbuffer(stringdata) as inbuf: if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0: raise MemoryError while True: @@ -140,9 +139,6 @@ src = pypy_cjk_dec_outbuf(decodebuf) length = pypy_cjk_dec_outlen(decodebuf) return rffi.wcharpsize2unicode(src, length) - # - finally: - rffi.free_nonmovingbuffer(stringdata, inbuf) def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata): @@ -171,11 +167,8 @@ assert errorcb replace, end = errorcb(errors, namecb, reason, stringdata, start, end) - inbuf = rffi.get_nonmoving_unicodebuffer(replace) - try: + with rffi.scoped_nonmoving_unicodebuffer(replace) as inbuf: r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end) - finally: - rffi.free_nonmoving_unicodebuffer(replace, inbuf) if r == MBERR_NOMEMORY: raise MemoryError @@ -222,8 +215,7 @@ def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None, namecb=None, ignore_error=0): inleft = len(unicodedata) - inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata) - try: + with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf: if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0: raise MemoryError if ignore_error == 0: @@ -245,9 +237,6 @@ src = pypy_cjk_enc_outbuf(encodebuf) length = pypy_cjk_enc_outlen(encodebuf) return rffi.charpsize2str(src, length) - # - finally: - rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf) def multibytecodec_encerror(encodebuf, e, errors, errorcb, namecb, unicodedata): @@ -287,10 +276,7 @@ assert retu is not None codec = 
pypy_cjk_enc_getcodec(encodebuf) replace = encode(codec, retu, "strict", errorcb, namecb) - inbuf = rffi.get_nonmovingbuffer(replace) - try: + with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) - finally: - rffi.free_nonmovingbuffer(replace, inbuf) if r == MBERR_NOMEMORY: raise MemoryError diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -446,11 +446,8 @@ count = space.len_w(w_str) data = space.str_w(w_str) - buf = rffi.get_nonmovingbuffer(data) - try: + with rffi.scoped_nonmovingbuffer(data) as buf: fwrite(buf, 1, count, fp) - finally: - rffi.free_nonmovingbuffer(data, buf) return 0 diff --git a/rpython/rlib/rfile.py b/rpython/rlib/rfile.py --- a/rpython/rlib/rfile.py +++ b/rpython/rlib/rfile.py @@ -133,8 +133,7 @@ if not ll_file: raise ValueError("I/O operation on closed file") assert value is not None - ll_value, is_pinned, is_raw = rffi.get_nonmovingbuffer(value) - try: + with rffi.scoped_nonmovingbuffer(value) as ll_value: # note that since we got a nonmoving buffer, it is either raw # or already cannot move, so the arithmetics below are fine length = len(value) @@ -142,8 +141,6 @@ if bytes != length: errno = rposix.get_errno() raise OSError(errno, os.strerror(errno)) - finally: - rffi.free_nonmovingbuffer(value, ll_value, is_pinned, is_raw) def close(self): """Closes the described file. diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -878,19 +878,15 @@ """Send a data string to the socket. For the optional flags argument, see the Unix manual. 
Return the number of bytes sent; this may be less than len(data) if the network is busy.""" - dataptr, is_pinned, is_raw = rffi.get_nonmovingbuffer(data) - try: + with rffi.scoped_nonmovingbuffer(data) as dataptr: return self.send_raw(dataptr, len(data), flags) - finally: - rffi.free_nonmovingbuffer(data, dataptr, is_pinned, is_raw) def sendall(self, data, flags=0, signal_checker=None): """Send a data string to the socket. For the optional flags argument, see the Unix manual. This calls send() repeatedly until all data is sent. If an error occurs, it's impossible to tell how much data has been sent.""" - dataptr, is_pinned, is_raw = rffi.get_nonmovingbuffer(data) - try: + with rffi.scoped_nonmovingbuffer(data) as dataptr: remaining = len(data) p = dataptr while remaining > 0: @@ -903,8 +899,6 @@ raise if signal_checker is not None: signal_checker() - finally: - rffi.free_nonmovingbuffer(data, dataptr, is_pinned, is_raw) def sendto(self, data, flags, address): """Like send(data, flags) but allows specifying the destination @@ -1303,8 +1297,7 @@ raise RSocketError("unknown address family") if len(packed) != srcsize: raise ValueError("packed IP wrong length for inet_ntop") - srcbuf = rffi.get_nonmovingbuffer(packed, is_pinned, is_raw) - try: + with rffi.scoped_nonmovingbuffer(packed) as srcbuf: dstbuf = mallocbuf(dstsize) try: res = _c.inet_ntop(family, srcbuf, dstbuf, dstsize) @@ -1313,8 +1306,6 @@ return rffi.charp2str(res) finally: lltype.free(dstbuf, flavor='raw') - finally: - rffi.free_nonmovingbuffer(packed, srcbuf, is_pinned, is_raw) def setdefaulttimeout(timeout): if timeout < 0.0: diff --git a/rpython/rlib/rzlib.py b/rpython/rlib/rzlib.py --- a/rpython/rlib/rzlib.py +++ b/rpython/rlib/rzlib.py @@ -171,11 +171,8 @@ Compute the CRC32 checksum of the string, possibly with the given start value, and return it as a unsigned 32 bit integer. 
""" - bytes = rffi.get_nonmovingbuffer(string) - try: + with rffi.scoped_nonmovingbuffer(string) as bytes: checksum = _crc32(start, rffi.cast(Bytefp, bytes), len(string)) - finally: - rffi.free_nonmovingbuffer(string, bytes) return checksum @@ -186,11 +183,8 @@ Compute the Adler-32 checksum of the string, possibly with the given start value, and return it as a unsigned 32 bit integer. """ - bytes = rffi.get_nonmovingbuffer(string) - try: + with rffi.scoped_nonmovingbuffer(string) as bytes: checksum = _adler32(start, rffi.cast(Bytefp, bytes), len(string)) - finally: - rffi.free_nonmovingbuffer(string, bytes) return checksum # ____________________________________________________________ diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1133,20 +1133,19 @@ def __init__(self, data): self.data = data def __enter__(self): - self.buf = get_nonmovingbuffer(self.data) + self.buf, self.pinned, self.is_raw = get_nonmovingbuffer(self.data) return self.buf def __exit__(self, *args): - free_nonmovingbuffer(self.data, self.buf) - + free_nonmovingbuffer(self.data, self.buf, self.pinned, self.is_raw) class scoped_nonmoving_unicodebuffer: def __init__(self, data): self.data = data def __enter__(self): - self.buf = get_nonmoving_unicodebuffer(self.data) + self.buf, self.pinned, self.is_raw = get_nonmoving_unicodebuffer(self.data) return self.buf def __exit__(self, *args): - free_nonmoving_unicodebuffer(self.data, self.buf) + free_nonmoving_unicodebuffer(self.data, self.buf, self.pinned, self.is_raw) class scoped_alloc_buffer: def __init__(self, size): diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -512,7 +512,7 @@ def test_nonmovingbuffer(self): d = 'some cool data that should not move' def f(): - 
buf = get_nonmovingbuffer(d) + buf, is_pinned, is_raw = get_nonmovingbuffer(d) try: counter = 0 for i in range(len(d)): @@ -520,7 +520,7 @@ counter += 1 return counter finally: - free_nonmovingbuffer(d, buf) + free_nonmovingbuffer(d, buf, is_pinned, is_raw) assert f() == len(d) fn = self.compile(f, [], gcpolicy='ref') assert fn() == len(d) @@ -530,13 +530,13 @@ def f(): counter = 0 for n in range(32): - buf = get_nonmovingbuffer(d) + buf, is_pinned, is_raw = get_nonmovingbuffer(d) try: for i in range(len(d)): if buf[i] == d[i]: counter += 1 finally: - free_nonmovingbuffer(d, buf) + free_nonmovingbuffer(d, buf, is_pinned, is_raw) return counter fn = self.compile(f, [], gcpolicy='semispace') # The semispace gc uses raw_malloc for its internal data structs diff --git a/rpython/rtyper/module/ll_os.py b/rpython/rtyper/module/ll_os.py --- a/rpython/rtyper/module/ll_os.py +++ b/rpython/rtyper/module/ll_os.py @@ -1028,15 +1028,12 @@ def os_write_llimpl(fd, data): count = len(data) rposix.validate_fd(fd) - buf, is_pinned, is_raw = rffi.get_nonmovingbuffer(data) - try: + with rffi.scoped_nonmovingbuffer(data) as buf: written = rffi.cast(lltype.Signed, os_write( rffi.cast(rffi.INT, fd), buf, rffi.cast(rffi.SIZE_T, count))) if written < 0: raise OSError(rposix.get_errno(), "os_write failed") - finally: - rffi.free_nonmovingbuffer(data, buf, is_pinned, is_raw) return written return extdef([int, str], SomeInteger(nonneg=True), From noreply at buildbot.pypy.org Mon Jun 2 17:24:28 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:28 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove not used debug_print Message-ID: <20140602152428.1AC551C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71842:d2276aba69b8 Date: 2014-05-22 17:12 +0200 http://bitbucket.org/pypy/pypy/changeset/d2276aba69b8/ Log: remove not used debug_print diff --git a/rpython/memory/gc/incminimark.py 
b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1632,8 +1632,6 @@ # ^^^ calculate the size of the last continuous # arena block. # - debug_print("last: resetting arena from %s for size: %s" % - (prev, self.nursery_real_top - prev)) self.surviving_pinned_objects.delete() self.nursery_barriers = nursery_barriers # XXX gc-minimark-pinning does a debug_rotate_nursery() here (groggi) From noreply at buildbot.pypy.org Mon Jun 2 17:24:29 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:29 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: reimplement GC transformation methods for pin()/unpin() Message-ID: <20140602152429.521B81C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71843:79956cac29d4 Date: 2014-05-22 17:13 +0200 http://bitbucket.org/pypy/pypy/changeset/79956cac29d4/ Log: reimplement GC transformation methods for pin()/unpin() Still not 100% sure I do the right thing, but tests pass now and it seems correct to me. 
diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py --- a/rpython/memory/gctransform/transform.py +++ b/rpython/memory/gctransform/transform.py @@ -347,6 +347,15 @@ [rmodel.inputconst(lltype.Bool, False)], resultvar=op.result) + def gct_gc_pin(self, hop): + op = hop.spaceop + hop.genop("same_as", + [rmodel.inputconst(lltype.Bool, False)], + resultvar=op.result) + + def gct_gc_unpin(self, hop): + pass + def gct_gc_identityhash(self, hop): # must be implemented in the various GCs raise NotImplementedError @@ -594,11 +603,5 @@ def gct_gc_can_move(self, hop): return hop.cast_result(rmodel.inputconst(lltype.Bool, False)) - def gct_gc_pin(self, hop): - return hop.cast_result(rmodel.inputconst(lltype.Bool, False)) - - def gct_gc_unpin(self, hop): - pass - def gct_shrink_array(self, hop): return hop.cast_result(rmodel.inputconst(lltype.Bool, False)) From noreply at buildbot.pypy.org Mon Jun 2 17:24:30 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:30 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: move ExtRegistryEntry classes to their rgc method counterparts Message-ID: <20140602152430.8A5301C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71844:c1d096aacb11 Date: 2014-05-22 17:27 +0200 http://bitbucket.org/pypy/pypy/changeset/c1d096aacb11/ Log: move ExtRegistryEntry classes to their rgc method counterparts diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -38,6 +38,20 @@ # XXX update doc string to match actual behavior return False +class PinEntry(ExtRegistryEntry): + _about_ = pin + + def compute_result_annotation(self, s_arg): + from rpython.annotator.model import s_Bool + return s_Bool + + def specialize_call(self, hop): + hop.exception_cannot_occur() + v_obj, = hop.inputargs(hop.args_r[0]) + v_addr = hop.genop('cast_ptr_to_adr', [v_obj], + resulttype=llmemory.Address) 
+ return hop.genop('gc_pin', [v_addr], resulttype=lltype.Bool) + def unpin(obj): """Unpin 'obj', allowing it to move again. Must only be called after a call to pin(obj) returned True. @@ -46,6 +60,19 @@ raise AssertionError("pin() always returns False, " "so unpin() should not be called") +class UnpinEntry(ExtRegistryEntry): + _about_ = unpin + + def compute_result_annotation(self, s_arg): + pass + + def specialize_call(self, hop): + hop.exception_cannot_occur() + v_obj, = hop.inputargs(hop.args_r[0]) + v_addr = hop.genop('cast_ptr_to_adr', [v_obj], + resulttype=llmemory.Address) + hop.genop('gc_unpin', [v_addr]) + # ____________________________________________________________ # Annotation and specialization @@ -602,32 +629,5 @@ hop.exception_cannot_occur() return hop.genop('gc_gcflag_extra', vlist, resulttype = hop.r_result) -class Entry(ExtRegistryEntry): # XXX understand this, is it correct? (groggi) - _about_ = pin - - def compute_result_annotation(self, s_arg): - from rpython.annotator.model import SomeBool - return SomeBool() - - def specialize_call(self, hop): - hop.exception_cannot_occur() - v_obj, = hop.inputargs(hop.args_r[0]) - v_addr = hop.genop('cast_ptr_to_adr', [v_obj], - resulttype=llmemory.Address) - return hop.genop('gc_pin', [v_addr], resulttype=lltype.Bool) - -class Entry(ExtRegistryEntry): # XXX understand this, is it correct? 
(groggi) - _about_ = unpin - - def compute_result_annotation(self, s_arg): - pass - - def specialize_call(self, hop): - hop.exception_cannot_occur() - v_obj, = hop.inputargs(hop.args_r[0]) - v_addr = hop.genop('cast_ptr_to_adr', [v_obj], - resulttype=llmemory.Address) - hop.genop('gc_unpin', [v_addr]) - def lltype_is_gc(TP): return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc' From noreply at buildbot.pypy.org Mon Jun 2 17:24:31 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:31 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove debug_* calls Message-ID: <20140602152431.BFDC91C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71845:a18149ea516a Date: 2014-05-22 17:34 +0200 http://bitbucket.org/pypy/pypy/changeset/a18149ea516a/ Log: remove debug_* calls diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -23,9 +23,6 @@ from rpython.translator.platform import CompilationError import os, sys -# XXX remove (groggi) -from rpython.rlib.debug import debug_print, debug_start, debug_stop - class CConstant(Symbolic): """ A C-level constant, maybe #define, rendered directly. 
""" @@ -754,36 +751,25 @@ """ # XXX update doc string - debug_start("groggi-get_nonmovingbuffer") - debug_print("data address ", cast_ptr_to_adr(data)) - lldata = llstrtype(data) count = len(data) pinned = False if rgc.can_move(data): if rgc.pin(data): - debug_print("raw_and_pinned: len = %s" % count) pinned = True else: - debug_print("allocating_raw_and_copying: len = %s" % count) - buf = lltype.malloc(TYPEP.TO, count, flavor='raw') copy_string_to_raw(lldata, buf, 0, count) - - debug_stop("groggi-get_nonmovingbuffer") return buf, pinned, True # ^^^ raw malloc used to get a nonmovable copy - else: - debug_print("raw_and_nonmovable: len = %s" % count) - + # # following code is executed if: # - rgc.can_move(data) and rgc.pin(data) both returned true # - rgc.can_move(data) returned false data_start = cast_ptr_to_adr(lldata) + \ offsetof(STRTYPE, 'chars') + itemoffsetof(STRTYPE.chars, 0) - debug_stop("groggi-get_nonmovingbuffer") return cast(TYPEP, data_start), pinned, False # ^^^ already nonmovable. Therefore it's not raw allocated nor # pinned. @@ -802,9 +788,6 @@ # followed the 2nd case in get_nonmovingbuffer(); in the first case, # 'buf' points to its own raw-malloced memory. 
- debug_start("groggi-free_nonmovingbuffer") - debug_print("data address ", cast_ptr_to_adr(data)) - assert not (is_pinned and is_raw) if is_pinned: @@ -813,10 +796,7 @@ lltype.free(buf, flavor='raw') # if is_pinned and is_raw are false: data was already nonmovable, # we have nothing to clean up - keepalive_until_here(data) - - debug_stop("groggi-free_nonmovingbuffer") free_nonmovingbuffer._annenforceargs_ = [strtype, None, bool, bool] # int -> (char*, str) From noreply at buildbot.pypy.org Mon Jun 2 17:24:33 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:33 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: additional asserts in the spirit of gc-minimark-pinning Message-ID: <20140602152433.008201C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71846:80ba7a7d8065 Date: 2014-05-22 20:45 +0200 http://bitbucket.org/pypy/pypy/changeset/80ba7a7d8065/ Log: additional asserts in the spirit of gc-minimark-pinning diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1163,11 +1163,11 @@ ll_assert(self.header(obj).tid & GCFLAG_VISITED_RMY == 0, "GCFLAG_VISITED_RMY after collection") else: - # pinned objects are always in the nursery ll_assert(self.is_in_nursery(obj), "pinned object not in nursery") - # XXX gc-minimark-pinning checks for GCFLAG_TRACK_YOUNG_POINTER - # (groggi) + # XXX check if we can support that or if it makes no sense (groggi) + ll_assert(not self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS, + "pinned nursery object with GCFLAG_TRACK_YOUNG_PTRS") if self.gc_state == STATE_SCANNING: self._debug_check_object_scanning(obj) @@ -1827,8 +1827,10 @@ # already visited and keeping track of the object return hdr.tid |= GCFLAG_VISITED - # XXX check for object flags that are not supported alongside - # GCFLAG_PINNED (groggi) + # XXX add additional checks for unsupported 
pinned objects (groggi) + # XXX implement unsupported object types with pinning + ll_assert(not self.header(obj).tid & GCFLAG_HAS_CARDS, + "pinned object with GCFLAG_HAS_CARDS not supported") self.surviving_pinned_objects.append( llarena.getfakearenaaddress(obj - size_gc_header)) self.pinned_objects_in_nursery += 1 From noreply at buildbot.pypy.org Mon Jun 2 17:24:34 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:34 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added todo item Message-ID: <20140602152434.360491C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71847:4a0eecd88f46 Date: 2014-05-22 22:39 +0200 http://bitbucket.org/pypy/pypy/changeset/4a0eecd88f46/ Log: added todo item diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -787,6 +787,7 @@ # if 'buf' points inside 'data'. This is only possible if we # followed the 2nd case in get_nonmovingbuffer(); in the first case, # 'buf' points to its own raw-malloced memory. + # XXX fix comment (groggi) assert not (is_pinned and is_raw) From noreply at buildbot.pypy.org Mon Jun 2 17:24:35 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:35 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: Fixes the issue with pinning objects that are converted to ll2ctypes. Message-ID: <20140602152435.72C381C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71848:84898085a098 Date: 2014-05-25 19:23 +0200 http://bitbucket.org/pypy/pypy/changeset/84898085a098/ Log: Fixes the issue with pinning objects that are converted to ll2ctypes. This is highly based on changeset 27dcf8e by Armin Rigo. 
diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -1,4 +1,4 @@ -import array, weakref +import array from rpython.rtyper.lltypesystem import llmemory from rpython.rlib.rarithmetic import is_valid_int @@ -16,8 +16,6 @@ pass class Arena(object): - object_arena_location = {} # {container: (arena, offset)} - old_object_arena_location = weakref.WeakKeyDictionary() _count_arenas = 0 def __init__(self, nbytes, zero): @@ -49,7 +47,7 @@ assert offset >= stop, "object overlaps cleared area" else: obj = ptr._obj - _dictdel(Arena.object_arena_location, obj) + obj.__arena_location__[0] = False # no longer valid del self.objectptrs[offset] del self.objectsizes[offset] obj._free() @@ -111,8 +109,7 @@ self.objectptrs[offset] = objaddr.ptr self.objectsizes[offset] = bytes container = objaddr.ptr._obj - Arena.object_arena_location[container] = self, offset - Arena.old_object_arena_location[container] = self, offset + container.__arena_location__ = [True, self, offset] def shrink_obj(self, offset, newsize): oldbytes = self.objectsizes[offset] @@ -203,12 +200,12 @@ return None, None obj = other.ptr._obj innerobject = False - while obj not in Arena.object_arena_location: + while not getattr(obj, '__arena_location__', (False,))[0]: obj = obj._parentstructure() if obj is None: return None, None # not found in the arena innerobject = True - arena, offset = Arena.object_arena_location[obj] + _, arena, offset = obj.__arena_location__ if innerobject: # 'obj' is really inside the object allocated from the arena, # so it's likely that its address "should be" a bit larger than @@ -272,8 +269,8 @@ def _oldobj_to_address(obj): obj = obj._normalizedcontainer(check=False) try: - arena, offset = Arena.old_object_arena_location[obj] - except KeyError: + _, arena, offset = obj.__arena_location__ + except AttributeError: if obj._was_freed(): msg = "taking address of %r, 
but it was freed" else: @@ -281,16 +278,6 @@ raise RuntimeError(msg % (obj,)) return arena.getaddr(offset) -def _dictdel(d, key): - # hack - try: - del d[key] - except KeyError: - items = d.items() - d.clear() - d.update(items) - del d[key] - class RoundedUpForAllocation(llmemory.AddressOffset): """A size that is rounded up in order to preserve alignment of objects following it. For arenas containing heterogenous objects. diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -1567,7 +1567,7 @@ return cache[tag] except KeyError: class _struct1(_struct): - __slots__ = flds + __slots__ = tag + ('__arena_location__',) cache[tag] = _struct1 return _struct1 @@ -1653,7 +1653,7 @@ class _array(_parentable): _kind = "array" - __slots__ = ('items',) + __slots__ = ('items', '__arena_location__',) def __init__(self, TYPE, n, initialization=None, parent=None, parentindex=None): if not is_valid_int(n): From noreply at buildbot.pypy.org Mon Jun 2 17:24:36 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:36 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: no apparent reason to restirct to is_pinnd xor is_raw. Message-ID: <20140602152436.A99871C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71849:616a845382ef Date: 2014-05-25 19:24 +0200 http://bitbucket.org/pypy/pypy/changeset/616a845382ef/ Log: no apparent reason to restirct to is_pinnd xor is_raw. diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -789,11 +789,10 @@ # 'buf' points to its own raw-malloced memory. 
# XXX fix comment (groggi) - assert not (is_pinned and is_raw) if is_pinned: rgc.unpin(data) - elif is_raw: + if is_raw: lltype.free(buf, flavor='raw') # if is_pinned and is_raw are false: data was already nonmovable, # we have nothing to clean up From noreply at buildbot.pypy.org Mon Jun 2 17:24:38 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:38 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: Merge release-2.3.x into gc-incminimark-pinning Message-ID: <20140602152438.5DED41C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71850:0c70ca589f85 Date: 2014-05-28 16:01 +0200 http://bitbucket.org/pypy/pypy/changeset/0c70ca589f85/ Log: Merge release-2.3.x into gc-incminimark-pinning diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -47,7 +47,7 @@ # The short X.Y version. version = '2.3' # The full version, including alpha/beta/rc tags. -release = '2.3.0' +release = '2.3.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -28,11 +28,6 @@ pypy/doc/tool/makecontributor.py generates the list of contributors * rename pypy/doc/whatsnew_head.rst to whatsnew_VERSION.rst and create a fresh whatsnew_head.rst after the release -* merge PYPY_IRC_TOPIC environment variable handling from previous release - in pypy/doc/getting-started-dev.rst, pypy/doc/man/pypy.1.rst, and - pypy/interpreter/app_main.py so release versions will not print a random - IRC topic by default. 
-* change the tracker to have a new release tag to file bugs against * go to pypy/tool/release and run: force-builds.py * wait for builds to complete, make sure there are no failures diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -1,19 +1,42 @@ Historical release notes ------------------------- +======================== + +Cpython 2.7 compatible versions +=============================== .. toctree:: + release-2.3.0.rst + release-2.2.1.rst + release-2.2.0.rst + release-2.1.0.rst + release-2.1.0-beta2.rst + release-2.1.0-beta1.rst + release-2.1.0.rst + release-2.0.2.rst + release-2.0.1.rst + release-2.0.0.rst + release-2.0.0-beta2.rst + release-2.0.0-beta1.rst + release-1.9.0.rst + release-1.8.0.rst + release-1.7.0.rst + release-1.6.0.rst + release-1.5.0.rst + release-1.4.1.rst + release-1.4.0beta.rst + release-1.4.0.rst + release-1.3.0.rst + release-1.2.0.rst + release-1.1.0.rst + release-1.0.0.rst + release-0.99.0.rst + release-0.9.0.rst + release-0.8.0.rst + release-0.7.0.rst release-0.6 - release-0.7.0.rst - release-0.8.0.rst - release-0.9.0.rst - release-0.99.0.rst - release-1.0.0.rst - release-1.1.0.rst - release-1.2.0.rst - release-1.3.0.rst - release-1.4.0.rst - release-1.4.0beta.rst - release-1.4.1.rst - release-1.5.0.rst - release-1.6.0.rst + +Cpython 3.2 compatible versions +=============================== +.. toctree:: + release-pypy3-2.1.0-beta1.rst diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -40,7 +40,7 @@ * `FAQ`_: some frequently asked questions. 
-* `Release 2.3.0`_: the latest official release +* `Release 2.3.1`_: the latest official release * `PyPy Blog`_: news and status info about PyPy diff --git a/pypy/doc/whatsnew-2.3.1.rst b/pypy/doc/whatsnew-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-2.3.1.rst @@ -0,0 +1,11 @@ +======================= +What's new since PyPy 2.3? +======================= + +.. this is a revision shortly after release-2.3 +.. startrev: 394146e9bb67 + +Move builtin ``struct`` module to ``_struct`` to allow ``pypy "-m idlelib.idle"`` + +Support compilation with gcc-4.9 + diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,6 +1,12 @@ ======================= -What's new in PyPy 2.3+ +What's new in PyPy 2.4+ ======================= .. this is a revision shortly after release-2.3.x .. startrev: b2cc67adbaad + +Added support for the stdlib gdbm module via cffi + +Fixes for issues #1769, #1764, #1762, #1752 + +Annotator cleanups diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -590,6 +590,11 @@ # handle the case where no command/filename/module is specified # on the command-line. + try: + from _ast import PyCF_ACCEPT_NULL_BYTES + except ImportError: + PyCF_ACCEPT_NULL_BYTES = 0 + # update sys.path *after* loading site.py, in case there is a # "site.py" file in the script's directory. Only run this if we're # executing the interactive prompt, if we're running a script we @@ -613,7 +618,8 @@ def run_it(): co_python_startup = compile(startup, python_startup, - 'exec') + 'exec', + PyCF_ACCEPT_NULL_BYTES) exec co_python_startup in mainmodule.__dict__ mainmodule.__file__ = python_startup run_toplevel(run_it) @@ -626,7 +632,8 @@ else: # If not interactive, just read and execute stdin normally. 
def run_it(): - co_stdin = compile(sys.stdin.read(), '', 'exec') + co_stdin = compile(sys.stdin.read(), '', 'exec', + PyCF_ACCEPT_NULL_BYTES) exec co_stdin in mainmodule.__dict__ mainmodule.__file__ = '' success = run_toplevel(run_it) diff --git a/pypy/interpreter/astcompiler/consts.py b/pypy/interpreter/astcompiler/consts.py --- a/pypy/interpreter/astcompiler/consts.py +++ b/pypy/interpreter/astcompiler/consts.py @@ -22,3 +22,4 @@ PyCF_SOURCE_IS_UTF8 = 0x0100 PyCF_DONT_IMPLY_DEDENT = 0x0200 PyCF_ONLY_AST = 0x0400 +PyCF_ACCEPT_NULL_BYTES = 0x10000000 # PyPy only, for compile() diff --git a/pypy/module/__builtin__/compiling.py b/pypy/module/__builtin__/compiling.py --- a/pypy/module/__builtin__/compiling.py +++ b/pypy/module/__builtin__/compiling.py @@ -24,7 +24,8 @@ """ ec = space.getexecutioncontext() if flags & ~(ec.compiler.compiler_flags | consts.PyCF_ONLY_AST | - consts.PyCF_DONT_IMPLY_DEDENT | consts.PyCF_SOURCE_IS_UTF8): + consts.PyCF_DONT_IMPLY_DEDENT | consts.PyCF_SOURCE_IS_UTF8 | + consts.PyCF_ACCEPT_NULL_BYTES): raise OperationError(space.w_ValueError, space.wrap("compile() unrecognized flags")) @@ -53,9 +54,10 @@ else: source = space.readbuf_w(w_source).as_str() - if '\x00' in source: - raise OperationError(space.w_TypeError, space.wrap( - "compile() expected string without null bytes")) + if not (flags & consts.PyCF_ACCEPT_NULL_BYTES): + if '\x00' in source: + raise OperationError(space.w_TypeError, space.wrap( + "compile() expected string without null bytes")) if flags & consts.PyCF_ONLY_AST: code = ec.compiler.compile_to_ast(source, filename, mode, flags) diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -610,6 +610,16 @@ firstlineno = co.co_firstlineno assert firstlineno == 2 + def test_compile_null_bytes(self): + import _ast + raises(TypeError, compile, '\x00', 'mymod', 'exec', 0) + 
raises(SyntaxError, compile, '\x00', 'mymod', 'exec', + _ast.PyCF_ACCEPT_NULL_BYTES) + src = "#abc\x00def\n" + raises(TypeError, compile, src, 'mymod', 'exec') + raises(TypeError, compile, src, 'mymod', 'exec', 0) + compile(src, 'mymod', 'exec', _ast.PyCF_ACCEPT_NULL_BYTES) # works + def test_print_function(self): import __builtin__ import sys diff --git a/pypy/module/_ast/__init__.py b/pypy/module/_ast/__init__.py --- a/pypy/module/_ast/__init__.py +++ b/pypy/module/_ast/__init__.py @@ -6,6 +6,8 @@ interpleveldefs = { "PyCF_ONLY_AST" : "space.wrap(%s)" % consts.PyCF_ONLY_AST, + "PyCF_ACCEPT_NULL_BYTES": + "space.wrap(%s)" % consts.PyCF_ACCEPT_NULL_BYTES, "__version__" : "space.wrap('82160')", # from CPython's svn. } appleveldefs = {} diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py --- a/pypy/module/_file/interp_file.py +++ b/pypy/module/_file/interp_file.py @@ -209,11 +209,13 @@ while size > 0: # "peeks" on the underlying stream to see how many chars # we can safely read without reading past an end-of-line - peeked = stream.peek() - pn = peeked.find("\n", 0, size) + startindex, peeked = stream.peek() + assert 0 <= startindex <= len(peeked) + endindex = startindex + size + pn = peeked.find("\n", startindex, endindex) if pn < 0: - pn = min(size-1, len(peeked)) - c = stream.read(pn + 1) + pn = min(endindex - 1, len(peeked)) + c = stream.read(pn - startindex + 1) if not c: break result.append(c) diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py --- a/pypy/module/bz2/interp_bz2.py +++ b/pypy/module/bz2/interp_bz2.py @@ -458,9 +458,7 @@ return result def peek(self): - pos = self.pos - assert pos >= 0 - return self.buffer[pos:] + return (self.pos, self.buffer) def try_to_find_file_descriptor(self): return self.stream.try_to_find_file_descriptor() diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ 
b/pypy/module/cpyext/include/patchlevel.h @@ -29,7 +29,7 @@ #define PY_VERSION "2.7.6" /* PyPy version as a string */ -#define PYPY_VERSION "2.3.0" +#define PYPY_VERSION "2.3.1" /* Subversion Revision number of this file (not of the repository). * Empty since Mercurial migration. */ diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,7 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (2, 3, 0, "final", 0) #XXX # sync patchlevel.h +PYPY_VERSION = (2, 3, 1, "final", 0) #XXX # sync patchlevel.h if platform.name == 'msvc': COMPILER_INFO = 'MSC v.%d 32 bit' % (platform.version * 10 + 600) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -2058,20 +2058,26 @@ #END MARKING elif self.gc_state == STATE_SWEEPING: # - # Walk all rawmalloced objects and free the ones that don't - # have the GCFLAG_VISITED flag. Visit at most 'limit' objects. - limit = self.nursery_size // self.ac.page_size - remaining = self.free_unvisited_rawmalloc_objects_step(limit) - # - # Ask the ArenaCollection to visit a fraction of the objects. - # Free the ones that have not been visited above, and reset - # GCFLAG_VISITED on the others. Visit at most '3 * limit' - # pages minus the number of objects already visited above. - done = self.ac.mass_free_incremental(self._free_if_unvisited, - 2 * limit + remaining) + if self.raw_malloc_might_sweep.non_empty(): + # Walk all rawmalloced objects and free the ones that don't + # have the GCFLAG_VISITED flag. Visit at most 'limit' objects. + # This limit is conservatively high enough to guarantee that + # a total object size of at least '3 * nursery_size' bytes + # is processed. 
+ limit = 3 * self.nursery_size // self.small_request_threshold + self.free_unvisited_rawmalloc_objects_step(limit) + done = False # the 2nd half below must still be done + else: + # Ask the ArenaCollection to visit a fraction of the objects. + # Free the ones that have not been visited above, and reset + # GCFLAG_VISITED on the others. Visit at most '3 * + # nursery_size' bytes. + limit = 3 * self.nursery_size // self.ac.page_size + done = self.ac.mass_free_incremental(self._free_if_unvisited, + limit) # XXX tweak the limits above # - if remaining > 0 and done: + if done: self.num_major_collects += 1 # # We also need to reset the GCFLAG_VISITED on prebuilt GC objects. diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -177,7 +177,7 @@ if intval < 0: sign = -1 - ival = r_uint(-intval) + ival = -r_uint(intval) elif intval > 0: sign = 1 ival = r_uint(intval) diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py --- a/rpython/rlib/streamio.py +++ b/rpython/rlib/streamio.py @@ -234,11 +234,12 @@ while True: # "peeks" on the underlying stream to see how many characters # we can safely read without reading past an end-of-line - peeked = self.peek() - pn = peeked.find("\n") + startindex, peeked = self.peek() + assert 0 <= startindex <= len(peeked) + pn = peeked.find("\n", startindex) if pn < 0: pn = len(peeked) - c = self.read(pn + 1) + c = self.read(pn - startindex + 1) if not c: break result.append(c) @@ -265,7 +266,7 @@ pass def peek(self): - return '' + return (0, '') def try_to_find_file_descriptor(self): return -1 @@ -553,7 +554,7 @@ else: difpos = offset if -self.pos <= difpos <= currentsize: - self.pos += difpos + self.pos += intmask(difpos) return if whence == 1: offset -= currentsize @@ -705,9 +706,7 @@ return "".join(chunks) def peek(self): - pos = self.pos - assert pos >= 0 - return self.buf[pos:] + return (self.pos, self.buf) write = PassThrough("write", flush_buffers=True) 
truncate = PassThrough("truncate", flush_buffers=True) @@ -970,12 +969,13 @@ while True: # "peeks" on the underlying stream to see how many characters # we can safely read without reading past an end-of-line - peeked = self.base.peek() - pn = peeked.find("\n") - pr = peeked.find("\r") + startindex, peeked = self.base.peek() + assert 0 <= startindex <= len(peeked) + pn = peeked.find("\n", startindex) + pr = peeked.find("\r", startindex) if pn < 0: pn = len(peeked) if pr < 0: pr = len(peeked) - c = self.read(min(pn, pr) + 1) + c = self.read(min(pn, pr) - startindex + 1) if not c: break result.append(c) @@ -1028,7 +1028,7 @@ self.buf = "" def peek(self): - return self.buf + return (0, self.buf) write = PassThrough("write", flush_buffers=True) truncate = PassThrough("truncate", flush_buffers=True) diff --git a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -522,7 +522,7 @@ # raw data, not GC pointers 'movnt', 'mfence', 'lfence', 'sfence', # bit manipulations - 'bextr', + 'andn', 'bextr', 'blsi', 'blsmask', 'blsr', 'tzcnt', 'lzcnt', ]) # a partial list is hopefully good enough for now; it's all to support From noreply at buildbot.pypy.org Mon Jun 2 17:24:39 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:39 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: importing the same thing twice does not help... *ugh* Message-ID: <20140602152439.AA47F1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71851:a51aa8c392ba Date: 2014-05-28 16:17 +0200 http://bitbucket.org/pypy/pypy/changeset/a51aa8c392ba/ Log: importing the same thing twice does not help... 
*ugh* diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -65,10 +65,6 @@ from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop from rpython.rlib.objectmodel import specialize -# XXX remove (groggi) -from rpython.rlib.debug import debug_print, debug_start, debug_stop - - # # Handles the objects in 2 generations: # From noreply at buildbot.pypy.org Mon Jun 2 17:24:40 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:40 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove not used debug_prints Message-ID: <20140602152440.E1CDC1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71852:9f9f4dbd7363 Date: 2014-05-28 16:18 +0200 http://bitbucket.org/pypy/pypy/changeset/9f9f4dbd7363/ Log: remove not used debug_prints diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -492,9 +492,6 @@ self.set_major_threshold_from(0.0) ll_assert(self.extra_threshold == 0, "extra_threshold set too early") self.initial_cleanup = self.nursery_size - debug_print("nursery start: ", self.nursery) # XXX remove (groggi) - debug_print("nursery top: ", self.nursery_top) # XXX remove (groggi) - debug_print("nursery_real_top: ", self.nursery_real_top) # XXX remove (groggi) debug_stop("gc-set-nursery-size") From noreply at buildbot.pypy.org Mon Jun 2 17:24:42 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:42 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning-countlimit: starting new branch to try out simple limit for pinned objects. 
Message-ID: <20140602152442.2A7A01C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning-countlimit Changeset: r71853:69bd40f359e7 Date: 2014-05-29 19:00 +0200 http://bitbucket.org/pypy/pypy/changeset/69bd40f359e7/ Log: starting new branch to try out simple limit for pinned objects. This idea is based on the implementation in gc-minimark-pinning diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -965,6 +965,8 @@ # Reason: It would be possible that the first caller unpins # while the second caller thinks it's still pinned. return False + if not self.should_pin(obj): + return False self.header(obj).tid |= GCFLAG_PINNED self.pinned_objects_in_nursery += 1 @@ -979,6 +981,8 @@ self.header(obj).tid &= ~GCFLAG_PINNED self.pinned_objects_in_nursery -= 1 + def should_pin(self, obj): + return True def shrink_array(self, obj, smallerlength): # From noreply at buildbot.pypy.org Mon Jun 2 17:24:43 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:43 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: add failing test for full nursery with pinned objects (should assert) Message-ID: <20140602152443.47FC41C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71854:d2c02b0d48d0 Date: 2014-05-29 19:27 +0200 http://bitbucket.org/pypy/pypy/changeset/d2c02b0d48d0/ Log: add failing test for full nursery with pinned objects (should assert) diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -173,6 +173,24 @@ assert self.gc.nursery_free == self.gc.nursery assert self.gc.nursery_top > self.gc.nursery + def get_max_nursery_objects(self, TYPE): + typeid = self.get_type_id(TYPE) + size = 
self.gc.fixed_size(typeid) + self.gc.gcheaderbuilder.size_gc_header + raw_size = llmemory.raw_malloc_usage(size) + return self.gc.nursery_size // raw_size + + def test_pin_until_full(self): + object_mallocs = self.get_max_nursery_objects(S) + for instance_nr in xrange(object_mallocs): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 + instance_nr + self.stackroots.append(ptr) + self.gc.pin(adr) + # nursery should be full now, at least no space for another `S`. Next malloc should fail. + py.test.raises(Exception, self.malloc, S) + + # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. From noreply at buildbot.pypy.org Mon Jun 2 17:24:44 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:44 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: assertion fix, test passes now (test_pin_until_full) Message-ID: <20140602152444.5B78D1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71855:4ab207b723e8 Date: 2014-05-29 19:28 +0200 http://bitbucket.org/pypy/pypy/changeset/4ab207b723e8/ Log: assertion fix, test passes now (test_pin_until_full) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -756,7 +756,7 @@ # object pass else: - ll_assert(minor_collection_count >= 1, + ll_assert(minor_collection_count < 1, "Seeing minor_collection() at least twice. 
" "Too many pinned objects?") # From noreply at buildbot.pypy.org Mon Jun 2 17:24:45 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:45 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: rename test to reflect what we test Message-ID: <20140602152445.68BC11C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71856:e7a2b59135e5 Date: 2014-05-29 19:29 +0200 http://bitbucket.org/pypy/pypy/changeset/e7a2b59135e5/ Log: rename test to reflect what we test diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -179,7 +179,7 @@ raw_size = llmemory.raw_malloc_usage(size) return self.gc.nursery_size // raw_size - def test_pin_until_full(self): + def test_full_pinned_nursery_pin_fail(self): object_mallocs = self.get_max_nursery_objects(S) for instance_nr in xrange(object_mallocs): ptr = self.malloc(S) From noreply at buildbot.pypy.org Mon Jun 2 17:24:46 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:46 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning-countlimit: Merge gc-incminimark-pinning into gc-incminimark-pinning-countlimit Message-ID: <20140602152446.81F0F1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning-countlimit Changeset: r71857:4402a3db123d Date: 2014-05-29 19:30 +0200 http://bitbucket.org/pypy/pypy/changeset/4402a3db123d/ Log: Merge gc-incminimark-pinning into gc-incminimark-pinning-countlimit diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -756,7 +756,7 @@ # object pass else: - ll_assert(minor_collection_count >= 1, + ll_assert(minor_collection_count < 1, "Seeing minor_collection() at least twice. 
" "Too many pinned objects?") # diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -173,6 +173,24 @@ assert self.gc.nursery_free == self.gc.nursery assert self.gc.nursery_top > self.gc.nursery + def get_max_nursery_objects(self, TYPE): + typeid = self.get_type_id(TYPE) + size = self.gc.fixed_size(typeid) + self.gc.gcheaderbuilder.size_gc_header + raw_size = llmemory.raw_malloc_usage(size) + return self.gc.nursery_size // raw_size + + def test_full_pinned_nursery_pin_fail(self): + object_mallocs = self.get_max_nursery_objects(S) + for instance_nr in xrange(object_mallocs): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 + instance_nr + self.stackroots.append(ptr) + self.gc.pin(adr) + # nursery should be full now, at least no space for another `S`. Next malloc should fail. + py.test.raises(Exception, self.malloc, S) + + # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. 
From noreply at buildbot.pypy.org Mon Jun 2 17:24:47 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:47 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning-countlimit: simple implementation and tests for limiting the amount of pinned objects Message-ID: <20140602152447.C42161C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning-countlimit Changeset: r71858:345cfc1ac23a Date: 2014-05-29 20:02 +0200 http://bitbucket.org/pypy/pypy/changeset/345cfc1ac23a/ Log: simple implementation and tests for limiting the amount of pinned objects diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -256,6 +256,10 @@ # so we trade it by cleaning it bit-by-bit, as we progress through # nursery. Has to fit at least one large object "nursery_cleanup": 32768 * WORD, + + # Number of objects that are allowed to be pinned in the nursery + # at the same time. + "max_number_of_pinned_objects": 100, } def __init__(self, config, @@ -268,6 +272,7 @@ major_collection_threshold=2.5, growth_rate_max=2.5, # for tests card_page_indices=0, + max_number_of_pinned_objects=100, large_object=8*WORD, ArenaCollectionClass=None, **kwds): @@ -284,6 +289,7 @@ self.max_heap_size = 0.0 self.max_heap_size_already_raised = False self.max_delta = float(r_uint(-1)) + self.max_number_of_pinned_objects = max_number_of_pinned_objects # self.card_page_indices = card_page_indices if self.card_page_indices > 0: @@ -965,7 +971,7 @@ # Reason: It would be possible that the first caller unpins # while the second caller thinks it's still pinned. 
return False - if not self.should_pin(obj): + if self.pinned_objects_in_nursery >= self.max_number_of_pinned_objects: return False self.header(obj).tid |= GCFLAG_PINNED @@ -981,9 +987,6 @@ self.header(obj).tid &= ~GCFLAG_PINNED self.pinned_objects_in_nursery -= 1 - def should_pin(self, obj): - return True - def shrink_array(self, obj, smallerlength): # # Only objects in the nursery can be "resized". Resizing them diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -3,6 +3,8 @@ from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from test_direct import BaseDirectGCTest +# YYY + S = lltype.GcForwardReference() S.become(lltype.GcStruct('pinning_test_struct', ('someInt', lltype.Signed), @@ -172,6 +174,39 @@ assert not self.gc.is_in_nursery(adr) assert self.gc.nursery_free == self.gc.nursery assert self.gc.nursery_top > self.gc.nursery + + # XXX test/define what happens if we try to pin an object that is too + # big for the nursery and will be raw-malloc'ed. 
+ + # XXX fill nursery with pinned objects -> + define behavior for such a + # case + + +class TestIncminimarkFewPinnedObjects(BaseDirectGCTest): + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass + + GC_PARAMS = {'max_number_of_pinned_objects': 5 + } + + def test_pinning_limit(self): + for instance_nr in xrange(self.GC_PARAMS['max_number_of_pinned_objects']): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 + instance_nr + self.stackroots.append(ptr) + self.gc.pin(adr) + # + # now we reached the maximum amount of pinned objects + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + self.stackroots.append(ptr) + assert not self.gc.pin(adr) + +class TestIncminimarkManyPinnedObjects(BaseDirectGCTest): + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass + + GC_PARAMS_PLENTY_PINNED_OBJECTS = {'max_number_of_pinned_objects': 50 + } def get_max_nursery_objects(self, TYPE): typeid = self.get_type_id(TYPE) @@ -181,18 +216,22 @@ def test_full_pinned_nursery_pin_fail(self): object_mallocs = self.get_max_nursery_objects(S) + # just to be sure we do not run into the limit as we test not the limiter + # but rather the case of a nursery full with pinned objects. + assert object_mallocs < self.gc.max_number_of_pinned_objects for instance_nr in xrange(object_mallocs): ptr = self.malloc(S) adr = llmemory.cast_ptr_to_adr(ptr) ptr.someInt = 100 + instance_nr self.stackroots.append(ptr) self.gc.pin(adr) + # # nursery should be full now, at least no space for another `S`. Next malloc should fail. py.test.raises(Exception, self.malloc, S) - # XXX test/define what happens if we try to pin an object that is too - # big for the nursery and will be raw-malloc'ed. + # XXX check if we have to take chunk size of AddressStack in /rpython/memory/support.py into account. 
+ # As far as gc-minimark-pinning tells us: yes (groggi) - # XXX fill nursery with pinned objects -> + define behavior for such a - # case + + From noreply at buildbot.pypy.org Mon Jun 2 17:24:49 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:49 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: additional test. passes just fine Message-ID: <20140602152449.098161C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71859:4771af2b6a07 Date: 2014-05-29 20:36 +0200 http://bitbucket.org/pypy/pypy/changeset/4771af2b6a07/ Log: additional test. passes just fine diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -173,6 +173,30 @@ assert self.gc.nursery_free == self.gc.nursery assert self.gc.nursery_top > self.gc.nursery + def test_collect_dead_pinned_objects(self): + # prepare three object, where two are stackroots + ptr_stackroot_1 = self.malloc(S) + ptr_stackroot_1.someInt = 100 + self.stackroots.append(ptr_stackroot_1) + + ptr_not_stackroot = self.malloc(S) + + ptr_stackroot_2 = self.malloc(S) + ptr_stackroot_2.someInt = 100 + self.stackroots.append(ptr_stackroot_2) + + # pin all three objects + assert self.gc.pin(llmemory.cast_ptr_to_adr(ptr_stackroot_1)) + assert self.gc.pin(llmemory.cast_ptr_to_adr(ptr_not_stackroot)) + assert self.gc.pin(llmemory.cast_ptr_to_adr(ptr_stackroot_2)) + assert self.gc.pinned_objects_in_nursery == 3 + + self.gc.minor_collection() + assert self.gc.pinned_objects_in_nursery == 2 + assert ptr_stackroot_1.someInt == 100 + assert ptr_stackroot_2.someInt == 100 + py.test.raises(RuntimeError, 'ptr_not_stackroot.someInt') # should be freed + def get_max_nursery_objects(self, TYPE): typeid = self.get_type_id(TYPE) size = self.gc.fixed_size(typeid) + self.gc.gcheaderbuilder.size_gc_header From 
noreply at buildbot.pypy.org Mon Jun 2 17:24:50 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:50 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove XXX that was achieved Message-ID: <20140602152450.4ECC71C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71860:08dc504395e4 Date: 2014-05-29 20:37 +0200 http://bitbucket.org/pypy/pypy/changeset/08dc504395e4/ Log: remove XXX that was achieved well, there is a lot still to test. However, for now we have a test. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -192,6 +192,7 @@ assert self.gc.pinned_objects_in_nursery == 3 self.gc.minor_collection() + # now the one not on the stack should be gone. assert self.gc.pinned_objects_in_nursery == 2 assert ptr_stackroot_1.someInt == 100 assert ptr_stackroot_2.someInt == 100 @@ -217,6 +218,4 @@ # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. 
- - # XXX fill nursery with pinned objects -> + define behavior for such a - # case + From noreply at buildbot.pypy.org Mon Jun 2 17:24:51 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:51 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: inline get_max_nursery_objects Message-ID: <20140602152451.87CB01C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71861:51e3de7d7ddc Date: 2014-05-29 20:40 +0200 http://bitbucket.org/pypy/pypy/changeset/51e3de7d7ddc/ Log: inline get_max_nursery_objects diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -198,14 +198,11 @@ assert ptr_stackroot_2.someInt == 100 py.test.raises(RuntimeError, 'ptr_not_stackroot.someInt') # should be freed - def get_max_nursery_objects(self, TYPE): + def test_full_pinned_nursery_pin_fail(self): typeid = self.get_type_id(TYPE) size = self.gc.fixed_size(typeid) + self.gc.gcheaderbuilder.size_gc_header raw_size = llmemory.raw_malloc_usage(size) - return self.gc.nursery_size // raw_size - - def test_full_pinned_nursery_pin_fail(self): - object_mallocs = self.get_max_nursery_objects(S) + object_mallocs = self.gc.nursery_size // raw_size for instance_nr in xrange(object_mallocs): ptr = self.malloc(S) adr = llmemory.cast_ptr_to_adr(ptr) @@ -218,4 +215,4 @@ # XXX test/define what happens if we try to pin an object that is too # big for the nursery and will be raw-malloc'ed. - + From noreply at buildbot.pypy.org Mon Jun 2 17:24:52 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:52 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: argh... forgot to run the test after inlining. varname fixed. 
Message-ID: <20140602152452.C9D351C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71862:0904f8c3e0a2 Date: 2014-05-29 20:43 +0200 http://bitbucket.org/pypy/pypy/changeset/0904f8c3e0a2/ Log: argh... forgot to run the test after inlining. varname fixed. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -199,7 +199,7 @@ py.test.raises(RuntimeError, 'ptr_not_stackroot.someInt') # should be freed def test_full_pinned_nursery_pin_fail(self): - typeid = self.get_type_id(TYPE) + typeid = self.get_type_id(S) size = self.gc.fixed_size(typeid) + self.gc.gcheaderbuilder.size_gc_header raw_size = llmemory.raw_malloc_usage(size) object_mallocs = self.gc.nursery_size // raw_size From noreply at buildbot.pypy.org Mon Jun 2 17:24:54 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:54 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning-countlimit: There is a limit for sorting an AddressStack, add comment for this in Message-ID: <20140602152454.125EB1C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning-countlimit Changeset: r71863:59cb06c20b3a Date: 2014-05-30 16:53 +0200 http://bitbucket.org/pypy/pypy/changeset/59cb06c20b3a/ Log: There is a limit for sorting an AddressStack, add comment for this in regard to `max_number_of_pinned_objects`. This was visible in the sort() function comment, however I just didn't read it before. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -258,7 +258,8 @@ "nursery_cleanup": 32768 * WORD, # Number of objects that are allowed to be pinned in the nursery - # at the same time. + # at the same time. 
Must be lesser than or equal to the chunk size + # of an AddressStack. "max_number_of_pinned_objects": 100, } From noreply at buildbot.pypy.org Mon Jun 2 17:24:55 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:55 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning-countlimit: close branch before merge Message-ID: <20140602152455.4B2641C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning-countlimit Changeset: r71864:a21ac0bdbe46 Date: 2014-05-30 16:55 +0200 http://bitbucket.org/pypy/pypy/changeset/a21ac0bdbe46/ Log: close branch before merge From noreply at buildbot.pypy.org Mon Jun 2 17:24:56 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:56 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: Merge gc-incminimark-pinning-countlimit into gc-incminimark-pinning. Message-ID: <20140602152456.8A9B41C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71865:d72628c24e5e Date: 2014-05-30 17:05 +0200 http://bitbucket.org/pypy/pypy/changeset/d72628c24e5e/ Log: Merge gc-incminimark-pinning-countlimit into gc-incminimark-pinning. We have to limit the amount of pinned objects by the number of pinned objects as AddressStack does not support sorting over multiple chunks! diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -256,6 +256,11 @@ # so we trade it by cleaning it bit-by-bit, as we progress through # nursery. Has to fit at least one large object "nursery_cleanup": 32768 * WORD, + + # Number of objects that are allowed to be pinned in the nursery + # at the same time. Must be lesser than or equal to the chunk size + # of an AddressStack. 
+ "max_number_of_pinned_objects": 100, } def __init__(self, config, @@ -268,6 +273,7 @@ major_collection_threshold=2.5, growth_rate_max=2.5, # for tests card_page_indices=0, + max_number_of_pinned_objects=100, large_object=8*WORD, ArenaCollectionClass=None, **kwds): @@ -284,6 +290,7 @@ self.max_heap_size = 0.0 self.max_heap_size_already_raised = False self.max_delta = float(r_uint(-1)) + self.max_number_of_pinned_objects = max_number_of_pinned_objects # self.card_page_indices = card_page_indices if self.card_page_indices > 0: @@ -965,6 +972,8 @@ # Reason: It would be possible that the first caller unpins # while the second caller thinks it's still pinned. return False + if self.pinned_objects_in_nursery >= self.max_number_of_pinned_objects: + return False self.header(obj).tid |= GCFLAG_PINNED self.pinned_objects_in_nursery += 1 @@ -979,7 +988,6 @@ self.header(obj).tid &= ~GCFLAG_PINNED self.pinned_objects_in_nursery -= 1 - def shrink_array(self, obj, smallerlength): # # Only objects in the nursery can be "resized". Resizing them diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -33,6 +33,7 @@ # XXX test with multiple mallocs, and only part of them is pinned + class TestIncminimark(PinningGCTest): from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass @@ -213,6 +214,50 @@ py.test.raises(Exception, self.malloc, S) - # XXX test/define what happens if we try to pin an object that is too - # big for the nursery and will be raw-malloc'ed. 
+class TestIncminimarkFewPinnedObjects(BaseDirectGCTest): + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass + GC_PARAMS = {'max_number_of_pinned_objects': 5 + } + + def test_pinning_limit(self): + for instance_nr in xrange(self.GC_PARAMS['max_number_of_pinned_objects']): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 + instance_nr + self.stackroots.append(ptr) + self.gc.pin(adr) + # + # now we reached the maximum amount of pinned objects + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + self.stackroots.append(ptr) + assert not self.gc.pin(adr) + + +class TestIncminimarkManyPinnedObjects(BaseDirectGCTest): + from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass + + GC_PARAMS_PLENTY_PINNED_OBJECTS = {'max_number_of_pinned_objects': 50 + } + + def get_max_nursery_objects(self, TYPE): + typeid = self.get_type_id(TYPE) + size = self.gc.fixed_size(typeid) + self.gc.gcheaderbuilder.size_gc_header + raw_size = llmemory.raw_malloc_usage(size) + return self.gc.nursery_size // raw_size + + def test_full_pinned_nursery_pin_fail(self): + object_mallocs = self.get_max_nursery_objects(S) + # just to be sure we do not run into the limit as we test not the limiter + # but rather the case of a nursery full with pinned objects. + assert object_mallocs < self.gc.max_number_of_pinned_objects + for instance_nr in xrange(object_mallocs): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + ptr.someInt = 100 + instance_nr + self.stackroots.append(ptr) + self.gc.pin(adr) + # + # nursery should be full now, at least no space for another `S`. Next malloc should fail. 
+ py.test.raises(Exception, self.malloc, S) From noreply at buildbot.pypy.org Mon Jun 2 17:24:57 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:57 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: get rid of additional test classes Message-ID: <20140602152457.BA9911C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71866:e8fea965f0cf Date: 2014-05-30 18:50 +0200 http://bitbucket.org/pypy/pypy/changeset/e8fea965f0cf/ Log: get rid of additional test classes diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -213,15 +213,8 @@ # nursery should be full now, at least no space for another `S`. Next malloc should fail. py.test.raises(Exception, self.malloc, S) - -class TestIncminimarkFewPinnedObjects(BaseDirectGCTest): - from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass - - GC_PARAMS = {'max_number_of_pinned_objects': 5 - } - def test_pinning_limit(self): - for instance_nr in xrange(self.GC_PARAMS['max_number_of_pinned_objects']): + for instance_nr in xrange(self.gc.max_number_of_pinned_objects): ptr = self.malloc(S) adr = llmemory.cast_ptr_to_adr(ptr) ptr.someInt = 100 + instance_nr @@ -233,22 +226,13 @@ adr = llmemory.cast_ptr_to_adr(ptr) self.stackroots.append(ptr) assert not self.gc.pin(adr) + test_pinning_limit.GC_PARAMS = {'max_number_of_pinned_objects': 5} - -class TestIncminimarkManyPinnedObjects(BaseDirectGCTest): - from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as GCClass - - GC_PARAMS_PLENTY_PINNED_OBJECTS = {'max_number_of_pinned_objects': 50 - } - - def get_max_nursery_objects(self, TYPE): - typeid = self.get_type_id(TYPE) + def test_full_pinned_nursery_pin_fail(self): + typeid = self.get_type_id(S) size = self.gc.fixed_size(typeid) + 
self.gc.gcheaderbuilder.size_gc_header raw_size = llmemory.raw_malloc_usage(size) - return self.gc.nursery_size // raw_size - - def test_full_pinned_nursery_pin_fail(self): - object_mallocs = self.get_max_nursery_objects(S) + object_mallocs = self.gc.nursery_size // raw_size # just to be sure we do not run into the limit as we test not the limiter # but rather the case of a nursery full with pinned objects. assert object_mallocs < self.gc.max_number_of_pinned_objects @@ -261,3 +245,4 @@ # # nursery should be full now, at least no space for another `S`. Next malloc should fail. py.test.raises(Exception, self.malloc, S) + test_full_pinned_nursery_pin_fail.GC_PARAMS = {'max_number_of_pinned_objects': 50} From noreply at buildbot.pypy.org Mon Jun 2 17:24:58 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:24:58 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: added additional pinning tests Message-ID: <20140602152458.F04A41C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71867:b7aebcec86ec Date: 2014-06-02 16:37 +0200 http://bitbucket.org/pypy/pypy/changeset/b7aebcec86ec/ Log: added additional pinning tests diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -1,8 +1,11 @@ import py from rpython.rtyper.lltypesystem import lltype, llmemory, llarena -from rpython.memory.gc.incminimark import IncrementalMiniMarkGC +from rpython.memory.gc.incminimark import IncrementalMiniMarkGC, WORD from test_direct import BaseDirectGCTest +# YYY +from rpython.rlib.debug import debug_print + S = lltype.GcForwardReference() S.become(lltype.GcStruct('pinning_test_struct', ('someInt', lltype.Signed), @@ -148,31 +151,224 @@ adr = llmemory.cast_ptr_to_adr(self.stackroots[0]) assert not self.gc.is_in_nursery(adr) - def test_pin_shadow_3(self): 
- ptr = self.malloc(S) - adr = llmemory.cast_ptr_to_adr(ptr) - ptr.someInt = 100 # not used, just nice to have for identification - self.stackroots.append(ptr) - self.gc.id(ptr) # allocate shadow + def test_pin_nursery_top_scenario1(self): + ptr1 = self.malloc(S) + adr1 = llmemory.cast_ptr_to_adr(ptr1) + ptr1.someInt = 101 + self.stackroots.append(ptr1) + assert self.gc.pin(adr1) + + ptr2 = self.malloc(S) + adr2 = llmemory.cast_ptr_to_adr(ptr2) + ptr2.someInt = 102 + self.stackroots.append(ptr2) + assert self.gc.pin(adr2) - assert self.gc.pin(adr) - self.gc.minor_collection() # object stays in nursery - assert self.gc.is_in_nursery(adr) + ptr3 = self.malloc(S) + adr3 = llmemory.cast_ptr_to_adr(ptr3) + ptr3.someInt = 103 + self.stackroots.append(ptr3) + assert self.gc.pin(adr3) - self.gc.unpin(adr) - # we still have a pinned object at the beginning. There is no space left - # to malloc an object before the pinned one. - assert self.gc.is_in_nursery(adr) + # scenario: no minor collection happened, only three mallocs + # and pins + # + # +- nursery nursery_real_top -+ + # | | + # v v + # +--------+--------+--------+---------------------...---+ + # | pinned | pinned | pinned | empty | + # +--------+--------+--------+---------------------...---+ + # ^ ^ + # | | + # nursery_free -+ | + # nursery_top -+ + # + assert adr3 < self.gc.nursery_free + assert self.gc.nursery_free < self.gc.nursery_top + assert self.gc.nursery_top == self.gc.nursery_real_top + + def test_pin_nursery_top_scenario2(self): + ptr1 = self.malloc(S) + adr1 = llmemory.cast_ptr_to_adr(ptr1) + ptr1.someInt = 101 + self.stackroots.append(ptr1) + assert self.gc.pin(adr1) + + ptr2 = self.malloc(S) + adr2 = llmemory.cast_ptr_to_adr(ptr2) + ptr2.someInt = 102 + self.stackroots.append(ptr2) + assert self.gc.pin(adr2) + + ptr3 = self.malloc(S) + adr3 = llmemory.cast_ptr_to_adr(ptr3) + ptr3.someInt = 103 + self.stackroots.append(ptr3) + assert self.gc.pin(adr3) + + # scenario: after first GC minor collection + # 
+ # +- nursery nursery_real_top -+ + # | | + # v v + # +--------+--------+--------+---------------------...---+ + # | pinned | pinned | pinned | empty | + # +--------+--------+--------+---------------------...---+ + # ^ + # | + # +- nursery_free + # +- nursery_top + # + self.gc.minor_collection() + + assert self.gc.nursery_free == self.gc.nursery_top + assert self.gc.nursery_top == self.gc.nursery + assert self.gc.nursery_top < adr3 + assert adr3 < self.gc.nursery_real_top + + def test_pin_nursery_top_scenario3(self): + ptr1 = self.malloc(S) + adr1 = llmemory.cast_ptr_to_adr(ptr1) + ptr1.someInt = 101 + self.stackroots.append(ptr1) + assert self.gc.pin(adr1) + + ptr2 = self.malloc(S) + adr2 = llmemory.cast_ptr_to_adr(ptr2) + ptr2.someInt = 102 + self.stackroots.append(ptr2) + assert self.gc.pin(adr2) + + ptr3 = self.malloc(S) + adr3 = llmemory.cast_ptr_to_adr(ptr3) + ptr3.someInt = 103 + self.stackroots.append(ptr3) + assert self.gc.pin(adr3) + + # scenario: after unpinning first object and a minor + # collection + # + # +- nursery nursery_real_top -+ + # | | + # v v + # +--------+--------+--------+---------------------...---+ + # | empty | pinned | pinned | empty | + # +--------+--------+--------+---------------------...---+ + # ^ ^ + # | | + # | +- nursery_top + # +- nursery_free + # + self.gc.unpin(adr1) + self.gc.minor_collection() + assert self.gc.nursery_free == self.gc.nursery - assert self.gc.nursery_top == self.gc.nursery + assert self.gc.nursery_top > self.gc.nursery_free + assert self.gc.nursery_top < adr2 + assert adr3 < self.gc.nursery_real_top + def test_pin_nursery_top_scenario4(self): + ptr1 = self.malloc(S) + adr1 = llmemory.cast_ptr_to_adr(ptr1) + ptr1.someInt = 101 + self.stackroots.append(ptr1) + assert self.gc.pin(adr1) + + ptr2 = self.malloc(S) + adr2 = llmemory.cast_ptr_to_adr(ptr2) + ptr2.someInt = 102 + self.stackroots.append(ptr2) + assert self.gc.pin(adr2) + + ptr3 = self.malloc(S) + adr3 = llmemory.cast_ptr_to_adr(ptr3) + ptr3.someInt = 
103 + self.stackroots.append(ptr3) + assert self.gc.pin(adr3) + + # scenario: after unpinning first & second object and a minor + # collection + # + # +- nursery nursery_real_top -+ + # | | + # v v + # +-----------------+--------+---------------------...---+ + # | empty | pinned | empty | + # +-----------------+--------+---------------------...---+ + # ^ ^ + # | | + # | +- nursery_top + # +- nursery_free + # + self.gc.unpin(adr1) + self.gc.unpin(adr2) self.gc.minor_collection() - # we don't have a pinned object any more. There is now space left at - # the beginning of our nursery for new objects. - adr = llmemory.cast_ptr_to_adr(self.stackroots[0]) - assert not self.gc.is_in_nursery(adr) + assert self.gc.nursery_free == self.gc.nursery - assert self.gc.nursery_top > self.gc.nursery + assert self.gc.nursery_free < self.gc.nursery_top + assert self.gc.nursery_top < adr3 + assert adr3 < self.gc.nursery_real_top + + def test_pin_nursery_top_scenario5(self): + ptr1 = self.malloc(S) + adr1 = llmemory.cast_ptr_to_adr(ptr1) + ptr1.someInt = 101 + self.stackroots.append(ptr1) + assert self.gc.pin(adr1) + + ptr2 = self.malloc(S) + adr2 = llmemory.cast_ptr_to_adr(ptr2) + ptr2.someInt = 102 + self.stackroots.append(ptr2) + assert self.gc.pin(adr2) + + ptr3 = self.malloc(S) + adr3 = llmemory.cast_ptr_to_adr(ptr3) + ptr3.someInt = 103 + self.stackroots.append(ptr3) + assert self.gc.pin(adr3) + + # scenario: no minor collection happened, only three mallocs + # and pins + # + # +- nursery nursery_real_top -+ + # | | + # v v + # +--------+--------+--------+---------------------...---+ + # | pinned | pinned | pinned | empty | + # +--------+--------+--------+---------------------...---+ + # ^ ^ + # | | + # nursery_free -+ | + # nursery_top -+ + # + assert adr3 < self.gc.nursery_free + assert self.gc.nursery_free < self.gc.nursery_top + assert self.gc.nursery_top == self.gc.nursery_real_top + + # scenario: unpin everything and minor collection + # + # +- nursery nursery_real_top -+ + 
# | | + # v v + # +----------------------------------+-------------...---+ + # | reset arena | empty (not reset) | + # +----------------------------------+-------------...---+ + # ^ ^ + # | | + # +- nursery_free | + # nursery_top -+ + # + self.gc.unpin(adr1) + self.gc.unpin(adr2) + self.gc.unpin(adr3) + self.gc.minor_collection() + + assert self.gc.nursery_free == self.gc.nursery + # the following assert is important: make sure that + # we did not reset the whole arena used as the nursery + assert self.gc.nursery_top < self.gc.nursery_real_top def test_collect_dead_pinned_objects(self): # prepare three object, where two are stackroots From noreply at buildbot.pypy.org Mon Jun 2 17:25:00 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:25:00 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: wip: introducing move_nursery_top usage back (incremental use of the nursery) Message-ID: <20140602152500.4F2B91C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71868:165a489c4e46 Date: 2014-06-02 16:38 +0200 http://bitbucket.org/pypy/pypy/changeset/165a489c4e46/ Log: wip: introducing move_nursery_top usage back (incremental use of the nursery) There are some tests that fail right now. working on it. 
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -692,11 +692,12 @@ "nursery_cleanup not a divisor of nursery_size - initial_cleanup") ll_assert(llmemory.raw_malloc_usage(totalsize) <= size, "totalsize > nursery_cleanup") + debug_print("resetting from %r to %r" % (self.nursery_top, self.nursery_top + size)) llarena.arena_reset(self.nursery_top, size, 2) self.nursery_top += size move_nursery_top._always_inline_ = True - def collect_and_reserve(self, prev_result, totalsize): + def collect_and_reserve(self, prev_nursery_free, totalsize): """To call when nursery_free overflows nursery_top. First check if the nursery_top is the real top, otherwise we can just move the top of one cleanup and continue @@ -734,11 +735,11 @@ # enough space till we reach the real top of the nursery. if self.nursery_top < self.nursery_real_top: self.move_nursery_top(totalsize) - return prev_result + return prev_nursery_free # self.minor_collection() - if minor_collection_count == 0: - minor_collection_count += 1 + minor_collection_count += 1 + if minor_collection_count == 1: # # If the gc_state is not STATE_SCANNING, we're in the middle of # an incremental major collection. In this case, always progress @@ -755,15 +756,16 @@ if self.nursery_free + totalsize > self.nursery_top: # if self.nursery_free + totalsize > self.nursery_real_top: + # still not enough space, we need to collect. + # maybe nursery contains too many pinned objects (see + # assert below). self.minor_collection() - # then the nursery is empty (except pinned objects) else: # execute loop one more time. This should find - # enough space in most cases to allocate the - # object + # enough space to allocate the object pass else: - ll_assert(minor_collection_count < 1, + ll_assert(minor_collection_count == 2, "Seeing minor_collection() at least twice. 
" "Too many pinned objects?") # @@ -1627,17 +1629,15 @@ prev = prev + pinned_obj_size + \ (size_gc_header + self.get_size(obj)) # - # clear the rest of the arena - # XXX resetting just to self.nursery_top may be enough? (groggi) - llarena.arena_reset(prev, self.nursery_real_top - prev, 2) - # ^^^ calculate the size of the last continuous - # arena block. + # clean up a bit more after the last pinned object + llarena.arena_reset(prev, self.initial_cleanup, 2) + nursery_barriers.append(prev + self.initial_cleanup) # + self.nursery_barriers = nursery_barriers self.surviving_pinned_objects.delete() - self.nursery_barriers = nursery_barriers + # # XXX gc-minimark-pinning does a debug_rotate_nursery() here (groggi) self.nursery_free = self.nursery - self.nursery_barriers.append(self.nursery_real_top) self.nursery_top = self.nursery_barriers.popleft() debug_print("minor collect, total memory used:", From noreply at buildbot.pypy.org Mon Jun 2 17:25:01 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:25:01 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: ups. Forgot to reset with zero=0 over the rest of the nursery Message-ID: <20140602152501.8C6781C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71869:c937d5f11b97 Date: 2014-06-02 16:59 +0200 http://bitbucket.org/pypy/pypy/changeset/c937d5f11b97/ Log: ups. 
Forgot to reset with zero=0 over the rest of the nursery diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1630,6 +1630,7 @@ (size_gc_header + self.get_size(obj)) # # clean up a bit more after the last pinned object + llarena.arena_reset(prev, self.nursery_real_top - prev, 0) llarena.arena_reset(prev, self.initial_cleanup, 2) nursery_barriers.append(prev + self.initial_cleanup) # From noreply at buildbot.pypy.org Mon Jun 2 17:25:02 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 2 Jun 2014 17:25:02 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: remove code used for temporary debugging Message-ID: <20140602152502.C7FC11C03D7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r71870:8a6578d32880 Date: 2014-06-02 17:15 +0200 http://bitbucket.org/pypy/pypy/changeset/8a6578d32880/ Log: remove code used for temporary debugging diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -692,7 +692,6 @@ "nursery_cleanup not a divisor of nursery_size - initial_cleanup") ll_assert(llmemory.raw_malloc_usage(totalsize) <= size, "totalsize > nursery_cleanup") - debug_print("resetting from %r to %r" % (self.nursery_top, self.nursery_top + size)) llarena.arena_reset(self.nursery_top, size, 2) self.nursery_top += size move_nursery_top._always_inline_ = True diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -3,9 +3,6 @@ from rpython.memory.gc.incminimark import IncrementalMiniMarkGC, WORD from test_direct import BaseDirectGCTest -# YYY -from rpython.rlib.debug import debug_print - S = lltype.GcForwardReference() 
S.become(lltype.GcStruct('pinning_test_struct', ('someInt', lltype.Signed), From noreply at buildbot.pypy.org Mon Jun 2 17:28:41 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Mon, 2 Jun 2014 17:28:41 +0200 (CEST) Subject: [pypy-commit] pypy default: fix not_const(s_None) Message-ID: <20140602152841.CA0E61C03D7@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: Changeset: r71871:6954e1607695 Date: 2014-06-02 16:27 +0100 http://bitbucket.org/pypy/pypy/changeset/6954e1607695/ Log: fix not_const(s_None) diff --git a/rpython/annotator/model.py b/rpython/annotator/model.py --- a/rpython/annotator/model.py +++ b/rpython/annotator/model.py @@ -675,7 +675,7 @@ def not_const(s_obj): - if s_obj.is_constant() and not isinstance(s_obj, SomePBC): + if s_obj.is_constant() and not isinstance(s_obj, (SomePBC, SomeNone)): new_s_obj = SomeObject.__new__(s_obj.__class__) dic = new_s_obj.__dict__ = s_obj.__dict__.copy() if 'const' in dic: diff --git a/rpython/annotator/test/test_model.py b/rpython/annotator/test/test_model.py --- a/rpython/annotator/test/test_model.py +++ b/rpython/annotator/test/test_model.py @@ -130,8 +130,9 @@ py.test.raises(AnnotatorError, compile_function, blocked_inference) -if __name__ == '__main__': - for name, value in globals().items(): - if name.startswith('test_'): - value() - +def test_not_const(): + s_int = SomeInteger() + s_int.const = 2 + assert s_int != SomeInteger() + assert not_const(s_int) == SomeInteger() + assert not_const(s_None) == s_None From noreply at buildbot.pypy.org Mon Jun 2 17:53:49 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Mon, 2 Jun 2014 17:53:49 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: tweaks and performance-section updates Message-ID: <20140602155349.18CFB1C0026@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5286:aeb303e11ab2 Date: 2014-06-02 17:53 +0200 http://bitbucket.org/pypy/extradoc/changeset/aeb303e11ab2/ Log: tweaks and performance-section updates diff --git 
a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex --- a/talk/dls2014/paper/paper.tex +++ b/talk/dls2014/paper/paper.tex @@ -154,8 +154,7 @@ atomicity between multiple threads for a series of instructions. Additionally, it provides the application with a sequential consistency model~\cite{lamport79}. Another technology that -can provide the same guarantees is transactional memory -(TM). \remi{cite our position paper} +can provide the same guarantees is transactional memory (TM). There have been several attempts at replacing the GIL with TM~\cite{nicholas06,odaira14,fuad10}. Using transactions to enclose @@ -167,7 +166,7 @@ synchronisation mechanism that avoids several of the problems of locks as they are used now. -TM systems come in\arigo{typo?} can be broadly categorised as hardware based (HTM), +TM systems can be broadly categorised as hardware based (HTM), software based (STM), or hybrid systems (HyTM). HTM systems are limited by hardware constraints~\cite{odaira14,fuad10}, while STM systems have a lot of overhead~\cite{cascaval08,drago11}. In \cite{wayforward14}, @@ -466,16 +465,16 @@ \subsubsection{Isolation: Copy-On-Write} -We now use these mechanisms to provide isolation for transactions. -Using write barriers, we implement a \emph{Copy-On-Write (COW)} on the -level of pages~\footnote{Conflict detection still occurs on the level -of objects.}. Starting from the initial fully-shared configuration +We now use these mechanisms to provide isolation for transactions. We +implement a \emph{Copy-On-Write (COW)} on the level of +pages~\footnote{Conflict detection still occurs on the level of +objects.}. Starting from the initial fully-shared configuration (figure \ref{fig:Page-Remapping}, (II)), when we need to modify an object without other threads seeing the changes immediately, we ensure that all pages belonging to the object are private to our segment. 
-More precisely, this is done by a write barrier that detects that we are -about to write to an old (pre-transaction) object that we did not record +More precisely, this is done with a write barrier that detects that we are +about to write to an object that we did not record in the write-set yet. When this occurs, the slow-path of the write barrier will also check if the page (or pages) containing the object is still shared, and if so, privatise it. This is done by remapping and copying @@ -1130,8 +1129,11 @@ uses fine-grained locking instead of a GIL, is only expected to scale with the number of threads for the latter group. It is not able to scale when using coarse-grained locking. STM, however, uses atomic -blocks instead, so it may still be able to scale since they are -implemented as simple transactions. +blocks instead of a single lock to synchronise accesses to the +shared data structures. Since atomic blocks map to transactions, +our STM system may still be able to get a speedup on more than one +thread by running transactions in parallel. + % To isolate factors we look at performance w/o JIT and perf w JIT. % w/o JIT: @@ -1152,18 +1154,27 @@ As expected, all interpreters with a GIL do not scale with the number of threads. They even become slower because of the overhead of thread-switching and GIL handling (see \cite{beazley10} for a detailed -analysis). We also see Jython scale when we expect it to (mandelbrot, -raytrace, richards), and behave similar to the GIL interpreters in the -other cases. +analysis). We also see Jython scale when we expect it to (\emph{mandelbrot, +raytrace, richards}), and behave similar to the GIL interpreters in the +other cases. The reason again being the coarse grained locking. -PyPy using our STM system (pypy-stm-nojit) scales in all benchmarks to -a certain degree. We see that the average overhead from switching from -GIL to STM is \remi{$35.5\%$}, the maximum in richards is -\remi{$63\%$}. 
pypy-stm-nojit beats pypy-nojit already on two threads; +PyPy using our STM system (\emph{pypy-stm-nojit}) scales in all +benchmarks to a certain degree. It scales best in the ones where +Jython scales as well and a little less in the others. The reason +for that is that in the former group there are no real, logical +conflicts -- all threads do independent calculations. In the latter +case, the threads work on a common data structure and therefore +create much more conflicts, which limits the scalability. + +Looking at the average overhead from switching from GIL to STM, we see +that it is \remi{$\approx 35.5\%$}. The maximum in richards is +\remi{$63\%$}. + +\emph{pypy-stm-nojit} beats \emph{pypy-nojit} already on two threads; however, it never even beats CPython, the reference implementation of Python. This means that without the JIT, our performance is not -competitive. We now look at how well our system works when we enable -the JIT. +competitive. So we will now look at how well our system works when we +enable the JIT. \begin{figure}[h] \centering @@ -1180,15 +1191,23 @@ The results are presented in figure \ref{fig:performance-nojit}. We see that the performance is much less stable. There is certainly more -work required in this area. In general, we see that the group of -non-locked benchmarks certainly scales best. The other three scale -barely or not at all with the number of threads. The slowdown factor -from GIL to STM ranges around \remi{$1-2.4\times$} and we beat GIL -performance in half of the benchmarks. +work required in this area. The slowdown factor for switching from GIL +to STM ranges around \remi{$1-2.4\times$}, and we beat GIL performance +in half of the benchmarks. -\remi{Reason for bad scaling: acceleration of code that produces -conflicts $-->$ more iterations $-->$ more conflicts. The overhead -doesn't get accelerated by the JIT.} +We see that generally, the group of non-locked benchmarks scales +best. 
The other three scale barely or not at all with the number of +threads. The reason for this is likely again the conflicts in the +latter group. Since the JIT accelerates all code but not the STM +overhead, we do more work per transaction. And this increases the +likelihood of conflicts between them and therefore limits scalability +even more than in the no-JIT benchmarks. + +Overall PyPy needs the JIT in order for its performance to be +competitive. It would be interesting to see how using our STM system +in CPython would turn out, but it is a lot of work. On its own, our +system scales well, so we hope to also see that with the JIT in the +future. \begin{figure}[h] @@ -1198,13 +1217,6 @@ \end{figure} -Overall PyPy needs the JIT in order for its performance to be -competitive. It would be interesting to see how using our STM system -in CPython would perform, but it is a lot of work. On its own, our -system scales well so we hope to also see that with the JIT in the -future. - - \section{Related Work} Eliminate GIL: @@ -1218,7 +1230,13 @@ \item FastLane: \cite{warmhoff13} \item TML: \cite{spear09} \item Virtualizing HTM: \cite{rajwar05} -\item Page-based virtualizing HyTM: \cite{chung06} (XTM can be +\item Page-based virtualizing HyTM: \cite{chung06}: page-level conflict + detection, otherwise hardware extensions required; assumes most + transactions fit HTM capacities (not so true here); COW using page-faults; + they assume OS-level access to page-tables (maybe not inherent to their + design); eval on simulator; value-based confl detection; + + (XTM can be implemented either in the OS as part of the virtual memory manager or between underlying TM systems and the OS, like virtual machines; Conflicts for overflowed transactions are tracked at page granularity; From noreply at buildbot.pypy.org Mon Jun 2 18:03:11 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Mon, 2 Jun 2014 18:03:11 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: comment some currently 
unused references Message-ID: <20140602160311.DDA761C0026@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5287:973618cb5a3f Date: 2014-06-02 18:03 +0200 http://bitbucket.org/pypy/extradoc/changeset/973618cb5a3f/ Log: comment some currently unused references diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex --- a/talk/dls2014/paper/paper.tex +++ b/talk/dls2014/paper/paper.tex @@ -1179,7 +1179,7 @@ \begin{figure}[h] \centering \includegraphics[width=1\columnwidth]{plots/performance_nojit.pdf} - \caption{Comparing runtime between interpreters without a JIT\label{fig:performance-nojit}} + \caption{Comparing execution time between interpreters without a JIT\label{fig:performance-nojit}} \end{figure} @@ -1213,7 +1213,7 @@ \begin{figure}[h] \centering \includegraphics[width=1\columnwidth]{plots/performance.pdf} - \caption{Comparing runtime between interpreters with JIT\label{fig:performance-jit}} + \caption{Comparing execution time between interpreters with JIT\label{fig:performance-jit}} \end{figure} @@ -1330,11 +1330,11 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\bibitem{dan07} - Dan Grossman. 2007. The transactional memory / garbage collection - analogy. \emph{In Proceedings of the 22nd annual ACM SIGPLAN - conference on Object-oriented programming systems and - applications} (OOPSLA '07). +% \bibitem{dan07} +% Dan Grossman. 2007. The transactional memory / garbage collection +% analogy. \emph{In Proceedings of the 22nd annual ACM SIGPLAN +% conference on Object-oriented programming systems and +% applications} (OOPSLA '07). \bibitem{odaira14} @@ -1369,16 +1369,16 @@ processor's hardware transactional memory support. \emph{SIGARCH Comput. Archit. News 38}, 5 (April 2010) -\bibitem{felber07} - Felber, Pascal, et al. "Transactifying applications using an open - compiler framework." \emph{TRANSACT}, August (2007): 4-6. +% \bibitem{felber07} +% Felber, Pascal, et al. 
"Transactifying applications using an open +% compiler framework." \emph{TRANSACT}, August (2007): 4-6. -\bibitem{bill06} - Bill McCloskey, Feng Zhou, David Gay, and Eric - Brewer. 2006. Autolocker: synchronization inference for atomic - sections. \emph{In Conference record of the 33rd ACM SIGPLAN-SIGACT - symposium on Principles of programming languages (POPL '06)}. ACM, - New York, NY, USA +% \bibitem{bill06} +% Bill McCloskey, Feng Zhou, David Gay, and Eric +% Brewer. 2006. Autolocker: synchronization inference for atomic +% sections. \emph{In Conference record of the 33rd ACM SIGPLAN-SIGACT +% symposium on Principles of programming languages (POPL '06)}. ACM, +% New York, NY, USA \bibitem{spear09} Spear, Michael F., et al. "Transactional mutex locks." \emph{SIGPLAN @@ -1418,17 +1418,17 @@ characteristics. \emph{SIGARCH Comput. Archit. News} 36, 1 (March 2008), 329-339. -\bibitem{leis14} - Leis, Viktor, Alfons Kemper, and Thomas Neumann. "Exploiting - Hardware Transactional Memory in Main-Memory Databases." - \emph{Proc. of ICDE}. 2014. +% \bibitem{leis14} +% Leis, Viktor, Alfons Kemper, and Thomas Neumann. "Exploiting +% Hardware Transactional Memory in Main-Memory Databases." +% \emph{Proc. of ICDE}. 2014. -\bibitem{biased} - Kenneth Russell and David Detlefs. 2006. Eliminating - synchronization-related atomic operations with biased locking and - bulk rebiasing. \emph{In Proceedings of the 21st annual ACM SIGPLAN - conference on Object-oriented programing, systems, languages, and - applications} (OOPSLA '06). +% \bibitem{biased} +% Kenneth Russell and David Detlefs. 2006. Eliminating +% synchronization-related atomic operations with biased locking and +% bulk rebiasing. \emph{In Proceedings of the 21st annual ACM SIGPLAN +% conference on Object-oriented programing, systems, languages, and +% applications} (OOPSLA '06). 
\end{thebibliography} From noreply at buildbot.pypy.org Mon Jun 2 19:47:09 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:09 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Fix translation Message-ID: <20140602174709.EE5B91C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71877:6ac3f4336b1b Date: 2014-05-28 22:45 -0500 http://bitbucket.org/pypy/pypy/changeset/6ac3f4336b1b/ Log: Fix translation diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -20,12 +20,12 @@ class W_BytearrayObject(W_Root): import_from_mixin(StringMethods) - def __init__(w_self, data): - w_self.data = data + def __init__(self, data): + self.data = data - def __repr__(w_self): + def __repr__(self): """representation for debugging purposes""" - return "%s(%s)" % (w_self.__class__.__name__, ''.join(w_self.data)) + return "%s(%s)" % (self.__class__.__name__, ''.join(self.data)) def buffer_w(self, space, flags): return BytearrayBuffer(self.data, False) @@ -62,10 +62,6 @@ raise oefmt(space.w_IndexError, "bytearray index out of range") return space.wrap(ord(character)) - def _fillchar(self, space, w_fillchar): - c = self._op_val(space, w_fillchar) - return [c], len(c) - def _val(self, space): return self.data @@ -82,14 +78,14 @@ return str(char)[0] def _multi_chr(self, char): - return [self._chr(char)] + return [char] @staticmethod def _builder(size=100): return ByteListBuilder(size) def _newlist_unwrapped(self, space, res): - return space.newlist([W_BytearrayObject(_make_data(i)) for i in res]) + return space.newlist([W_BytearrayObject(i) for i in res]) def _isupper(self, ch): return ch.isupper() @@ -318,9 +314,6 @@ return space.newbool(_memcmp(value, buffer, min_length) != 0) def descr_lt(self, space, w_other): - if isinstance(w_other, W_BytearrayObject): - return 
space.newbool(self.data < w_other.data) - try: buffer = _get_buffer(space, w_other) except OperationError as e: @@ -332,13 +325,9 @@ buffer_len = buffer.getlength() cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool( - cmp < 0 or (cmp == 0 and space.newbool(len(value) < buffer_len))) + return space.newbool(cmp < 0 or (cmp == 0 and len(value) < buffer_len)) def descr_le(self, space, w_other): - if isinstance(w_other, W_BytearrayObject): - return space.newbool(self.data <= w_other.data) - try: buffer = _get_buffer(space, w_other) except OperationError as e: @@ -350,13 +339,9 @@ buffer_len = buffer.getlength() cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool( - cmp < 0 or (cmp == 0 and space.newbool(len(value) <= buffer_len))) + return space.newbool(cmp < 0 or (cmp == 0 and len(value) <= buffer_len)) def descr_gt(self, space, w_other): - if isinstance(w_other, W_BytearrayObject): - return space.newbool(self.data > w_other.data) - try: buffer = _get_buffer(space, w_other) except OperationError as e: @@ -368,13 +353,9 @@ buffer_len = buffer.getlength() cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool( - cmp > 0 or (cmp == 0 and space.newbool(len(value) > buffer_len))) + return space.newbool(cmp > 0 or (cmp == 0 and len(value) > buffer_len)) def descr_ge(self, space, w_other): - if isinstance(w_other, W_BytearrayObject): - return space.newbool(self.data >= w_other.data) - try: buffer = _get_buffer(space, w_other) except OperationError as e: @@ -386,8 +367,7 @@ buffer_len = buffer.getlength() cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool( - cmp > 0 or (cmp == 0 and space.newbool(len(value) >= buffer_len))) + return space.newbool(cmp > 0 or (cmp == 0 and len(value) >= buffer_len)) def descr_iter(self, space): return space.newseqiter(self) diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- 
a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -430,6 +430,7 @@ _immutable_fields_ = ['_value'] def __init__(self, str): + assert str is not None self._value = str def __repr__(self): @@ -482,7 +483,8 @@ @staticmethod def _use_rstr_ops(space, w_other): from pypy.objspace.std.unicodeobject import W_UnicodeObject - return isinstance(w_other, (W_BytesObject, W_UnicodeObject)) + return (isinstance(w_other, W_BytesObject) or + isinstance(w_other, W_UnicodeObject)) @staticmethod def _op_val(space, w_other): diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -6,6 +6,7 @@ from rpython.rlib.rstring import ( search, SEARCH_FIND, SEARCH_RFIND, SEARCH_COUNT, endswith, replace, rsplit, split, startswith) +from rpython.rlib.buffer import Buffer from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, unwrap_spec @@ -31,7 +32,7 @@ return (value, start, end) def _multi_chr(self, c): - return self._chr(c) + return c def descr_len(self, space): return space.wrap(self._len()) @@ -73,7 +74,7 @@ if times <= 0: return self._empty() if self._len() == 1: - return self._new(self._val(space)[0] * times) + return self._new(self._multi_chr(self._val(space)[0]) * times) return self._new(self._val(space) * times) descr_rmul = descr_mul @@ -134,7 +135,7 @@ d = width - len(value) if d > 0: offset = d//2 + (d & width & 1) - fillchar = self._multi_chr(fillchar) + fillchar = self._multi_chr(fillchar[0]) centered = offset * fillchar + value + (d - offset) * fillchar else: centered = value @@ -177,7 +178,7 @@ if not value: return self._empty() - if self._use_rstr_ops(value, self): + if self._use_rstr_ops(space, self): splitted = value.split(self._chr('\t')) else: splitted = split(value, self._chr('\t')) @@ -189,7 +190,7 @@ expanded = oldtoken = splitted.pop(0) for token in splitted: - expanded += 
self._multi_chr(' ') * self._tabindent(oldtoken, + expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken, tabsize) + token oldtoken = token @@ -391,7 +392,7 @@ # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder unwrapped.append(self._op_val(space, w_s)) - prealloc_size += len(unwrapped[0]) + prealloc_size += len(unwrapped[i]) sb = self._builder(prealloc_size) for i in range(size): @@ -412,7 +413,7 @@ "ljust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = self._multi_chr(fillchar) + fillchar = self._multi_chr(fillchar[0]) value += d * fillchar return self._new(value) @@ -426,7 +427,7 @@ "rjust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = self._multi_chr(fillchar) + fillchar = self._multi_chr(fillchar[0]) value = d * fillchar + value return self._new(value) @@ -439,63 +440,61 @@ return self._new(builder.build()) def descr_partition(self, space, w_sub): + from pypy.objspace.std.bytearrayobject import W_BytearrayObject value = self._val(space) if self._use_rstr_ops(space, w_sub): sub = self._op_val(space, w_sub) sublen = len(sub) + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = value.find(sub) else: sub = _get_buffer(space, w_sub) sublen = sub.getlength() + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") - if sublen == 0: - raise oefmt(space.w_ValueError, "empty separator") - - if self._use_rstr_ops(space, w_sub): - pos = value.find(sub) - else: pos = search(value, sub, 0, len(value), SEARCH_FIND) + if pos != -1 and isinstance(self, W_BytearrayObject): + w_sub = self._new_from_buffer(sub) if pos == -1: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): self = self._new(value) return space.newtuple([self, self._empty(), self._empty()]) else: - from pypy.objspace.std.bytearrayobject import 
W_BytearrayObject - if isinstance(self, W_BytearrayObject): - w_sub = self._new_from_buffer(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, self._sliced(space, value, pos + sublen, len(value), self)]) def descr_rpartition(self, space, w_sub): + from pypy.objspace.std.bytearrayobject import W_BytearrayObject value = self._val(space) if self._use_rstr_ops(space, w_sub): sub = self._op_val(space, w_sub) sublen = len(sub) + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = value.rfind(sub) else: sub = _get_buffer(space, w_sub) sublen = sub.getlength() + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") - if sublen == 0: - raise oefmt(space.w_ValueError, "empty separator") - - if self._use_rstr_ops(space, w_sub): - pos = value.rfind(sub) - else: pos = search(value, sub, 0, len(value), SEARCH_RFIND) + if pos != -1 and isinstance(self, W_BytearrayObject): + w_sub = self._new_from_buffer(sub) if pos == -1: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): self = self._new(value) return space.newtuple([self._empty(), self._empty(), self]) else: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject - if isinstance(self, W_BytearrayObject): - w_sub = self._new_from_buffer(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, self._sliced(space, value, pos + sublen, len(value), self)]) @@ -715,7 +714,7 @@ def descr_zfill(self, space, width): selfval = self._val(space) if len(selfval) == 0: - return self._new(self._chr('0') * width) + return self._new(self._multi_chr(self._chr('0')) * width) num_zeros = width - len(selfval) if num_zeros <= 0: # cannot return self, in case it is a subclass of str diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -36,6 +36,8 @@ return search(obj, other, start, end, SEARCH_FIND) def rfind(obj, other, 
start, end): return search(obj, other, start, end, SEARCH_RFIND) + def count(obj, other, start, end): + return search(obj, other, start, end, SEARCH_COUNT) else: assert isinstance(value, str) or isinstance(value, unicode) assert isinstance(other, str) or isinstance(other, unicode) @@ -43,8 +45,10 @@ return obj.find(other, start, end) def rfind(obj, other, start, end): return obj.rfind(other, start, end) + def count(obj, other, start, end): + return obj.count(other, start, end) - return getitem, getlength, find, rfind + return getitem, getlength, find, rfind, count @specialize.argtype(0) def _isspace(char): @@ -90,7 +94,7 @@ assert isinstance(by, str) else: assert isinstance(by, unicode) - _, _, find, _ = _get_access_functions(value, by) + _, _, find, _, count = _get_access_functions(value, by) bylen = len(by) if bylen == 0: raise ValueError("empty separator") @@ -99,7 +103,7 @@ if bylen == 1: # fast path: uses str.rfind(character) and str.count(character) by = by[0] # annotator hack: string -> char - count = value.count(by) + count = count(value, by, 0, len(value)) if 0 <= maxsplit < count: count = maxsplit res = newlist_hint(count + 1) @@ -173,7 +177,7 @@ res = newlist_hint(min(maxsplit + 1, len(value))) else: res = [] - _, _, _, rfind = _get_access_functions(value, by) + _, _, _, rfind, _ = _get_access_functions(value, by) end = len(value) bylen = len(by) if bylen == 0: @@ -212,7 +216,7 @@ if maxsplit == 0: return input - _, _, find, _ = _get_access_functions(input, sub) + _, _, find, _, count = _get_access_functions(input, sub) if not sub: upper = len(input) @@ -236,7 +240,7 @@ builder.append_slice(input, upper, len(input)) else: # First compute the exact result size - count = input.count(sub) + count = count(input, sub, 0, len(input)) if count > maxsplit and maxsplit > 0: count = maxsplit diff_len = len(by) - len(sub) @@ -317,7 +321,7 @@ @specialize.argtype(0, 1) def search(value, other, start, end, mode): - getitem, getlength, _, _ = 
_get_access_functions(value, other) + getitem, getlength, _, _, _ = _get_access_functions(value, other) if start < 0: start = 0 if end > len(value): @@ -571,7 +575,7 @@ def append_multiple_char(self, c, times): assert isinstance(c, str) - self.l.extend([c] * times) + self.l.extend([c[0]] * times) def append_charpsize(self, s, size): assert size >= 0 diff --git a/rpython/rtyper/rlist.py b/rpython/rtyper/rlist.py --- a/rpython/rtyper/rlist.py +++ b/rpython/rtyper/rlist.py @@ -293,6 +293,11 @@ v_lst, v_factor = hop.inputargs(r_lst, Signed) return hop.gendirectcall(ll_mul, cRESLIST, v_lst, v_factor) +class __extend__(pairtype(IntegerRepr, AbstractBaseListRepr)): + def rtype_mul((r_int, r_lst), hop): + cRESLIST = hop.inputconst(Void, hop.r_result.LIST) + v_factor, v_lst = hop.inputargs(Signed, r_lst) + return hop.gendirectcall(ll_mul, cRESLIST, v_lst, v_factor) class __extend__(pairtype(AbstractListRepr, IntegerRepr)): diff --git a/rpython/rtyper/test/test_rlist.py b/rpython/rtyper/test/test_rlist.py --- a/rpython/rtyper/test/test_rlist.py +++ b/rpython/rtyper/test/test_rlist.py @@ -946,6 +946,15 @@ for arg in (1, 9, 0, -1, -27): res = self.interpret(fn, [arg]) assert res == fn(arg) + def fn(i): + lst = i * [i, i + 1] + ret = len(lst) + if ret: + ret *= lst[-1] + return ret + for arg in (1, 9, 0, -1, -27): + res = self.interpret(fn, [arg]) + assert res == fn(arg) def test_list_inplace_multiply(self): def fn(i): From noreply at buildbot.pypy.org Mon Jun 2 19:47:03 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:03 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Most bytearray methods fixed Message-ID: <20140602174703.5843F1C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71872:3521f66aed64 Date: 2014-05-26 03:48 -0500 http://bitbucket.org/pypy/pypy/changeset/3521f66aed64/ Log: Most bytearray methods fixed diff --git a/pypy/objspace/std/bytearrayobject.py 
b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -1,7 +1,7 @@ """The builtin bytearray implementation""" from rpython.rlib.objectmodel import ( - import_from_mixin, newlist_hint, resizelist_hint) + import_from_mixin, newlist_hint, resizelist_hint, specialize) from rpython.rlib.buffer import Buffer from rpython.rlib.rstring import StringBuilder @@ -11,7 +11,7 @@ from pypy.interpreter.signature import Signature from pypy.objspace.std.sliceobject import W_SliceObject from pypy.objspace.std.stdtypedef import StdTypeDef -from pypy.objspace.std.stringmethods import StringMethods +from pypy.objspace.std.stringmethods import StringMethods, _get_buffer from pypy.objspace.std.util import get_positive_index NON_HEX_MSG = "non-hexadecimal number found in fromhex() arg at position %d" @@ -40,7 +40,11 @@ return ''.join(self.data) def _new(self, value): - return W_BytearrayObject(_make_data(value)) + return W_BytearrayObject(value) + + def _new_from_buffer(self, buffer): + length = buffer.getlength() + return W_BytearrayObject([buffer.getitem(i) for i in range(length)]) def _new_from_list(self, value): return W_BytearrayObject(value) @@ -58,7 +62,12 @@ raise oefmt(space.w_IndexError, "bytearray index out of range") return space.wrap(ord(character)) - _val = charbuf_w + def _val(self, space): + return self.data + + @staticmethod + def _use_rstr_ops(space, w_other): + return False @staticmethod def _op_val(space, w_other): @@ -68,7 +77,9 @@ assert len(char) == 1 return str(char)[0] - _builder = StringBuilder + @staticmethod + def _builder(size=100): + return BytearrayBuilder(size) def _newlist_unwrapped(self, space, res): return space.newlist([W_BytearrayObject(_make_data(i)) for i in res]) @@ -260,58 +271,116 @@ return space.wrap(''.join(self.data)) def descr_eq(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data == w_other.data) + try: - res = 
self._val(space) == self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + if len(value) != buffer_len: + return space.newbool(False) + + min_length = min(len(value), buffer_len) + return space.newbool(_memcmp(value, buffer, min_length) == 0) def descr_ne(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data != w_other.data) + try: - res = self._val(space) != self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + if len(value) != buffer_len: + return space.newbool(True) + + min_length = min(len(value), buffer_len) + return space.newbool(_memcmp(value, buffer, min_length) != 0) def descr_lt(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data < w_other.data) + try: - res = self._val(space) < self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + cmp = _memcmp(value, buffer, min(len(value), buffer_len)) + return space.newbool( + cmp < 0 or (cmp == 0 and space.newbool(len(value) < buffer_len))) def descr_le(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data <= w_other.data) + try: - res = self._val(space) <= self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + 
value = self._val(space) + buffer_len = buffer.getlength() + + cmp = _memcmp(value, buffer, min(len(value), buffer_len)) + return space.newbool( + cmp < 0 or (cmp == 0 and space.newbool(len(value) <= buffer_len))) def descr_gt(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data > w_other.data) + try: - res = self._val(space) > self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + cmp = _memcmp(value, buffer, min(len(value), buffer_len)) + return space.newbool( + cmp > 0 or (cmp == 0 and space.newbool(len(value) > buffer_len))) def descr_ge(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data >= w_other.data) + try: - res = self._val(space) >= self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + cmp = _memcmp(value, buffer, min(len(value), buffer_len)) + return space.newbool( + cmp > 0 or (cmp == 0 and space.newbool(len(value) >= buffer_len))) def descr_iter(self, space): return space.newseqiter(self) @@ -319,8 +388,11 @@ def descr_inplace_add(self, space, w_other): if isinstance(w_other, W_BytearrayObject): self.data += w_other.data - else: - self.data += self._op_val(space, w_other) + return self + + buffer = _get_buffer(space, w_other) + for i in range(buffer.getlength()): + self.data.append(buffer.getitem(i)) return self def descr_inplace_mul(self, space, w_times): @@ -403,11 +475,42 @@ if space.isinstance_w(w_sub, space.w_int): char = space.int_w(w_sub) return _descr_contains_bytearray(self.data, space, char) + return 
self._StringMethods_descr_contains(space, w_sub) + def descr_add(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return self._new(self.data + w_other.data) + + try: + buffer = _get_buffer(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return space.w_NotImplemented + raise + + buffer_len = buffer.getlength() + data = list(self.data + ['\0'] * buffer_len) + for i in range(buffer_len): + data[len(self.data) + i] = buffer.getitem(i) + return self._new(data) + + def descr_reverse(self, space): self.data.reverse() +class BytearrayBuilder(object): + def __init__(self, size): + self.data = newlist_hint(size) + + def append(self, s): + for i in range(len(s)): + self.data.append(s[i]) + + def build(self): + return self.data + + # ____________________________________________________________ # helpers for slow paths, moved out because they contain loops @@ -1152,3 +1255,13 @@ def setitem(self, index, char): self.data[index] = char + + + at specialize.argtype(0) +def _memcmp(selfvalue, buffer, length): + for i in range(length): + if selfvalue[i] < buffer.getitem(i): + return -1 + if selfvalue[i] > buffer.getitem(i): + return 1 + return 0 diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -480,6 +480,11 @@ _val = str_w @staticmethod + def _use_rstr_ops(space, w_other): + from pypy.objspace.std.unicodeobject import W_UnicodeObject + return isinstance(w_other, (W_BytesObject, W_UnicodeObject)) + + @staticmethod def _op_val(space, w_other): try: return space.str_w(w_other) diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -1,7 +1,7 @@ """Functionality shared between bytes/bytearray/unicode""" from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from 
rpython.rlib.objectmodel import specialize, newlist_hint from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.rstring import endswith, replace, rsplit, split, startswith @@ -36,17 +36,27 @@ def descr_contains(self, space, w_sub): value = self._val(space) - other = self._op_val(space, w_sub) - return space.newbool(value.find(other) >= 0) + if self._use_rstr_ops(space, w_sub): + other = self._op_val(space, w_sub) + return space.newbool(value.find(other) >= 0) + + buffer = _get_buffer(space, w_sub) + res = _search_slowpath(value, buffer, 0, len(value), FAST_FIND) + return space.newbool(res >= 0) def descr_add(self, space, w_other): - try: - other = self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return self._new(self._val(space) + other) + if self._use_rstr_ops(space, w_other): + try: + other = self._op_val(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return space.w_NotImplemented + raise + return self._new(self._val(space) + other) + + # Bytearray overrides this method, CPython doesn't support contacting + # buffers and strs, and unicodes are always handled above + return space.w_NotImplemented def descr_mul(self, space, w_times): try: @@ -128,14 +138,21 @@ def descr_count(self, space, w_sub, w_start=None, w_end=None): value, start, end = self._convert_idx_params(space, w_start, w_end) - return space.newint(value.count(self._op_val(space, w_sub), start, - end)) + + if self._use_rstr_ops(space, w_sub): + return space.newint(value.count(self._op_val(space, w_sub), start, + end)) + + buffer = _get_buffer(space, w_sub) + res = _search_slowpath(value, buffer, start, end, FAST_COUNT) + return space.wrap(max(res, 0)) def descr_decode(self, space, w_encoding=None, w_errors=None): from pypy.objspace.std.unicodeobject import ( _get_encoding_and_errors, decode_object, unicode_from_string) encoding, errors = 
_get_encoding_and_errors(space, w_encoding, w_errors) + # TODO: On CPython calling bytearray.decode with no arguments works. if encoding is None and errors is None: return unicode_from_string(space, self) return decode_object(space, self, encoding, errors) @@ -192,30 +209,52 @@ def descr_find(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.find(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.find(self._op_val(space, w_sub), start, end) + return space.wrap(res) + + buffer = _get_buffer(space, w_sub) + res = _search_slowpath(value, buffer, start, end, FAST_FIND) return space.wrap(res) def descr_rfind(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.rfind(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.rfind(self._op_val(space, w_sub), start, end) + return space.wrap(res) + + buffer = _get_buffer(space, w_sub) + res = _search_slowpath(value, buffer, start, end, FAST_RFIND) return space.wrap(res) def descr_index(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.find(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.find(self._op_val(space, w_sub), start, end) + else: + buffer = _get_buffer(space, w_sub) + res = _search_slowpath(value, buffer, start, end, FAST_FIND) + if res < 0: raise oefmt(space.w_ValueError, "substring not found in string.index") - return space.wrap(res) def descr_rindex(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.rfind(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.rfind(self._op_val(space, w_sub), start, end) + else: + buffer = 
_get_buffer(space, w_sub) + res = _search_slowpath(value, buffer, start, end, FAST_RFIND) + if res < 0: raise oefmt(space.w_ValueError, "substring not found in string.rindex") - return space.wrap(res) @specialize.arg(2) @@ -328,6 +367,7 @@ value = self._val(space) prealloc_size = len(value) * (size - 1) + unwrapped = newlist_hint(size) for i in range(size): w_s = list_w[i] check_item = self._join_check_item(space, w_s) @@ -337,13 +377,16 @@ i, w_s) elif check_item == 2: return self._join_autoconvert(space, list_w) - prealloc_size += len(self._op_val(space, w_s)) + # XXX Maybe the extra copy here is okay? It was basically going to + # happen anyway, what with being placed into the builder + unwrapped.append(self._op_val(space, w_s)) + prealloc_size += len(unwrapped[0]) sb = self._builder(prealloc_size) for i in range(size): if value and i != 0: sb.append(value) - sb.append(self._op_val(space, list_w[i])) + sb.append(unwrapped[i]) return self._new(sb.build()) def _join_autoconvert(self, space, list_w): @@ -386,10 +429,22 @@ def descr_partition(self, space, w_sub): value = self._val(space) - sub = self._op_val(space, w_sub) - if not sub: + + if self._use_rstr_ops(space, w_sub): + sub = self._op_val(space, w_sub) + sublen = len(sub) + else: + sub = _get_buffer(space, w_sub) + sublen = sub.getlength() + + if sublen == 0: raise oefmt(space.w_ValueError, "empty separator") - pos = value.find(sub) + + if self._use_rstr_ops(space, w_sub): + pos = value.find(sub) + else: + pos = _search_slowpath(value, sub, 0, len(value), FAST_FIND) + if pos == -1: from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): @@ -398,17 +453,29 @@ else: from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): - w_sub = self._new(sub) + w_sub = self._new_from_buffer(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, self._sliced(space, value, pos+len(sub), len(value), self)]) 
def descr_rpartition(self, space, w_sub): value = self._val(space) - sub = self._op_val(space, w_sub) - if not sub: + + if self._use_rstr_ops(space, w_sub): + sub = self._op_val(space, w_sub) + sublen = len(sub) + else: + sub = _get_buffer(space, w_sub) + sublen = sub.getlength() + + if sublen == 0: raise oefmt(space.w_ValueError, "empty separator") - pos = value.rfind(sub) + + if self._use_rstr_ops(space, w_sub): + pos = value.rfind(sub) + else: + pos = _search_slowpath(value, sub, 0, len(value), FAST_RFIND) + if pos == -1: from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): @@ -417,7 +484,7 @@ else: from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): - w_sub = self._new(sub) + w_sub = self._new_from_buffer(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, self._sliced(space, value, pos+len(sub), len(value), self)]) @@ -616,10 +683,11 @@ for char in string: buf.append(table[ord(char)]) else: + # XXX Why not preallocate here too? buf = self._builder() deletion_table = [False] * 256 - for c in deletechars: - deletion_table[ord(c)] = True + for i in range(len(deletechars)): + deletion_table[ord(deletechars[i])] = True for char in string: if not deletion_table[ord(char)]: buf.append(table[ord(char)]) @@ -662,3 +730,118 @@ @specialize.argtype(0) def _descr_getslice_slowpath(selfvalue, start, step, sl): return [selfvalue[start + i*step] for i in range(sl)] + +def _get_buffer(space, w_obj): + return space.buffer_w(w_obj, space.BUF_SIMPLE) + + + +# Stolen form rpython.rtyper.lltypesytem.rstr +# TODO: Ask about what to do with this... 
+ +FAST_COUNT = 0 +FAST_FIND = 1 +FAST_RFIND = 2 + +from rpython.rlib.rarithmetic import LONG_BIT as BLOOM_WIDTH + +def bloom_add(mask, c): + return mask | (1 << (ord(c) & (BLOOM_WIDTH - 1))) + + +def bloom(mask, c): + return mask & (1 << (ord(c) & (BLOOM_WIDTH - 1))) + + at specialize.argtype(0, 1) +def _search_slowpath(value, buffer, start, end, mode): + if start < 0: + start = 0 + if end > len(value): + end = len(value) + if start > end: + return -1 + + count = 0 + n = end - start + m = buffer.getlength() + + if m == 0: + if mode == FAST_COUNT: + return end - start + 1 + elif mode == FAST_RFIND: + return end + else: + return start + + w = n - m + + if w < 0: + return -1 + + mlast = m - 1 + skip = mlast - 1 + mask = 0 + + if mode != FAST_RFIND: + for i in range(mlast): + mask = bloom_add(mask, buffer.getitem(i)) + if buffer.getitem(i) == buffer.getitem(mlast): + skip = mlast - i - 1 + mask = bloom_add(mask, buffer.getitem(mlast)) + + i = start - 1 + while i + 1 <= start + w: + i += 1 + if value[i + m - 1] == buffer.getitem(m - 1): + for j in range(mlast): + if value[i + j] != buffer.getitem(j): + break + else: + if mode != FAST_COUNT: + return i + count += 1 + i += mlast + continue + + if i + m < len(value): + c = value[i + m] + else: + c = '\0' + if not bloom(mask, c): + i += m + else: + i += skip + else: + if i + m < len(value): + c = value[i + m] + else: + c = '\0' + if not bloom(mask, c): + i += m + else: + mask = bloom_add(mask, buffer.getitem(0)) + for i in range(mlast, 0, -1): + mask = bloom_add(mask, buffer.getitem(i)) + if buffer.getitem(i) == buffer.getitem(0): + skip = i - 1 + + i = start + w + 1 + while i - 1 >= start: + i -= 1 + if value[i] == buffer.getitem(0): + for j in xrange(mlast, 0, -1): + if value[i + j] != buffer.getitem(j): + break + else: + return i + if i - 1 >= 0 and not bloom(mask, value[i - 1]): + i -= m + else: + i -= skip + else: + if i - 1 >= 0 and not bloom(mask, value[i - 1]): + i -= m + + if mode != FAST_COUNT: + return -1 + 
return count diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -178,8 +178,10 @@ assert bytearray('hello').rindex('l') == 3 assert bytearray('hello').index(bytearray('e')) == 1 assert bytearray('hello').find('l') == 2 + assert bytearray('hello').find('l', -2) == 3 assert bytearray('hello').rfind('l') == 3 + # these checks used to not raise in pypy but they should raises(TypeError, bytearray('hello').index, ord('e')) raises(TypeError, bytearray('hello').rindex, ord('e')) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -103,6 +103,12 @@ _val = unicode_w @staticmethod + def _use_rstr_ops(space, w_other): + # Always return true because we always need to copy the other + # operand(s) before we can do comparisons + return True + + @staticmethod def _op_val(space, w_other): if isinstance(w_other, W_UnicodeObject): return w_other._value From noreply at buildbot.pypy.org Mon Jun 2 19:47:04 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:04 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: All bytearray ops work except for .replace. Some operands are still copied Message-ID: <20140602174704.D85321C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71873:fd4cad71fdab Date: 2014-05-26 09:04 -0500 http://bitbucket.org/pypy/pypy/changeset/fd4cad71fdab/ Log: All bytearray ops work except for .replace. 
Some operands are still copied diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -62,6 +62,10 @@ raise oefmt(space.w_IndexError, "bytearray index out of range") return space.wrap(ord(character)) + def _fillchar(self, space, w_fillchar): + c = self._op_val(space, w_fillchar) + return [c], len(c) + def _val(self, space): return self.data @@ -77,6 +81,9 @@ assert len(char) == 1 return str(char)[0] + def _multi_chr(self, char): + return [self._chr(char)] + @staticmethod def _builder(size=100): return BytearrayBuilder(size) @@ -495,7 +502,6 @@ data[len(self.data) + i] = buffer.getitem(i) return self._new(data) - def descr_reverse(self, space): self.data.reverse() @@ -507,6 +513,13 @@ for i in range(len(s)): self.data.append(s[i]) + def append_multiple_char(self, c, count): + self.data.extend([c] * count) + + def append_slice(self, value, start, end): + for i in range(start, end): + self.data.append(value[i]) + def build(self): return self.data diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -3,7 +3,9 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import specialize, newlist_hint from rpython.rlib.rarithmetic import ovfcheck -from rpython.rlib.rstring import endswith, replace, rsplit, split, startswith +from rpython.rlib.rstring import ( + search, SEARCH_FIND, SEARCH_RFIND, SEARCH_COUNT, endswith, replace, rsplit, + split, startswith) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, unwrap_spec @@ -28,6 +30,9 @@ space, lenself, w_start, w_end, upper_bound=upper_bound) return (value, start, end) + def _multi_chr(self, c): + return self._chr(c) + def descr_len(self, space): return space.wrap(self._len()) @@ -41,7 +46,7 @@ return 
space.newbool(value.find(other) >= 0) buffer = _get_buffer(space, w_sub) - res = _search_slowpath(value, buffer, 0, len(value), FAST_FIND) + res = search(value, buffer, 0, len(value), SEARCH_FIND) return space.newbool(res >= 0) def descr_add(self, space, w_other): @@ -129,7 +134,7 @@ d = width - len(value) if d > 0: offset = d//2 + (d & width & 1) - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar) centered = offset * fillchar + value + (d - offset) * fillchar else: centered = value @@ -144,7 +149,7 @@ end)) buffer = _get_buffer(space, w_sub) - res = _search_slowpath(value, buffer, start, end, FAST_COUNT) + res = search(value, buffer, start, end, SEARCH_COUNT) return space.wrap(max(res, 0)) def descr_decode(self, space, w_encoding=None, w_errors=None): @@ -152,7 +157,6 @@ _get_encoding_and_errors, decode_object, unicode_from_string) encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - # TODO: On CPython calling bytearray.decode with no arguments works. 
if encoding is None and errors is None: return unicode_from_string(space, self) return decode_object(space, self, encoding, errors) @@ -170,7 +174,11 @@ if not value: return self._empty() - splitted = value.split(self._chr('\t')) + if self._use_rstr_ops(value, self): + splitted = value.split(self._chr('\t')) + else: + splitted = split(value, self._chr('\t')) + try: ovfcheck(len(splitted) * tabsize) except OverflowError: @@ -178,7 +186,7 @@ expanded = oldtoken = splitted.pop(0) for token in splitted: - expanded += self._chr(' ') * self._tabindent(oldtoken, + expanded += self._multi_chr(' ') * self._tabindent(oldtoken, tabsize) + token oldtoken = token @@ -215,7 +223,7 @@ return space.wrap(res) buffer = _get_buffer(space, w_sub) - res = _search_slowpath(value, buffer, start, end, FAST_FIND) + res = search(value, buffer, start, end, SEARCH_FIND) return space.wrap(res) def descr_rfind(self, space, w_sub, w_start=None, w_end=None): @@ -226,7 +234,7 @@ return space.wrap(res) buffer = _get_buffer(space, w_sub) - res = _search_slowpath(value, buffer, start, end, FAST_RFIND) + res = search(value, buffer, start, end, SEARCH_RFIND) return space.wrap(res) def descr_index(self, space, w_sub, w_start=None, w_end=None): @@ -236,7 +244,7 @@ res = value.find(self._op_val(space, w_sub), start, end) else: buffer = _get_buffer(space, w_sub) - res = _search_slowpath(value, buffer, start, end, FAST_FIND) + res = search(value, buffer, start, end, SEARCH_FIND) if res < 0: raise oefmt(space.w_ValueError, @@ -250,7 +258,7 @@ res = value.rfind(self._op_val(space, w_sub), start, end) else: buffer = _get_buffer(space, w_sub) - res = _search_slowpath(value, buffer, start, end, FAST_RFIND) + res = search(value, buffer, start, end, SEARCH_RFIND) if res < 0: raise oefmt(space.w_ValueError, @@ -401,7 +409,7 @@ "ljust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar) 
value += d * fillchar return self._new(value) @@ -415,7 +423,7 @@ "rjust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar) value = d * fillchar + value return self._new(value) @@ -443,7 +451,7 @@ if self._use_rstr_ops(space, w_sub): pos = value.find(sub) else: - pos = _search_slowpath(value, sub, 0, len(value), FAST_FIND) + pos = search(value, sub, 0, len(value), SEARCH_FIND) if pos == -1: from pypy.objspace.std.bytearrayobject import W_BytearrayObject @@ -456,7 +464,7 @@ w_sub = self._new_from_buffer(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, - self._sliced(space, value, pos+len(sub), len(value), self)]) + self._sliced(space, value, pos + sublen, len(value), self)]) def descr_rpartition(self, space, w_sub): value = self._val(space) @@ -474,7 +482,7 @@ if self._use_rstr_ops(space, w_sub): pos = value.rfind(sub) else: - pos = _search_slowpath(value, sub, 0, len(value), FAST_RFIND) + pos = search(value, sub, 0, len(value), SEARCH_RFIND) if pos == -1: from pypy.objspace.std.bytearrayobject import W_BytearrayObject @@ -487,7 +495,7 @@ w_sub = self._new_from_buffer(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, - self._sliced(space, value, pos+len(sub), len(value), self)]) + self._sliced(space, value, pos + sublen, len(value), self)]) @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): @@ -735,113 +743,3 @@ return space.buffer_w(w_obj, space.BUF_SIMPLE) - -# Stolen form rpython.rtyper.lltypesytem.rstr -# TODO: Ask about what to do with this... 
- -FAST_COUNT = 0 -FAST_FIND = 1 -FAST_RFIND = 2 - -from rpython.rlib.rarithmetic import LONG_BIT as BLOOM_WIDTH - -def bloom_add(mask, c): - return mask | (1 << (ord(c) & (BLOOM_WIDTH - 1))) - - -def bloom(mask, c): - return mask & (1 << (ord(c) & (BLOOM_WIDTH - 1))) - - at specialize.argtype(0, 1) -def _search_slowpath(value, buffer, start, end, mode): - if start < 0: - start = 0 - if end > len(value): - end = len(value) - if start > end: - return -1 - - count = 0 - n = end - start - m = buffer.getlength() - - if m == 0: - if mode == FAST_COUNT: - return end - start + 1 - elif mode == FAST_RFIND: - return end - else: - return start - - w = n - m - - if w < 0: - return -1 - - mlast = m - 1 - skip = mlast - 1 - mask = 0 - - if mode != FAST_RFIND: - for i in range(mlast): - mask = bloom_add(mask, buffer.getitem(i)) - if buffer.getitem(i) == buffer.getitem(mlast): - skip = mlast - i - 1 - mask = bloom_add(mask, buffer.getitem(mlast)) - - i = start - 1 - while i + 1 <= start + w: - i += 1 - if value[i + m - 1] == buffer.getitem(m - 1): - for j in range(mlast): - if value[i + j] != buffer.getitem(j): - break - else: - if mode != FAST_COUNT: - return i - count += 1 - i += mlast - continue - - if i + m < len(value): - c = value[i + m] - else: - c = '\0' - if not bloom(mask, c): - i += m - else: - i += skip - else: - if i + m < len(value): - c = value[i + m] - else: - c = '\0' - if not bloom(mask, c): - i += m - else: - mask = bloom_add(mask, buffer.getitem(0)) - for i in range(mlast, 0, -1): - mask = bloom_add(mask, buffer.getitem(i)) - if buffer.getitem(i) == buffer.getitem(0): - skip = i - 1 - - i = start + w + 1 - while i - 1 >= start: - i -= 1 - if value[i] == buffer.getitem(0): - for j in xrange(mlast, 0, -1): - if value[i + j] != buffer.getitem(j): - break - else: - return i - if i - 1 >= 0 and not bloom(mask, value[i - 1]): - i -= m - else: - i -= skip - else: - if i - 1 >= 0 and not bloom(mask, value[i - 1]): - i -= m - - if mode != FAST_COUNT: - return -1 - 
return count diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -442,6 +442,7 @@ u = b.decode('utf-8') assert isinstance(u, unicode) assert u == u'abcdefghi' + assert b.decode() def test_int(self): assert int(bytearray('-1234')) == -1234 diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -7,7 +7,8 @@ from rpython.rtyper.llannotation import SomePtr from rpython.rlib import jit from rpython.rlib.objectmodel import newlist_hint, specialize -from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT as BLOOM_WIDTH +from rpython.rlib.buffer import Buffer from rpython.rlib.unicodedata import unicodedb_5_2_0 as unicodedb from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.tool.pairtype import pairtype @@ -15,6 +16,35 @@ # -------------- public API for string functions ----------------------- + at specialize.argtype(0, 1) +def _get_access_functions(value, other): + if isinstance(other, (str, unicode, list)): + def getitem(obj, i): + return obj[i] + def getlength(obj): + return len(obj) + else: + assert isinstance(other, Buffer) + def getitem(obj, i): + return obj.getitem(i) + def getlength(obj): + return obj.getlength() + + if isinstance(value, list) or isinstance(other, Buffer): + def find(obj, other, start, end): + return search(obj, other, start, end, SEARCH_FIND) + def rfind(obj, other, start, end): + return search(obj, other, start, end, SEARCH_RFIND) + else: + assert isinstance(value, (str, unicode)) + assert isinstance(other, (str, unicode)) + def find(obj, other, start, end): + return obj.find(other, start, end) + def rfind(obj, other, start, end): + return obj.rfind(other, start, end) + + return getitem, getlength, find, rfind + @specialize.argtype(0) def 
_isspace(char): if isinstance(char, str): @@ -55,10 +85,11 @@ i = j + 1 return res - if isinstance(value, str): + if isinstance(value, (list, str)): assert isinstance(by, str) else: assert isinstance(by, unicode) + _, _, find, _ = _get_access_functions(value, by) bylen = len(by) if bylen == 0: raise ValueError("empty separator") @@ -72,7 +103,7 @@ count = maxsplit res = newlist_hint(count + 1) while count > 0: - next = value.find(by, start) + next = find(value, by, start, len(value)) assert next >= 0 # cannot fail due to the value.count above res.append(value[start:next]) start = next + bylen @@ -86,7 +117,7 @@ res = [] while maxsplit != 0: - next = value.find(by, start) + next = find(value, by, start, len(value)) if next < 0: break res.append(value[start:next]) @@ -133,7 +164,7 @@ res.reverse() return res - if isinstance(value, str): + if isinstance(value, (list, str)): assert isinstance(by, str) else: assert isinstance(by, unicode) @@ -141,13 +172,14 @@ res = newlist_hint(min(maxsplit + 1, len(value))) else: res = [] + _, _, _, rfind = _get_access_functions(value, by) end = len(value) bylen = len(by) if bylen == 0: raise ValueError("empty separator") while maxsplit != 0: - next = value.rfind(by, 0, end) + next = rfind(value, by, 0, end) if next < 0: break res.append(value[next + bylen:end]) @@ -166,13 +198,20 @@ assert isinstance(sub, str) assert isinstance(by, str) Builder = StringBuilder - else: + elif isinstance(input, unicode): assert isinstance(sub, unicode) assert isinstance(by, unicode) Builder = UnicodeBuilder + elif isinstance(input, list): + assert isinstance(sub, str) + assert isinstance(by, str) + # TODO: ???? 
+ Builder = StringBuilder if maxsplit == 0: return input + _, _, find, _ = _get_access_functions(input, sub) + if not sub: upper = len(input) if maxsplit > 0 and maxsplit < upper + 2: @@ -210,7 +249,7 @@ sublen = len(sub) while maxsplit != 0: - next = input.find(sub, start) + next = find(input, sub, start, len(input)) if next < 0: break builder.append_slice(input, start, next) @@ -261,6 +300,114 @@ return False return True +# Stolen form rpython.rtyper.lltypesytem.rstr +# TODO: Ask about what to do with this... + +SEARCH_COUNT = 0 +SEARCH_FIND = 1 +SEARCH_RFIND = 2 + +def bloom_add(mask, c): + return mask | (1 << (ord(c) & (BLOOM_WIDTH - 1))) + +def bloom(mask, c): + return mask & (1 << (ord(c) & (BLOOM_WIDTH - 1))) + + at specialize.argtype(0, 1) +def search(value, other, start, end, mode): + getitem, getlength, _, _ = _get_access_functions(value, other) + if start < 0: + start = 0 + if end > len(value): + end = len(value) + if start > end: + return -1 + + count = 0 + n = end - start + m = getlength(other) + + if m == 0: + if mode == SEARCH_COUNT: + return end - start + 1 + elif mode == SEARCH_RFIND: + return end + else: + return start + + w = n - m + + if w < 0: + return -1 + + mlast = m - 1 + skip = mlast - 1 + mask = 0 + + if mode != SEARCH_RFIND: + for i in range(mlast): + mask = bloom_add(mask, getitem(other, i)) + if getitem(other, i) == getitem(other, mlast): + skip = mlast - i - 1 + mask = bloom_add(mask, getitem(other, mlast)) + + i = start - 1 + while i + 1 <= start + w: + i += 1 + if value[i + m - 1] == getitem(other, m - 1): + for j in range(mlast): + if value[i + j] != getitem(other, j): + break + else: + if mode != SEARCH_COUNT: + return i + count += 1 + i += mlast + continue + + if i + m < len(value): + c = value[i + m] + else: + c = '\0' + if not bloom(mask, c): + i += m + else: + i += skip + else: + if i + m < len(value): + c = value[i + m] + else: + c = '\0' + if not bloom(mask, c): + i += m + else: + mask = bloom_add(mask, getitem(other, 0)) + 
for i in range(mlast, 0, -1): + mask = bloom_add(mask, getitem(other, i)) + if getitem(other, i) == getitem(other, 0): + skip = i - 1 + + i = start + w + 1 + while i - 1 >= start: + i -= 1 + if value[i] == getitem(other, 0): + for j in xrange(mlast, 0, -1): + if value[i + j] != getitem(other, j): + break + else: + return i + if i - 1 >= 0 and not bloom(mask, value[i - 1]): + i -= m + else: + i -= skip + else: + if i - 1 >= 0 and not bloom(mask, value[i - 1]): + i -= m + + if mode != SEARCH_COUNT: + return -1 + return count + # -------------- numeric parsing support -------------------- def strip_spaces(s): From noreply at buildbot.pypy.org Mon Jun 2 19:47:11 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:11 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Add to RPython support for __getitem__, __setitem, __getslice__, __setslice__, and __len__ Message-ID: <20140602174711.41EAB1C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71878:5e3423ac75bf Date: 2014-06-01 05:26 -0500 http://bitbucket.org/pypy/pypy/changeset/5e3423ac75bf/ Log: Add to RPython support for __getitem__, __setitem, __getslice__, __setslice__, and __len__ diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -719,6 +719,14 @@ return super(thistype, pair(ins1, ins2)).improve() +class __extend__(pairtype(SomeInstance, SomeObject)): + def getitem((s_ins, s_idx)): + return s_ins._emulate_call("__getitem__", s_idx) + + def setitem((s_ins, s_idx), s_value): + return s_ins._emulate_call("__setitem__", s_idx, s_value) + + class __extend__(pairtype(SomeIterator, SomeIterator)): def union((iter1, iter2)): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -3937,6 +3937,78 @@ 
s = a.build_types(fn, [int]) assert isinstance(s, annmodel.SomeInteger) + def test_instance_getitem(self): + class A(object): + def __getitem__(self, i): + return i * i + + def fn(i): + a = A() + return a[i] + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int]) + assert len(a.translator.graphs) == 2 # fn, __getitem__ + assert isinstance(s, annmodel.SomeInteger) + + def test_instance_setitem(self): + class A(object): + def __setitem__(self, i, v): + self.value = i * v + + def fn(i, v): + a = A() + a[i] = v + return a.value + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int, int]) + assert len(a.translator.graphs) == 2 # fn, __setitem__ + assert isinstance(s, annmodel.SomeInteger) + + def test_instance_getslice(self): + class A(object): + def __getslice__(self, stop, start): + return "Test"[stop:start] + + def fn(): + a = A() + return a[0:2] + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __getslice__ + assert isinstance(s, annmodel.SomeString) + + def test_instance_setslice(self): + class A(object): + def __setslice__(self, stop, start, value): + self.value = value + + def fn(): + a = A() + a[0:2] = '00' + return a.value + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __setslice__ + assert isinstance(s, annmodel.SomeString) + + def test_instance_len(self): + class A(object): + def __len__(self): + return 0 + + def fn(): + a = A() + return len(a) + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __len__ + assert isinstance(s, annmodel.SomeInteger) + def test_reversed(self): def fn(n): for elem in reversed([1, 2, 3, 4, 5]): diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -683,19 +683,27 @@ if not self.can_be_None: s.const = True + def _emulate_call(self, meth_name, *args_s): + bk = 
getbookkeeper() + s_attr = self._true_getattr(meth_name) + # record for calltables + bk.emulate_pbc_call(bk.position_key, s_attr, args_s) + return s_attr.call(simple_args(args_s)) + def iter(self): - s_iterable = self._true_getattr('__iter__') - bk = getbookkeeper() - # record for calltables - bk.emulate_pbc_call(bk.position_key, s_iterable, []) - return s_iterable.call(simple_args([])) + return self._emulate_call('__iter__') def next(self): - s_next = self._true_getattr('next') - bk = getbookkeeper() - # record for calltables - bk.emulate_pbc_call(bk.position_key, s_next, []) - return s_next.call(simple_args([])) + return self._emulate_call('next') + + def len(self): + return self._emulate_call('__len__') + + def getslice(self, s_start, s_stop): + return self._emulate_call('__getslice__', s_start, s_stop) + + def setslice(self, s_start, s_stop, s_iterable): + return self._emulate_call('__setslice__', s_start, s_stop, s_iterable) class __extend__(SomeBuiltin): def simple_call(self, *args): diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py --- a/rpython/rlib/buffer.py +++ b/rpython/rlib/buffer.py @@ -12,6 +12,9 @@ def getlength(self): raise NotImplementedError + def __len__(self): + return self.getlength() + def as_str(self): "Returns an interp-level string with the whole content of the buffer." # May be overridden. @@ -21,14 +24,23 @@ "Returns the index'th character in the buffer." raise NotImplementedError # Must be overriden. No bounds checks. + def __getitem__(self, i): + return self.getitem(i) + def getslice(self, start, stop, step, size): # May be overridden. No bounds checks. return ''.join([self.getitem(i) for i in range(start, stop, step)]) + def __getslice__(self, start, stop): + return self.getslice(start, stop, 1, stop - start) + def setitem(self, index, char): "Write a character into the buffer." raise NotImplementedError # Must be overriden. No bounds checks. 
+ def __setitem__(self, i, char): + return self.setitem(i, char) + def setslice(self, start, string): # May be overridden. No bounds checks. for i in range(len(string)): diff --git a/rpython/rlib/test/test_buffer.py b/rpython/rlib/test/test_buffer.py --- a/rpython/rlib/test/test_buffer.py +++ b/rpython/rlib/test/test_buffer.py @@ -4,7 +4,10 @@ def test_string_buffer(): buf = StringBuffer('hello world') assert buf.getitem(4) == 'o' + assert buf.getitem(4) == buf[4] assert buf.getlength() == 11 + assert buf.getlength() == len(buf) assert buf.getslice(1, 6, 1, 5) == 'ello ' + assert buf.getslice(1, 6, 1, 5) == buf[1:6] assert buf.getslice(1, 6, 2, 3) == 'el ' assert buf.as_str() == 'hello world' diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -7,6 +7,7 @@ from rpython.rtyper.lltypesystem.lltype import Void from rpython.rtyper.rmodel import Repr, getgcflavor, inputconst from rpython.rlib.objectmodel import UnboxedValue +from rpython.tool.pairtype import pairtype class FieldListAccessor(object): @@ -390,7 +391,7 @@ raise NotImplementedError def _emulate_call(self, hop, meth_name): - vinst, = hop.inputargs(self) + vinst = hop.args_v[0] clsdef = hop.args_s[0].classdef s_unbound_attr = clsdef.find_attribute(meth_name).getvalue() s_attr = clsdef.lookup_filter(s_unbound_attr, meth_name, @@ -402,10 +403,10 @@ r_method = self.rtyper.getrepr(s_attr) r_method.get_method_from_instance(self, vinst, hop.llops) hop2 = hop.copy() - hop2.spaceop = op.simple_call(hop.spaceop.args[0]) + hop2.spaceop = op.simple_call(*hop.spaceop.args) hop2.spaceop.result = hop.spaceop.result - hop2.args_r = [r_method] - hop2.args_s = [s_attr] + hop2.args_r[0] = r_method + hop2.args_s[0] = s_attr return hop2.dispatch() def rtype_iter(self, hop): @@ -414,6 +415,15 @@ def rtype_next(self, hop): return self._emulate_call(hop, 'next') + def rtype_getslice(self, hop): + return self._emulate_call(hop, "__getslice__") + + def 
rtype_setslice(self, hop): + return self._emulate_call(hop, "__setslice__") + + def rtype_len(self, hop): + return self._emulate_call(hop, "__len__") + def ll_str(self, i): raise NotImplementedError @@ -460,6 +470,16 @@ if len(seen) == oldlength: break + +class __extend__(pairtype(AbstractInstanceRepr, Repr)): + def rtype_getitem((r_ins, r_obj), hop): + return r_ins._emulate_call(hop, "__getitem__") + + def rtype_setitem((r_ins, r_obj), hop): + return r_ins._emulate_call(hop, "__setitem__") + + + # ____________________________________________________________ def rtype_new_instance(rtyper, classdef, llops, classcallhop=None): diff --git a/rpython/rtyper/test/test_rclass.py b/rpython/rtyper/test/test_rclass.py --- a/rpython/rtyper/test/test_rclass.py +++ b/rpython/rtyper/test/test_rclass.py @@ -1193,6 +1193,69 @@ assert self.interpret(f, [True]) == f(True) assert self.interpret(f, [False]) == f(False) + def test_indexing(self): + class A(object): + def __init__(self, data): + self.data = data + + def __getitem__(self, i): + return self.data[i] + + def __setitem__(self, i, v): + self.data[i] = v + + def __getslice__(self, start, stop): + assert start >= 0 + assert stop >= 0 + return self.data[start:stop] + + def __setslice__(self, start, stop, v): + assert start >= 0 + assert stop >= 0 + i = 0 + for n in range(start, stop): + self.data[n] = v[i] + i += 1 + + def getitem(i): + a = A("abcdefg") + return a[i] + + def setitem(i, v): + a = A([0] * 5) + a[i] = v + return a[i] + + def getslice(start, stop): + a = A([1, 2, 3, 4, 5, 6]) + sum = 0 + for i in a[start:stop]: + sum += i + return sum + + def setslice(start, stop, i): + a = A([0] * stop) + a[start:stop] = range(start, stop) + return a[i] + + assert self.interpret(getitem, [0]) == getitem(0) + assert self.interpret(getitem, [1]) == getitem(1) + assert self.interpret(setitem, [0, 5]) == setitem(0, 5) + assert self.interpret(getslice, [0, 4]) == getslice(0, 4) + assert self.interpret(getslice, [1, 4]) == getslice(1, 4) 
+ assert self.interpret(setslice, [4, 6, 5]) == setslice(4, 6, 5) + + def test_len(self): + class A(object): + def __len__(self): + return 5 + + def fn(): + a = A() + return len(a) + + assert self.interpret(fn, []) == fn() + def test_init_with_star_args(self): class Base(object): def __init__(self, a, b): From noreply at buildbot.pypy.org Mon Jun 2 19:47:06 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:06 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Test new rlib.rstring functionality directly in rpython tests Message-ID: <20140602174706.1FDCA1C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71874:cdb700a29e83 Date: 2014-05-26 15:00 -0500 http://bitbucket.org/pypy/pypy/changeset/cdb700a29e83/ Log: Test new rlib.rstring functionality directly in rpython tests diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -2,18 +2,24 @@ from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit from rpython.rlib.rstring import replace, startswith, endswith +from rpython.rlib.rstring import search, SEARCH_FIND, SEARCH_RFIND, SEARCH_COUNT from rpython.rtyper.test.tool import BaseRtypingTest def test_split(): - assert split("", 'x') == [''] - assert split("a", "a", 1) == ['', ''] - assert split(" ", " ", 1) == ['', ''] - assert split("aa", "a", 2) == ['', '', ''] - assert split('a|b|c|d', '|') == ['a', 'b', 'c', 'd'] - assert split('a|b|c|d', '|', 2) == ['a', 'b', 'c|d'] - assert split('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] - assert split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d'] - assert split('endcase test', 'test') == ['endcase ', ''] + def check_split(value, *args, **kwargs): + result = kwargs['res'] + assert split(value, *args) == result + assert split(list(value), *args) == [list(i) for i in result] + + check_split("", 'x', res=['']) + 
check_split("a", "a", 1, res=['', '']) + check_split(" ", " ", 1, res=['', '']) + check_split("aa", "a", 2, res=['', '', '']) + check_split('a|b|c|d', '|', res=['a', 'b', 'c', 'd']) + check_split('a|b|c|d', '|', 2, res=['a', 'b', 'c|d']) + check_split('a//b//c//d', '//', res=['a', 'b', 'c', 'd']) + check_split('a//b//c//d', '//', 2, res=['a', 'b', 'c//d']) + check_split('endcase test', 'test', res=['endcase ', '']) py.test.raises(ValueError, split, 'abc', '') def test_split_None(): @@ -33,13 +39,18 @@ py.test.raises(ValueError, split, u'abc', u'') def test_rsplit(): - assert rsplit("a", "a", 1) == ['', ''] - assert rsplit(" ", " ", 1) == ['', ''] - assert rsplit("aa", "a", 2) == ['', '', ''] - assert rsplit('a|b|c|d', '|') == ['a', 'b', 'c', 'd'] - assert rsplit('a|b|c|d', '|', 2) == ['a|b', 'c', 'd'] - assert rsplit('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] - assert rsplit('endcase test', 'test') == ['endcase ', ''] + def check_rsplit(value, *args, **kwargs): + result = kwargs['res'] + assert rsplit(value, *args) == result + assert rsplit(list(value), *args) == [list(i) for i in result] + + check_rsplit("a", "a", 1, res=['', '']) + check_rsplit(" ", " ", 1, res=['', '']) + check_rsplit("aa", "a", 2, res=['', '', '']) + check_rsplit('a|b|c|d', '|', res=['a', 'b', 'c', 'd']) + check_rsplit('a|b|c|d', '|', 2, res=['a|b', 'c', 'd']) + check_rsplit('a//b//c//d', '//', res=['a', 'b', 'c', 'd']) + check_rsplit('endcase test', 'test', res=['endcase ', '']) py.test.raises(ValueError, rsplit, "abc", '') def test_rsplit_None(): @@ -58,25 +69,30 @@ py.test.raises(ValueError, rsplit, u"abc", u'') def test_string_replace(): - assert replace('one!two!three!', '!', '@', 1) == 'one at two!three!' - assert replace('one!two!three!', '!', '') == 'onetwothree' - assert replace('one!two!three!', '!', '@', 2) == 'one at two@three!' 
- assert replace('one!two!three!', '!', '@', 3) == 'one at two@three@' - assert replace('one!two!three!', '!', '@', 4) == 'one at two@three@' - assert replace('one!two!three!', '!', '@', 0) == 'one!two!three!' - assert replace('one!two!three!', '!', '@') == 'one at two@three@' - assert replace('one!two!three!', 'x', '@') == 'one!two!three!' - assert replace('one!two!three!', 'x', '@', 2) == 'one!two!three!' - assert replace('abc', '', '-') == '-a-b-c-' - assert replace('abc', '', '-', 3) == '-a-b-c' - assert replace('abc', '', '-', 0) == 'abc' - assert replace('', '', '') == '' - assert replace('', '', 'a') == 'a' - assert replace('abc', 'ab', '--', 0) == 'abc' - assert replace('abc', 'xy', '--') == 'abc' - assert replace('123', '123', '') == '' - assert replace('123123', '123', '') == '' - assert replace('123x123', '123', '') == 'x' + def check_replace(value, *args, **kwargs): + result = kwargs['res'] + assert replace(value, *args) == result + assert replace(list(value), *args) == list(result) + + check_replace('one!two!three!', '!', '@', 1, res='one at two!three!') + check_replace('one!two!three!', '!', '', res='onetwothree') + check_replace('one!two!three!', '!', '@', 2, res='one at two@three!') + check_replace('one!two!three!', '!', '@', 3, res='one at two@three@') + check_replace('one!two!three!', '!', '@', 4, res='one at two@three@') + check_replace('one!two!three!', '!', '@', 0, res='one!two!three!') + check_replace('one!two!three!', '!', '@', res='one at two@three@') + check_replace('one!two!three!', 'x', '@', res='one!two!three!') + check_replace('one!two!three!', 'x', '@', 2, res='one!two!three!') + check_replace('abc', '', '-', res='-a-b-c-') + check_replace('abc', '', '-', 3, res='-a-b-c') + check_replace('abc', '', '-', 0, res='abc') + check_replace('', '', '', res='') + check_replace('', '', 'a', res='a') + check_replace('abc', 'ab', '--', 0, res='abc') + check_replace('abc', 'xy', '--', res='abc') + check_replace('123', '123', '', res='') + 
check_replace('123123', '123', '', res='') + check_replace('123x123', '123', '', res='x') def test_string_replace_overflow(): if sys.maxint > 2**31-1: @@ -122,35 +138,45 @@ replace(s, u"a", s, len(s) - 10) def test_startswith(): - assert startswith('ab', 'ab') is True - assert startswith('ab', 'a') is True - assert startswith('ab', '') is True - assert startswith('x', 'a') is False - assert startswith('x', 'x') is True - assert startswith('', '') is True - assert startswith('', 'a') is False - assert startswith('x', 'xx') is False - assert startswith('y', 'xx') is False - assert startswith('ab', 'a', 0) is True - assert startswith('ab', 'a', 1) is False - assert startswith('ab', 'b', 1) is True - assert startswith('abc', 'bc', 1, 2) is False - assert startswith('abc', 'c', -1, 4) is True + def check_startswith(value, sub, *args, **kwargs): + result = kwargs['res'] + assert startswith(value, sub, *args) is result + assert startswith(list(value), sub, *args) is result + + check_startswith('ab', 'ab', res=True) + check_startswith('ab', 'a', res=True) + check_startswith('ab', '', res=True) + check_startswith('x', 'a', res=False) + check_startswith('x', 'x', res=True) + check_startswith('', '', res=True) + check_startswith('', 'a', res=False) + check_startswith('x', 'xx', res=False) + check_startswith('y', 'xx', res=False) + check_startswith('ab', 'a', 0, res=True) + check_startswith('ab', 'a', 1, res=False) + check_startswith('ab', 'b', 1, res=True) + check_startswith('abc', 'bc', 1, 2, res=False) + check_startswith('abc', 'c', -1, 4, res=True) def test_endswith(): - assert endswith('ab', 'ab') is True - assert endswith('ab', 'b') is True - assert endswith('ab', '') is True - assert endswith('x', 'a') is False - assert endswith('x', 'x') is True - assert endswith('', '') is True - assert endswith('', 'a') is False - assert endswith('x', 'xx') is False - assert endswith('y', 'xx') is False - assert endswith('abc', 'ab', 0, 2) is True - assert endswith('abc', 'bc', 1) is 
True - assert endswith('abc', 'bc', 2) is False - assert endswith('abc', 'b', -3, -1) is True + def check_endswith(value, sub, *args, **kwargs): + result = kwargs['res'] + assert endswith(value, sub, *args) is result + assert endswith(list(value), sub, *args) is result + + check_endswith('ab', 'ab', res=True) + check_endswith('ab', 'b', res=True) + check_endswith('ab', '', res=True) + check_endswith('x', 'a', res=False) + check_endswith('x', 'x', res=True) + check_endswith('', '', res=True) + check_endswith('', 'a', res=False) + check_endswith('x', 'xx', res=False) + check_endswith('y', 'xx', res=False) + check_endswith('abc', 'ab', 0, 2, res=True) + check_endswith('abc', 'bc', 1, res=True) + check_endswith('abc', 'bc', 2, res=False) + check_endswith('abc', 'b', -3, -1, res=True) def test_string_builder(): s = StringBuilder() @@ -172,6 +198,24 @@ assert s.build() == 'aabcbdddd' assert isinstance(s.build(), unicode) +def test_search(): + def check_search(value, sub, *args, **kwargs): + result = kwargs['res'] + assert search(value, sub, *args) == result + assert search(list(value), sub, *args) == result + + check_search('one two three', 'ne', 0, 13, SEARCH_FIND, res=1) + check_search('one two three', 'ne', 5, 13, SEARCH_FIND, res=-1) + check_search('one two three', '', 0, 13, SEARCH_FIND, res=0) + + check_search('one two three', 'e', 0, 13, SEARCH_RFIND, res=12) + check_search('one two three', 'e', 0, 1, SEARCH_RFIND, res=-1) + check_search('one two three', '', 0, 13, SEARCH_RFIND, res=13) + + check_search('one two three', 'e', 0, 13, SEARCH_COUNT, res=3) + check_search('one two three', 'e', 0, 1, SEARCH_COUNT, res=0) + check_search('one two three', '', 0, 13, SEARCH_RFIND, res=13) + class TestTranslates(BaseRtypingTest): def test_split_rsplit(self): From noreply at buildbot.pypy.org Mon Jun 2 19:47:12 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:12 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Use [] and len() for 
buffers in rpython.rlib.rstring Message-ID: <20140602174712.868151C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71879:7372138b89d1 Date: 2014-06-01 16:26 -0500 http://bitbucket.org/pypy/pypy/changeset/7372138b89d1/ Log: Use [] and len() for buffers in rpython.rlib.rstring diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -18,20 +18,9 @@ @specialize.argtype(0, 1) def _get_access_functions(value, other): - if (isinstance(other, str) or isinstance(other, unicode) or - isinstance(other, list)): - def getitem(obj, i): - return obj[i] - def getlength(obj): - return len(obj) - else: - assert isinstance(other, Buffer) - def getitem(obj, i): - return obj.getitem(i) - def getlength(obj): - return obj.getlength() + if (not (isinstance(value, str) or isinstance(value, unicode)) or + not (isinstance(other, str) or isinstance(other, unicode))): - if isinstance(value, list) or isinstance(other, Buffer): def find(obj, other, start, end): return search(obj, other, start, end, SEARCH_FIND) def rfind(obj, other, start, end): @@ -39,8 +28,8 @@ def count(obj, other, start, end): return search(obj, other, start, end, SEARCH_COUNT) else: - assert isinstance(value, str) or isinstance(value, unicode) - assert isinstance(other, str) or isinstance(other, unicode) + assert isinstance(value, str) or isinstance(value, unicode) + assert isinstance(other, str) or isinstance(other, unicode) def find(obj, other, start, end): return obj.find(other, start, end) def rfind(obj, other, start, end): @@ -48,7 +37,7 @@ def count(obj, other, start, end): return obj.count(other, start, end) - return getitem, getlength, find, rfind, count + return find, rfind, count @specialize.argtype(0) def _isspace(char): @@ -59,7 +48,7 @@ return unicodedb.isspace(ord(char)) - at specialize.argtype(0) + at specialize.argtype(0, 1) def split(value, by=None, maxsplit=-1): if by is None: length = 
len(value) @@ -90,11 +79,7 @@ i = j + 1 return res - if isinstance(value, list) or isinstance(value, str): - assert isinstance(by, str) - else: - assert isinstance(by, unicode) - _, _, find, _, count = _get_access_functions(value, by) + find, _, count = _get_access_functions(value, by) bylen = len(by) if bylen == 0: raise ValueError("empty separator") @@ -133,7 +118,7 @@ return res - at specialize.argtype(0) + at specialize.argtype(0, 1) def rsplit(value, by=None, maxsplit=-1): if by is None: res = [] @@ -169,15 +154,11 @@ res.reverse() return res - if isinstance(value, list) or isinstance(value, str): - assert isinstance(by, str) - else: - assert isinstance(by, unicode) if maxsplit > 0: res = newlist_hint(min(maxsplit + 1, len(value))) else: res = [] - _, _, _, rfind, _ = _get_access_functions(value, by) + _, rfind, _ = _get_access_functions(value, by) end = len(value) bylen = len(by) if bylen == 0: @@ -196,27 +177,20 @@ return res - at specialize.argtype(0) + at specialize.argtype(0, 1) @jit.elidable def replace(input, sub, by, maxsplit=-1): if isinstance(input, str): - assert isinstance(sub, str) - assert isinstance(by, str) Builder = StringBuilder elif isinstance(input, unicode): - assert isinstance(sub, unicode) - assert isinstance(by, unicode) Builder = UnicodeBuilder else: assert isinstance(input, list) - assert isinstance(sub, str) - assert isinstance(by, str) - # TODO: ???? 
Builder = ByteListBuilder if maxsplit == 0: return input - _, _, find, _, count = _get_access_functions(input, sub) + find, _, count = _get_access_functions(input, sub) if not sub: upper = len(input) @@ -280,7 +254,7 @@ end = length return start, end - at specialize.argtype(0) + at specialize.argtype(0, 1) @jit.elidable def startswith(u_self, prefix, start=0, end=sys.maxint): length = len(u_self) @@ -293,7 +267,7 @@ return False return True - at specialize.argtype(0) + at specialize.argtype(0, 1) @jit.elidable def endswith(u_self, suffix, start=0, end=sys.maxint): length = len(u_self) @@ -321,7 +295,6 @@ @specialize.argtype(0, 1) def search(value, other, start, end, mode): - getitem, getlength, _, _, _ = _get_access_functions(value, other) if start < 0: start = 0 if end > len(value): @@ -331,7 +304,7 @@ count = 0 n = end - start - m = getlength(other) + m = len(other) if m == 0: if mode == SEARCH_COUNT: @@ -352,17 +325,17 @@ if mode != SEARCH_RFIND: for i in range(mlast): - mask = bloom_add(mask, getitem(other, i)) - if getitem(other, i) == getitem(other, mlast): + mask = bloom_add(mask, other[i]) + if other[i] == other[mlast]: skip = mlast - i - 1 - mask = bloom_add(mask, getitem(other, mlast)) + mask = bloom_add(mask, other[mlast]) i = start - 1 while i + 1 <= start + w: i += 1 - if value[i + m - 1] == getitem(other, m - 1): + if value[i + m - 1] == other[m - 1]: for j in range(mlast): - if value[i + j] != getitem(other, j): + if value[i + j] != other[j]: break else: if mode != SEARCH_COUNT: @@ -387,18 +360,18 @@ if not bloom(mask, c): i += m else: - mask = bloom_add(mask, getitem(other, 0)) + mask = bloom_add(mask, other[0]) for i in range(mlast, 0, -1): - mask = bloom_add(mask, getitem(other, i)) - if getitem(other, i) == getitem(other, 0): + mask = bloom_add(mask, other[i]) + if other[i] == other[0]: skip = i - 1 i = start + w + 1 while i - 1 >= start: i -= 1 - if value[i] == getitem(other, 0): + if value[i] == other[0]: for j in xrange(mlast, 0, -1): - if 
value[i + j] != getitem(other, j): + if value[i + j] != other[j]: break else: return i diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -6,10 +6,17 @@ from rpython.rtyper.test.tool import BaseRtypingTest def test_split(): - def check_split(value, *args, **kwargs): + def check_split(value, sub, *args, **kwargs): result = kwargs['res'] - assert split(value, *args) == result - assert split(list(value), *args) == [list(i) for i in result] + assert split(value, sub, *args) == result + assert split(value, buffer(sub), *args) == result + + list_result = [list(i) for i in result] + assert split(list(value), sub, *args) == list_result + assert split(list(value), buffer(sub), *args) == list_result + + assert split(buffer(value), sub, *args) == result + assert split(buffer(value), buffer(sub), *args) == result check_split("", 'x', res=['']) check_split("a", "a", 1, res=['', '']) @@ -39,10 +46,17 @@ py.test.raises(ValueError, split, u'abc', u'') def test_rsplit(): - def check_rsplit(value, *args, **kwargs): + def check_rsplit(value, sub, *args, **kwargs): result = kwargs['res'] - assert rsplit(value, *args) == result - assert rsplit(list(value), *args) == [list(i) for i in result] + assert rsplit(value, sub, *args) == result + assert rsplit(value, buffer(sub), *args) == result + + list_result = [list(i) for i in result] + assert rsplit(list(value), sub, *args) == list_result + assert rsplit(list(value), buffer(sub), *args) == list_result + + assert rsplit(buffer(value), sub, *args) == result + assert rsplit(buffer(value), buffer(sub), *args) == result check_rsplit("a", "a", 1, res=['', '']) check_rsplit(" ", " ", 1, res=['', '']) @@ -69,10 +83,13 @@ py.test.raises(ValueError, rsplit, u"abc", u'') def test_string_replace(): - def check_replace(value, *args, **kwargs): + def check_replace(value, sub, *args, **kwargs): result = kwargs['res'] - assert replace(value, 
*args) == result - assert replace(list(value), *args) == list(result) + assert replace(value, sub, *args) == result + assert replace(value, buffer(sub), *args) == result + + assert replace(list(value), sub, *args) == list(result) + assert replace(list(value), buffer(sub), *args) == list(result) check_replace('one!two!three!', '!', '@', 1, res='one at two!three!') check_replace('one!two!three!', '!', '', res='onetwothree') From noreply at buildbot.pypy.org Mon Jun 2 19:47:07 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:07 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Fix rstring stuff Message-ID: <20140602174707.635F41C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71875:3c26784156b6 Date: 2014-05-26 15:01 -0500 http://bitbucket.org/pypy/pypy/changeset/3c26784156b6/ Log: Fix rstring stuff diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -6,7 +6,7 @@ SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePBC) from rpython.rtyper.llannotation import SomePtr from rpython.rlib import jit -from rpython.rlib.objectmodel import newlist_hint, specialize +from rpython.rlib.objectmodel import newlist_hint, resizelist_hint, specialize from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT as BLOOM_WIDTH from rpython.rlib.buffer import Buffer from rpython.rlib.unicodedata import unicodedb_5_2_0 as unicodedb @@ -18,7 +18,8 @@ @specialize.argtype(0, 1) def _get_access_functions(value, other): - if isinstance(other, (str, unicode, list)): + if (isinstance(other, str) or isinstance(other, unicode) or + isinstance(other, list)): def getitem(obj, i): return obj[i] def getlength(obj): @@ -36,8 +37,8 @@ def rfind(obj, other, start, end): return search(obj, other, start, end, SEARCH_RFIND) else: - assert isinstance(value, (str, unicode)) - assert isinstance(other, (str, unicode)) + assert isinstance(value, 
str) or isinstance(value, unicode) + assert isinstance(other, str) or isinstance(other, unicode) def find(obj, other, start, end): return obj.find(other, start, end) def rfind(obj, other, start, end): @@ -85,7 +86,7 @@ i = j + 1 return res - if isinstance(value, (list, str)): + if isinstance(value, list) or isinstance(value, str): assert isinstance(by, str) else: assert isinstance(by, unicode) @@ -164,7 +165,7 @@ res.reverse() return res - if isinstance(value, (list, str)): + if isinstance(value, list) or isinstance(value, str): assert isinstance(by, str) else: assert isinstance(by, unicode) @@ -202,11 +203,12 @@ assert isinstance(sub, unicode) assert isinstance(by, unicode) Builder = UnicodeBuilder - elif isinstance(input, list): + else: + assert isinstance(input, list) assert isinstance(sub, str) assert isinstance(by, str) # TODO: ???? - Builder = StringBuilder + Builder = ByteListBuilder if maxsplit == 0: return input @@ -552,6 +554,35 @@ class UnicodeBuilder(AbstractStringBuilder): tp = unicode +class ByteListBuilder(object): + def __init__(self, init_size=INIT_SIZE): + self.l = newlist_hint(init_size) + + @specialize.argtype(1) + def append(self, s): + for c in s: + self.l.append(c) + + @specialize.argtype(1) + def append_slice(self, s, start, end): + assert 0 <= start <= end <= len(s) + for c in s[start:end]: + self.l.append(c) + + def append_multiple_char(self, c, times): + assert isinstance(c, str) + self.l.extend([c] * times) + + def append_charpsize(self, s, size): + assert size >= 0 + for i in xrange(size): + self.l.append(s[i]) + + def build(self): + return self.l + + def getlength(self): + return len(self.l) # ------------------------------------------------------------ # ----------------- implementation details ------------------- From noreply at buildbot.pypy.org Mon Jun 2 19:47:13 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:13 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Buffer.__getlen__ needs 
an assert >= 0 Message-ID: <20140602174713.C32351C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71880:cfdc8b7369aa Date: 2014-06-02 10:35 -0500 http://bitbucket.org/pypy/pypy/changeset/cfdc8b7369aa/ Log: Buffer.__getlen__ needs an assert >= 0 diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py --- a/rpython/rlib/buffer.py +++ b/rpython/rlib/buffer.py @@ -13,7 +13,9 @@ raise NotImplementedError def __len__(self): - return self.getlength() + res = self.getlength() + assert res >= 0 + return res def as_str(self): "Returns an interp-level string with the whole content of the buffer." diff --git a/rpython/rlib/test/test_buffer.py b/rpython/rlib/test/test_buffer.py --- a/rpython/rlib/test/test_buffer.py +++ b/rpython/rlib/test/test_buffer.py @@ -1,4 +1,6 @@ from rpython.rlib.buffer import * +from rpython.annotator.annrpython import RPythonAnnotator +from rpython.annotator.model import SomeInteger def test_string_buffer(): @@ -11,3 +13,22 @@ assert buf.getslice(1, 6, 1, 5) == buf[1:6] assert buf.getslice(1, 6, 2, 3) == 'el ' assert buf.as_str() == 'hello world' + + + +def test_len_nonneg(): + # This test needs a buffer subclass whose getlength() isn't guaranteed to + # return a non-neg integer. 
+ class DummyBuffer(Buffer): + def __init__(self, s): + self.size = s + + def getlength(self): + return self.size + def func(n): + buf = DummyBuffer(n) + return len(buf) + + a = RPythonAnnotator() + s = a.build_types(func, [int]) + assert s == SomeInteger(nonneg=True) From noreply at buildbot.pypy.org Mon Jun 2 19:47:08 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:08 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: All bytearray tests pass again Message-ID: <20140602174708.A69FB1C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71876:38541e58dc34 Date: 2014-05-26 15:14 -0500 http://bitbucket.org/pypy/pypy/changeset/38541e58dc34/ Log: All bytearray tests pass again diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -3,7 +3,7 @@ from rpython.rlib.objectmodel import ( import_from_mixin, newlist_hint, resizelist_hint, specialize) from rpython.rlib.buffer import Buffer -from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rstring import StringBuilder, ByteListBuilder from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt @@ -86,7 +86,7 @@ @staticmethod def _builder(size=100): - return BytearrayBuilder(size) + return ByteListBuilder(size) def _newlist_unwrapped(self, space, res): return space.newlist([W_BytearrayObject(_make_data(i)) for i in res]) @@ -505,24 +505,6 @@ def descr_reverse(self, space): self.data.reverse() -class BytearrayBuilder(object): - def __init__(self, size): - self.data = newlist_hint(size) - - def append(self, s): - for i in range(len(s)): - self.data.append(s[i]) - - def append_multiple_char(self, c, count): - self.data.extend([c] * count) - - def append_slice(self, value, start, end): - for i in range(start, end): - self.data.append(value[i]) - - def build(self): 
- return self.data - # ____________________________________________________________ diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -157,7 +157,10 @@ _get_encoding_and_errors, decode_object, unicode_from_string) encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - if encoding is None and errors is None: + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + if (encoding is None and errors is None and + not isinstance(self, W_BytearrayObject)): return unicode_from_string(space, self) return decode_object(space, self, encoding, errors) diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -442,7 +442,7 @@ u = b.decode('utf-8') assert isinstance(u, unicode) assert u == u'abcdefghi' - assert b.decode() + assert b.decode().encode() == b def test_int(self): assert int(bytearray('-1234')) == -1234 From noreply at buildbot.pypy.org Mon Jun 2 19:47:15 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:15 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Reorganize rlib.rstring a bit; add a test Message-ID: <20140602174715.0CBDF1C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71881:6cbc2ccc583e Date: 2014-06-02 11:40 -0500 http://bitbucket.org/pypy/pypy/changeset/6cbc2ccc583e/ Log: Reorganize rlib.rstring a bit; add a test diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -16,29 +16,6 @@ # -------------- public API for string functions ----------------------- - at specialize.argtype(0, 1) -def _get_access_functions(value, other): - if (not (isinstance(value, str) or isinstance(value, 
unicode)) or - not (isinstance(other, str) or isinstance(other, unicode))): - - def find(obj, other, start, end): - return search(obj, other, start, end, SEARCH_FIND) - def rfind(obj, other, start, end): - return search(obj, other, start, end, SEARCH_RFIND) - def count(obj, other, start, end): - return search(obj, other, start, end, SEARCH_COUNT) - else: - assert isinstance(value, str) or isinstance(value, unicode) - assert isinstance(other, str) or isinstance(other, unicode) - def find(obj, other, start, end): - return obj.find(other, start, end) - def rfind(obj, other, start, end): - return obj.rfind(other, start, end) - def count(obj, other, start, end): - return obj.count(other, start, end) - - return find, rfind, count - @specialize.argtype(0) def _isspace(char): if isinstance(char, str): @@ -79,7 +56,6 @@ i = j + 1 return res - find, _, count = _get_access_functions(value, by) bylen = len(by) if bylen == 0: raise ValueError("empty separator") @@ -88,16 +64,16 @@ if bylen == 1: # fast path: uses str.rfind(character) and str.count(character) by = by[0] # annotator hack: string -> char - count = count(value, by, 0, len(value)) - if 0 <= maxsplit < count: - count = maxsplit - res = newlist_hint(count + 1) - while count > 0: + cnt = count(value, by, 0, len(value)) + if 0 <= maxsplit < cnt: + cnt = maxsplit + res = newlist_hint(cnt + 1) + while cnt > 0: next = find(value, by, start, len(value)) assert next >= 0 # cannot fail due to the value.count above res.append(value[start:next]) start = next + bylen - count -= 1 + cnt -= 1 res.append(value[start:len(value)]) return res @@ -110,6 +86,7 @@ next = find(value, by, start, len(value)) if next < 0: break + assert start >= 0 res.append(value[start:next]) start = next + bylen maxsplit -= 1 # NB. 
if it's already < 0, it stays < 0 @@ -158,7 +135,6 @@ res = newlist_hint(min(maxsplit + 1, len(value))) else: res = [] - _, rfind, _ = _get_access_functions(value, by) end = len(value) bylen = len(by) if bylen == 0: @@ -190,7 +166,6 @@ if maxsplit == 0: return input - find, _, count = _get_access_functions(input, sub) if not sub: upper = len(input) @@ -214,12 +189,12 @@ builder.append_slice(input, upper, len(input)) else: # First compute the exact result size - count = count(input, sub, 0, len(input)) - if count > maxsplit and maxsplit > 0: - count = maxsplit + cnt = count(input, sub, 0, len(input)) + if cnt > maxsplit and maxsplit > 0: + cnt = maxsplit diff_len = len(by) - len(sub) try: - result_size = ovfcheck(diff_len * count) + result_size = ovfcheck(diff_len * cnt) result_size = ovfcheck(result_size + len(input)) except OverflowError: raise @@ -280,8 +255,28 @@ return False return True -# Stolen form rpython.rtyper.lltypesytem.rstr -# TODO: Ask about what to do with this... + at specialize.argtype(0, 1) +def find(value, other, start, end): + if ((isinstance(value, str) or isinstance(value, unicode)) and + (isinstance(other, str) or isinstance(other, unicode))): + return value.find(other, start, end) + return _search(value, other, start, end, SEARCH_FIND) + + at specialize.argtype(0, 1) +def rfind(value, other, start, end): + if ((isinstance(value, str) or isinstance(value, unicode)) and + (isinstance(other, str) or isinstance(other, unicode))): + return value.rfind(other, start, end) + return _search(value, other, start, end, SEARCH_RFIND) + + at specialize.argtype(0, 1) +def count(value, other, start, end): + if ((isinstance(value, str) or isinstance(value, unicode)) and + (isinstance(other, str) or isinstance(other, unicode))): + return value.count(other, start, end) + return _search(value, other, start, end, SEARCH_COUNT) + +# -------------- substring searching helper ---------------- SEARCH_COUNT = 0 SEARCH_FIND = 1 @@ -294,7 +289,7 @@ return mask & (1 << 
(ord(c) & (BLOOM_WIDTH - 1))) @specialize.argtype(0, 1) -def search(value, other, start, end, mode): +def _search(value, other, start, end, mode): if start < 0: start = 0 if end > len(value): diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -2,7 +2,8 @@ from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit from rpython.rlib.rstring import replace, startswith, endswith -from rpython.rlib.rstring import search, SEARCH_FIND, SEARCH_RFIND, SEARCH_COUNT +from rpython.rlib.rstring import find, rfind, count +from rpython.rlib.buffer import StringBuffer from rpython.rtyper.test.tool import BaseRtypingTest def test_split(): @@ -216,22 +217,22 @@ assert isinstance(s.build(), unicode) def test_search(): - def check_search(value, sub, *args, **kwargs): + def check_search(func, value, sub, *args, **kwargs): result = kwargs['res'] - assert search(value, sub, *args) == result - assert search(list(value), sub, *args) == result + assert func(value, sub, *args) == result + assert func(list(value), sub, *args) == result - check_search('one two three', 'ne', 0, 13, SEARCH_FIND, res=1) - check_search('one two three', 'ne', 5, 13, SEARCH_FIND, res=-1) - check_search('one two three', '', 0, 13, SEARCH_FIND, res=0) + check_search(find, 'one two three', 'ne', 0, 13, res=1) + check_search(find, 'one two three', 'ne', 5, 13, res=-1) + check_search(find, 'one two three', '', 0, 13, res=0) - check_search('one two three', 'e', 0, 13, SEARCH_RFIND, res=12) - check_search('one two three', 'e', 0, 1, SEARCH_RFIND, res=-1) - check_search('one two three', '', 0, 13, SEARCH_RFIND, res=13) + check_search(rfind, 'one two three', 'e', 0, 13, res=12) + check_search(rfind, 'one two three', 'e', 0, 1, res=-1) + check_search(rfind, 'one two three', '', 0, 13, res=13) - check_search('one two three', 'e', 0, 13, SEARCH_COUNT, res=3) - check_search('one two three', 'e', 
0, 1, SEARCH_COUNT, res=0) - check_search('one two three', '', 0, 13, SEARCH_RFIND, res=13) + check_search(count, 'one two three', 'e', 0, 13, res=3) + check_search(count, 'one two three', 'e', 0, 1, res=0) + check_search(count, 'one two three', '', 0, 13, res=14) class TestTranslates(BaseRtypingTest): @@ -252,6 +253,20 @@ res = self.interpret(fn, []) assert res + def test_buffer_parameter(self): + def fn(): + res = True + res = res and split('a//b//c//d', StringBuffer('//')) == ['a', 'b', 'c', 'd'] + res = res and split(u'a//b//c//d', StringBuffer('//')) == ['a', 'b', 'c', 'd'] + res = res and rsplit('a//b//c//d', StringBuffer('//')) == ['a', 'b', 'c', 'd'] + res = res and find('a//b//c//d', StringBuffer('//'), 0, 10) != -1 + res = res and rfind('a//b//c//d', StringBuffer('//'), 0, 10) != -1 + res = res and count('a//b//c//d', StringBuffer('//'), 0, 10) != 0 + return res + res = self.interpret(fn, []) + assert res + + def test_replace(self): def fn(): res = True From noreply at buildbot.pypy.org Mon Jun 2 19:47:16 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 2 Jun 2014 19:47:16 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Add some special cases for stringmethods to avoid buffer-overhead Message-ID: <20140602174716.418241C0026@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r71882:7f2b803f1319 Date: 2014-06-02 11:41 -0500 http://bitbucket.org/pypy/pypy/changeset/7f2b803f1319/ Log: Add some special cases for stringmethods to avoid buffer-overhead diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -12,6 +12,7 @@ from pypy.objspace.std.sliceobject import W_SliceObject from pypy.objspace.std.stdtypedef import StdTypeDef from pypy.objspace.std.stringmethods import StringMethods, _get_buffer +from pypy.objspace.std.bytesobject import W_BytesObject from 
pypy.objspace.std.util import get_positive_index NON_HEX_MSG = "non-hexadecimal number found in fromhex() arg at position %d" @@ -43,8 +44,7 @@ return W_BytearrayObject(value) def _new_from_buffer(self, buffer): - length = buffer.getlength() - return W_BytearrayObject([buffer.getitem(i) for i in range(length)]) + return W_BytearrayObject([buffer[i] for i in range(len(buffer))]) def _new_from_list(self, value): return W_BytearrayObject(value) @@ -313,61 +313,52 @@ min_length = min(len(value), buffer_len) return space.newbool(_memcmp(value, buffer, min_length) != 0) + def _comparison_helper(self, space, w_other): + value = self._val(space) + + if isinstance(w_other, W_BytearrayObject): + other = w_other.data + other_len = len(other) + cmp = _memcmp(value, other, min(len(value), len(other))) + elif isinstance(w_other, W_BytesObject): + other = self._op_val(space, w_other) + other_len = len(other) + cmp = _memcmp(value, other, min(len(value), len(other))) + else: + try: + buffer = _get_buffer(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return False, 0, 0 + raise + other_len = len(buffer) + cmp = _memcmp(value, buffer, min(len(value), len(buffer))) + + return True, cmp, other_len + def descr_lt(self, space, w_other): - try: - buffer = _get_buffer(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - - value = self._val(space) - buffer_len = buffer.getlength() - - cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool(cmp < 0 or (cmp == 0 and len(value) < buffer_len)) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp < 0 or (cmp == 0 and self._len() < other_len)) def descr_le(self, space, w_other): - try: - buffer = _get_buffer(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return 
space.w_NotImplemented - raise - - value = self._val(space) - buffer_len = buffer.getlength() - - cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool(cmp < 0 or (cmp == 0 and len(value) <= buffer_len)) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp < 0 or (cmp == 0 and self._len() <= other_len)) def descr_gt(self, space, w_other): - try: - buffer = _get_buffer(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - - value = self._val(space) - buffer_len = buffer.getlength() - - cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool(cmp > 0 or (cmp == 0 and len(value) > buffer_len)) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp > 0 or (cmp == 0 and self._len() > other_len)) def descr_ge(self, space, w_other): - try: - buffer = _get_buffer(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - - value = self._val(space) - buffer_len = buffer.getlength() - - cmp = _memcmp(value, buffer, min(len(value), buffer_len)) - return space.newbool(cmp > 0 or (cmp == 0 and len(value) >= buffer_len)) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp > 0 or (cmp == 0 and self._len() >= other_len)) def descr_iter(self, space): return space.newseqiter(self) @@ -377,11 +368,17 @@ self.data += w_other.data return self - buffer = _get_buffer(space, w_other) - for i in range(buffer.getlength()): - self.data.append(buffer.getitem(i)) + if isinstance(w_other, W_BytesObject): + self._inplace_add(self._op_val(space, w_other)) + else: + self._inplace_add(_get_buffer(space, w_other)) return self + 
@specialize.argtype(1) + def _inplace_add(self, other): + for i in range(len(other)): + self.data.append(other[i]) + def descr_inplace_mul(self, space, w_times): try: times = space.getindex_w(w_times, space.w_OverflowError) @@ -469,18 +466,20 @@ if isinstance(w_other, W_BytearrayObject): return self._new(self.data + w_other.data) + if isinstance(w_other, W_BytesObject): + return self._add(self._op_val(space, w_other)) + try: buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise + return self._add(buffer) - buffer_len = buffer.getlength() - data = list(self.data + ['\0'] * buffer_len) - for i in range(buffer_len): - data[len(self.data) + i] = buffer.getitem(i) - return self._new(data) + @specialize.argtype(1) + def _add(self, other): + return self._new(self.data + [other[i] for i in range(len(other))]) def descr_reverse(self, space): self.data.reverse() @@ -1232,11 +1231,11 @@ self.data[index] = char - at specialize.argtype(0) + at specialize.argtype(1) def _memcmp(selfvalue, buffer, length): for i in range(length): - if selfvalue[i] < buffer.getitem(i): + if selfvalue[i] < buffer[i]: return -1 - if selfvalue[i] > buffer.getitem(i): + if selfvalue[i] > buffer[i]: return 1 return 0 diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -4,8 +4,7 @@ from rpython.rlib.objectmodel import specialize, newlist_hint from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.rstring import ( - search, SEARCH_FIND, SEARCH_RFIND, SEARCH_COUNT, endswith, replace, rsplit, - split, startswith) + find, rfind, count, endswith, replace, rsplit, split, startswith) from rpython.rlib.buffer import Buffer from pypy.interpreter.error import OperationError, oefmt @@ -46,8 +45,14 @@ other = self._op_val(space, w_sub) return space.newbool(value.find(other) >= 0) - buffer = 
_get_buffer(space, w_sub) - res = search(value, buffer, 0, len(value), SEARCH_FIND) + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytesObject): + other = self._op_val(space, w_sub) + res = find(value, other, 0, len(value)) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, 0, len(value)) + return space.newbool(res >= 0) def descr_add(self, space, w_other): @@ -149,8 +154,16 @@ return space.newint(value.count(self._op_val(space, w_sub), start, end)) - buffer = _get_buffer(space, w_sub) - res = search(value, buffer, start, end, SEARCH_COUNT) + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = count(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = count(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = count(value, buffer, start, end) + return space.wrap(max(res, 0)) def descr_decode(self, space, w_encoding=None, w_errors=None): @@ -226,8 +239,16 @@ res = value.find(self._op_val(space, w_sub), start, end) return space.wrap(res) - buffer = _get_buffer(space, w_sub) - res = search(value, buffer, start, end, SEARCH_FIND) + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = find(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = find(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, start, end) + return space.wrap(res) def descr_rfind(self, space, w_sub, w_start=None, w_end=None): @@ -237,18 +258,32 @@ res = value.rfind(self._op_val(space, w_sub), start, end) return space.wrap(res) - buffer = _get_buffer(space, w_sub) - res = search(value, buffer, start, end, SEARCH_RFIND) + from pypy.objspace.std.bytearrayobject import 
W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = rfind(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = rfind(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = rfind(value, buffer, start, end) + return space.wrap(res) def descr_index(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject if self._use_rstr_ops(space, w_sub): res = value.find(self._op_val(space, w_sub), start, end) + elif isinstance(w_sub, W_BytearrayObject): + res = find(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = find(value, w_sub._value, start, end) else: buffer = _get_buffer(space, w_sub) - res = search(value, buffer, start, end, SEARCH_FIND) + res = find(value, buffer, start, end) if res < 0: raise oefmt(space.w_ValueError, @@ -258,11 +293,17 @@ def descr_rindex(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject if self._use_rstr_ops(space, w_sub): res = value.rfind(self._op_val(space, w_sub), start, end) + elif isinstance(w_sub, W_BytearrayObject): + res = rfind(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = rfind(value, w_sub._value, start, end) else: buffer = _get_buffer(space, w_sub) - res = search(value, buffer, start, end, SEARCH_RFIND) + res = rfind(value, buffer, start, end) if res < 0: raise oefmt(space.w_ValueError, @@ -456,7 +497,7 @@ if sublen == 0: raise oefmt(space.w_ValueError, "empty separator") - pos = search(value, sub, 0, len(value), SEARCH_FIND) + pos = find(value, sub, 0, len(value)) if pos != -1 
and isinstance(self, W_BytearrayObject): w_sub = self._new_from_buffer(sub) @@ -486,7 +527,7 @@ if sublen == 0: raise oefmt(space.w_ValueError, "empty separator") - pos = search(value, sub, 0, len(value), SEARCH_RFIND) + pos = rfind(value, sub, 0, len(value)) if pos != -1 and isinstance(self, W_BytearrayObject): w_sub = self._new_from_buffer(sub) @@ -502,12 +543,14 @@ @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): input = self._val(space) + sub = self._op_val(space, w_old) by = self._op_val(space, w_new) try: res = replace(input, sub, by, count) except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") + return self._new(res) @unwrap_spec(maxsplit=int) @@ -518,11 +561,17 @@ res = split(value, maxsplit=maxsplit) return self._newlist_unwrapped(space, res) - by = self._op_val(space, w_sep) - bylen = len(by) - if bylen == 0: - raise oefmt(space.w_ValueError, "empty separator") - res = split(value, by, maxsplit) + if self._use_rstr_ops(space, w_sep): + by = self._op_val(space, w_sep) + if len(by) == 0: + raise oefmt(space.w_ValueError, "empty separator") + res = split(value, by, maxsplit) + else: + by = _get_buffer(space, w_sep) + if len(by) == 0: + raise oefmt(space.w_ValueError, "empty separator") + res = split(value, by, maxsplit) + return self._newlist_unwrapped(space, res) @unwrap_spec(maxsplit=int) @@ -533,11 +582,17 @@ res = rsplit(value, maxsplit=maxsplit) return self._newlist_unwrapped(space, res) - by = self._op_val(space, w_sep) - bylen = len(by) - if bylen == 0: - raise oefmt(space.w_ValueError, "empty separator") - res = rsplit(value, by, maxsplit) + if self._use_rstr_ops(space, w_sep): + by = self._op_val(space, w_sep) + if len(by) == 0: + raise oefmt(space.w_ValueError, "empty separator") + res = rsplit(value, by, maxsplit) + else: + by = _get_buffer(space, w_sep) + if len(by) == 0: + raise oefmt(space.w_ValueError, "empty separator") + res = rsplit(value, by, maxsplit) + return 
self._newlist_unwrapped(space, res) @unwrap_spec(keepends=bool) @@ -574,7 +629,10 @@ end)) def _startswith(self, space, value, w_prefix, start, end): - return startswith(value, self._op_val(space, w_prefix), start, end) + if self._use_rstr_ops(space, w_prefix): + return startswith(value, self._op_val(space, w_prefix), start, end) + else: + return startswith(value, _get_buffer(space, w_prefix), start, end) def descr_endswith(self, space, w_suffix, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end, @@ -588,7 +646,10 @@ end)) def _endswith(self, space, value, w_prefix, start, end): - return endswith(value, self._op_val(space, w_prefix), start, end) + if self._use_rstr_ops(space, w_prefix): + return endswith(value, self._op_val(space, w_prefix), start, end) + else: + return endswith(value, _get_buffer(space, w_prefix), start, end) def _strip(self, space, w_chars, left, right): "internal function called by str_xstrip methods" From noreply at buildbot.pypy.org Mon Jun 2 22:20:39 2014 From: noreply at buildbot.pypy.org (Hubert Hesse) Date: Mon, 2 Jun 2014 22:20:39 +0200 (CEST) Subject: [pypy-commit] lang-smalltalk stmgc-c7: Make SPyVM compatible with JIT of stmgc-c7. Bump pypy revision to 52395a782909 Message-ID: <20140602202039.D880D1C0026@cobra.cs.uni-duesseldorf.de> Author: Hubert Hesse Branch: stmgc-c7 Changeset: r841:a031083d7acd Date: 2014-06-02 22:13 +0200 http://bitbucket.org/pypy/lang-smalltalk/changeset/a031083d7acd/ Log: Make SPyVM compatible with JIT of stmgc-c7. 
Bump pypy revision to 52395a782909 diff --git a/spyvm/interpreter.py b/spyvm/interpreter.py --- a/spyvm/interpreter.py +++ b/spyvm/interpreter.py @@ -181,6 +181,9 @@ new_interp.trace_proxy = self.trace_proxy bootstrapper.acquire(new_interp, w_frame, w_stm_process) + + rstm.set_transaction_length(1.0) + rthread.start_new_thread(bootstrapper.bootstrap, ()) def interpret_with_w_frame(self, w_frame): @@ -222,6 +225,17 @@ print "====== Switch from: %s to: %s ======" % (s_new_context.short_str(), p.s_new_context.short_str()) s_new_context = p.s_new_context + def _end_c_loop(self, s_context, pc, method): + if jit.we_are_jitted(): + self.quick_check_for_interrupt(s_context, + dec=self._get_adapted_tick_counter()) + if rstm.jit_stm_should_break_transaction(True): + rstm.jit_stm_transaction_break_point() + self.jit_driver.can_enter_jit( + pc=pc, self=self, method=method, + s_context=s_context) + _end_c_loop._dont_inline_ = True + def c_loop(self, s_context, may_context_switch=True): old_pc = 0 if not jit.we_are_jitted() and may_context_switch: @@ -230,20 +244,13 @@ while True: pc = s_context.pc() if pc < old_pc: - if jit.we_are_jitted(): - self.quick_check_for_interrupt(s_context, - dec=self._get_adapted_tick_counter()) - if rstm.jit_stm_should_break_transaction(True): - rstm.jit_stm_transaction_break_point() - self.jit_driver.can_enter_jit( - pc=pc, self=self, method=method, - s_context=s_context) - old_pc = pc + self._end_c_loop(s_context, pc, method) self.jit_driver.jit_merge_point( pc=pc, self=self, method=method, s_context=s_context) if rstm.jit_stm_should_break_transaction(False): rstm.jit_stm_transaction_break_point() + old_pc = pc try: self.step(s_context) except Return, nlr: From noreply at buildbot.pypy.org Mon Jun 2 22:20:52 2014 From: noreply at buildbot.pypy.org (Hubert Hesse) Date: Mon, 2 Jun 2014 22:20:52 +0200 (CEST) Subject: [pypy-commit] lang-smalltalk stmgc-c7: Add -m benchStmParallelWarmed to Image Message-ID: 
<20140602202052.082FF1C0026@cobra.cs.uni-duesseldorf.de> Author: Hubert Hesse Branch: stmgc-c7 Changeset: r842:96dea4f9ca40 Date: 2014-06-02 22:14 +0200 http://bitbucket.org/pypy/lang-smalltalk/changeset/96dea4f9ca40/ Log: Add -m benchStmParallelWarmed to Image diff --git a/images/Squeak4.5-12568.changes b/images/Squeak4.5-12568.changes --- a/images/Squeak4.5-12568.changes +++ b/images/Squeak4.5-12568.changes @@ -758,4 +758,4 @@ self fieldNew: swapField. ]. - ^ self field! ! ----QUIT----{22 May 2014 . 3:33:07 pm} Squeak4.5-12568.image priorSource: 93437! ----STARTUP----{22 May 2014 . 3:33:13 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:33'! gameLifeOfLife STMSimulation benchmark.! ! ----QUIT----{22 May 2014 . 3:34:03 pm} Squeak4.5-12568.image priorSource: 110218! ----STARTUP----{22 May 2014 . 3:34:57 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:35'! gameOfLife STMSimulation benchmark.! ! Integer removeSelector: #gameLifeOfLife! ----QUIT----{22 May 2014 . 3:35:14 pm} Squeak4.5-12568.image priorSource: 110526! ----STARTUP----{22 May 2014 . 3:36:22 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:36' prior: 33665224! gameOfLife STMSimulation benchmark2.! ! ----QUIT----{22 May 2014 . 3:36:45 pm} Squeak4.5-12568.image priorSource: 110873! ----STARTUP----{22 May 2014 . 3:36:49 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! ----QUIT----{22 May 2014 . 3:36:53 pm} Squeak4.5-12568.image priorSource: 111195! ----STARTUP----{22 May 2014 . 3:36:56 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:37' prior: 33665587! gameOfLife SPyVM print: STMSimulation benchmark2.! ! ----QUIT----{22 May 2014 . 
3:37:32 pm} Squeak4.5-12568.image priorSource: 111392! ----STARTUP----{22 May 2014 . 3:38:15 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! ----QUIT----{22 May 2014 . 3:38:35 pm} Squeak4.5-12568.image priorSource: 111727! \ No newline at end of file + ^ self field! ! ----QUIT----{22 May 2014 . 3:33:07 pm} Squeak4.5-12568.image priorSource: 93437! ----STARTUP----{22 May 2014 . 3:33:13 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:33'! gameLifeOfLife STMSimulation benchmark.! ! ----QUIT----{22 May 2014 . 3:34:03 pm} Squeak4.5-12568.image priorSource: 110218! ----STARTUP----{22 May 2014 . 3:34:57 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:35'! gameOfLife STMSimulation benchmark.! ! Integer removeSelector: #gameLifeOfLife! ----QUIT----{22 May 2014 . 3:35:14 pm} Squeak4.5-12568.image priorSource: 110526! ----STARTUP----{22 May 2014 . 3:36:22 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:36' prior: 33665224! gameOfLife STMSimulation benchmark2.! ! ----QUIT----{22 May 2014 . 3:36:45 pm} Squeak4.5-12568.image priorSource: 110873! ----STARTUP----{22 May 2014 . 3:36:49 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! ----QUIT----{22 May 2014 . 3:36:53 pm} Squeak4.5-12568.image priorSource: 111195! ----STARTUP----{22 May 2014 . 3:36:56 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: '*SPy-Benchmarks' stamp: 'hh 5/22/2014 15:37' prior: 33665587! gameOfLife SPyVM print: STMSimulation benchmark2.! ! ----QUIT----{22 May 2014 . 3:37:32 pm} Squeak4.5-12568.image priorSource: 111392! ----STARTUP----{22 May 2014 . 3:38:15 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! 
----QUIT----{22 May 2014 . 3:38:35 pm} Squeak4.5-12568.image priorSource: 111727! ----STARTUP----{2 June 2014 . 12:57:18 pm} as /home/hub/hpi/stm/src/lang-smalltalk/images/Squeak4.5-12568.image! !Integer methodsFor: 'benchmarks' stamp: 'hh 6/2/2014 12:58'! benchStmParallelWarmed 10 timesRepeat: [SPyVM print: (self benchStmParallel)].! ! ----QUIT----{2 June 2014 . 12:58:21 pm} Squeak4.5-12568.image priorSource: 111924! \ No newline at end of file diff --git a/images/Squeak4.5-12568.image b/images/Squeak4.5-12568.image index f3034333df2966ec7364ce561fe2683056b6c8e1..0d24c0b0506552d59e3dd4db7be72f7e565a8d7c GIT binary patch [cut] From noreply at buildbot.pypy.org Mon Jun 2 22:20:53 2014 From: noreply at buildbot.pypy.org (Hubert Hesse) Date: Mon, 2 Jun 2014 22:20:53 +0200 (CEST) Subject: [pypy-commit] lang-smalltalk stmgc-c7: disabling debug prints gives compile speedup of 2x Message-ID: <20140602202053.3C5021C0026@cobra.cs.uni-duesseldorf.de> Author: Hubert Hesse Branch: stmgc-c7 Changeset: r843:9ac17c87da60 Date: 2014-06-02 22:19 +0200 http://bitbucket.org/pypy/lang-smalltalk/changeset/9ac17c87da60/ Log: disabling debug prints gives compile speedup of 2x diff --git a/spyvm/primitives.py b/spyvm/primitives.py --- a/spyvm/primitives.py +++ b/spyvm/primitives.py @@ -1468,7 +1468,7 @@ def func(interp, s_frame, w_rcvr): from rpython.rlib import rstm - print "STM_FORK primitive called" + #print "STM_FORK primitive called" if not isinstance(w_rcvr, model.W_PointersObject): raise PrimitiveFailedError("Fork primitive was not called on an StmProcess") @@ -1479,21 +1479,21 @@ def func(interp, s_frame, w_rcvr): from rpython.rlib import rstm - print "STM_WAIT primitive called" + #print "STM_WAIT primitive called" if not isinstance(w_rcvr, model.W_PointersObject): raise PrimitiveFailedError("Join primitive was not called on an StmProcess") process_shadow = w_rcvr.as_special_get_shadow(interp.space, shadow.StmProcessShadow) process_shadow.join(True) - print "STM Rendezvous" - 
print "Should break: %s" % rstm.should_break_transaction() + #print "STM Rendezvous" + #print "Should break: %s" % rstm.should_break_transaction() @expose_primitive(STM_ATOMIC_ENTER, unwrap_spec=[object], no_result=True) def func(interp, s_frame, w_rcvr): from rpython.rlib import rstm - print "STM_ATOMIC_ENTER primitive called" + #print "STM_ATOMIC_ENTER primitive called" rstm.increment_atomic() @@ -1501,7 +1501,7 @@ def func(interp, s_frame, w_rcvr): from rpython.rlib import rstm - print "STM_ATOMIC_LEAVE primitive called" + #print "STM_ATOMIC_LEAVE primitive called" rstm.decrement_atomic() diff --git a/spyvm/wrapper.py b/spyvm/wrapper.py --- a/spyvm/wrapper.py +++ b/spyvm/wrapper.py @@ -141,7 +141,7 @@ w_frame = self.suspended_context() assert isinstance(w_frame, model.W_PointersObject) - print "Breaking interpreter loop for forking" + #print "Breaking interpreter loop for forking" raise STMForkException(w_frame, self._w_self) class LinkedListWrapper(Wrapper): From noreply at buildbot.pypy.org Mon Jun 2 23:39:38 2014 From: noreply at buildbot.pypy.org (Hubert Hesse) Date: Mon, 2 Jun 2014 23:39:38 +0200 (CEST) Subject: [pypy-commit] lang-smalltalk stmgc-c7: Remove more print to speed up translation Message-ID: <20140602213938.E51681C0026@cobra.cs.uni-duesseldorf.de> Author: Hubert Hesse Branch: stmgc-c7 Changeset: r844:24cee96c9c06 Date: 2014-06-02 23:39 +0200 http://bitbucket.org/pypy/lang-smalltalk/changeset/24cee96c9c06/ Log: Remove more print to speed up translation diff --git a/spyvm/interpreter.py b/spyvm/interpreter.py --- a/spyvm/interpreter.py +++ b/spyvm/interpreter.py @@ -30,22 +30,24 @@ acquired = mylock.LOCK.acquire(False) if acquired: mylock.a = 2 - print "MY 2:", mylock.a + #print "MY 2:", mylock.a time.sleep(2.5) mylock.LOCK.release() else: - print "MY locked 10:", mylock.a + pass + #print "MY locked 10:", mylock.a def yours_little_thread(): while True: acquired = mylock.LOCK.acquire(False) if acquired: mylock.a = 10 - print "YOURS 10:", mylock.a + 
#print "YOURS 10:", mylock.a mylock.LOCK.release() time.sleep(4.0) else: - print "YOURS locked 2:", mylock.a + pass + #print "YOURS locked 2:", mylock.a @@ -53,7 +55,7 @@ """Bytecode not implemented yet.""" def __init__(self, bytecodename): self.bytecodename = bytecodename - print "MissingBytecode:", bytecodename # hack for debugging + #print "MissingBytecode:", bytecodename # hack for debugging class IllegalStoreError(Exception): """Illegal Store.""" @@ -102,7 +104,7 @@ release = staticmethod(release) def bootstrap(): - print "New thread reporting" + #print "New thread reporting" interp = bootstrapper.interp w_frame = bootstrapper.w_frame @@ -112,7 +114,7 @@ assert isinstance(w_frame, model.W_PointersObject) assert isinstance(w_stm_process, model.W_PointersObject) bootstrapper.num_threads += 1 - print "Me is started", bootstrapper.num_threads + #print "Me is started", bootstrapper.num_threads bootstrapper.release() interp.interpret_with_w_frame(w_frame) #, may_context_switch=False @@ -262,7 +264,7 @@ else: s_context.push(nlr.value) except STMForkException as fork_exception: - print "Fork requested" + #print "Fork requested" self.fork_interpreter_thread(fork_exception.w_frame, fork_exception.w_stm_process) def _get_adapted_tick_counter(self): From noreply at buildbot.pypy.org Mon Jun 2 23:55:36 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Mon, 2 Jun 2014 23:55:36 +0200 (CEST) Subject: [pypy-commit] pypy default: The "zero" parameter of lltype.malloc() is checked to be constant. Message-ID: <20140602215536.948641C0109@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: Changeset: r71883:2c226c08373f Date: 2014-05-25 18:40 +0200 http://bitbucket.org/pypy/pypy/changeset/2c226c08373f/ Log: The "zero" parameter of lltype.malloc() is checked to be constant. This constraint has to be propagated up to scoped_alloc(), to generate specialized versions. 
diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -2178,9 +2178,9 @@ def ann_render_immortal(s_p, s_track_allocation=None): assert s_track_allocation is None or s_track_allocation.is_constant() -def _make_scoped_allocator(T): +def _make_scoped_allocator(T, zero): class ScopedAlloc: - def __init__(self, n=None, zero=False): + def __init__(self, n=None): if n is None: self.buf = malloc(T, flavor='raw', zero=zero) else: @@ -2204,8 +2204,8 @@ ...use array... ...it's freed now. """ - return _make_scoped_allocator(T)(n=n, zero=zero) -scoped_alloc._annspecialcase_ = 'specialize:arg(0)' + return _make_scoped_allocator(T, zero)(n=n) +scoped_alloc._annspecialcase_ = 'specialize:arg(0, 2)' def functionptr(TYPE, name, **attrs): if not isinstance(TYPE, FuncType): From noreply at buildbot.pypy.org Mon Jun 2 23:55:38 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Mon, 2 Jun 2014 23:55:38 +0200 (CEST) Subject: [pypy-commit] pypy default: Add UnicodeData 6.2.0, used by Python 3.3 Message-ID: <20140602215538.C96041C0109@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: Changeset: r71884:5b7f8e48e360 Date: 2014-06-02 23:54 +0200 http://bitbucket.org/pypy/pypy/changeset/5b7f8e48e360/ Log: Add UnicodeData 6.2.0, used by Python 3.3 diff too long, truncating to 2000 out of 16022 lines diff --git a/rpython/rlib/unicodedata/CompositionExclusions-6.0.0.txt b/rpython/rlib/unicodedata/CompositionExclusions-6.2.0.txt copy from rpython/rlib/unicodedata/CompositionExclusions-6.0.0.txt copy to rpython/rlib/unicodedata/CompositionExclusions-6.2.0.txt --- a/rpython/rlib/unicodedata/CompositionExclusions-6.0.0.txt +++ b/rpython/rlib/unicodedata/CompositionExclusions-6.2.0.txt @@ -1,5 +1,5 @@ -# CompositionExclusions-6.0.0.txt -# Date: 2010-06-25, 14:34:00 PDT [KW] +# CompositionExclusions-6.2.0.txt +# Date: 2012-05-15, 22:21:00 GMT [KW, LI] # # 
This file lists the characters for the Composition Exclusion Table # defined in UAX #15, Unicode Normalization Forms. @@ -7,7 +7,7 @@ # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # For more information, see @@ -169,17 +169,16 @@ # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -# FA30..FA6D [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +# FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1033 +# Total code points: 1035 # ================================================ # (4) Non-Starter Decompositions # -# These characters can be derived from the UnicodeData file +# These characters can be derived from the UnicodeData.txt file # by including each expanding canonical decomposition # (i.e., those which canonically decompose to a sequence # of characters instead of a single character), such that: @@ -204,3 +203,4 @@ # Total code points: 4 +# EOF diff --git a/rpython/rlib/unicodedata/DerivedCoreProperties-6.0.0.txt b/rpython/rlib/unicodedata/DerivedCoreProperties-6.2.0.txt copy from rpython/rlib/unicodedata/DerivedCoreProperties-6.0.0.txt copy to rpython/rlib/unicodedata/DerivedCoreProperties-6.2.0.txt --- a/rpython/rlib/unicodedata/DerivedCoreProperties-6.0.0.txt +++ b/rpython/rlib/unicodedata/DerivedCoreProperties-6.2.0.txt @@ -1,8 +1,8 @@ -# DerivedCoreProperties-6.0.0.txt -# Date: 2010-08-19, 00:48:05 GMT [MD] +# 
DerivedCoreProperties-6.2.0.txt +# Date: 2012-05-20, 00:42:31 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -113,9 +113,7 @@ 27C0..27C4 ; Math # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Math # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Math # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Math # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Math # Sm LONG DIVISION -27CE..27E5 ; Math # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Math # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Math # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -216,8 +214,42 @@ 1D7C3 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Math # Lo ARABIC MATHEMATICAL TAILED 
HAH +1EE49 ; Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 2165 +# Total code points: 2310 # ================================================ @@ -226,9 
+258,9 @@ 0041..005A ; Alphabetic # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Alphabetic # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Alphabetic # L& FEMININE ORDINAL INDICATOR +00AA ; Alphabetic # Lo FEMININE ORDINAL INDICATOR 00B5 ; Alphabetic # L& MICRO SIGN -00BA ; Alphabetic # L& MASCULINE ORDINAL INDICATOR +00BA ; Alphabetic # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Alphabetic # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Alphabetic # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Alphabetic # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -303,6 +335,10 @@ 0828 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER I 0829..082C ; Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN 0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; Alphabetic # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Alphabetic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08E9 ; Alphabetic # Mn [6] ARABIC CURLY FATHA..ARABIC CURLY KASRATAN +08F0..08FE ; Alphabetic # Mn [15] ARABIC OPEN FATHATAN..ARABIC DAMMA WITH DOT 0900..0902 ; Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Alphabetic # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; Alphabetic # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -500,7 +536,7 @@ 0EC0..0EC4 ; Alphabetic # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; Alphabetic # Lm LAO KO LA 0ECD ; Alphabetic # Mn LAO NIGGAHITA -0EDC..0EDD ; Alphabetic # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Alphabetic # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Alphabetic # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; Alphabetic # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; Alphabetic # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -538,9 +574,11 @@ 109C ; 
Alphabetic # Mc MYANMAR VOWEL SIGN AITON A 109D ; Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; Alphabetic # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Alphabetic # L& GEORGIAN CAPITAL LETTER YN +10CD ; Alphabetic # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Alphabetic # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; Alphabetic # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; Alphabetic # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Alphabetic # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Alphabetic # Lo ETHIOPIC SYLLABLE QHWA @@ -636,8 +674,9 @@ 1BA2..1BA5 ; Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Alphabetic # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Alphabetic # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; Alphabetic # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Alphabetic # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; Alphabetic # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -653,10 +692,11 @@ 1C78..1C7D ; Alphabetic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Alphabetic # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Alphabetic # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Alphabetic # Mc [2] VEDIC SIGN 
ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Alphabetic # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; Alphabetic # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Alphabetic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Alphabetic # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Alphabetic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Alphabetic # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -703,12 +743,15 @@ 24B6..24E9 ; Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; Alphabetic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Alphabetic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Alphabetic # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Alphabetic # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Alphabetic # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Alphabetic # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Alphabetic # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; Alphabetic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Alphabetic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Alphabetic # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; Alphabetic # Lo [54] TIFINAGH LETTER YA..TIFINAGH 
LETTER YAZZ +2D27 ; Alphabetic # L& GEORGIAN SMALL LETTER YN +2D2D ; Alphabetic # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Alphabetic # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Alphabetic # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; Alphabetic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -740,7 +783,7 @@ 31A0..31BA ; Alphabetic # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; Alphabetic # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; Alphabetic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Alphabetic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Alphabetic # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Alphabetic # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; Alphabetic # Lm YI SYLLABLE WU A016..A48C ; Alphabetic # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -752,8 +795,10 @@ A62A..A62B ; Alphabetic # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO A640..A66D ; Alphabetic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; Alphabetic # Lo CYRILLIC LETTER MULTIOCULAR O +A674..A67B ; Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA A67F ; Alphabetic # Lm CYRILLIC PAYEROK A680..A697 ; Alphabetic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; Alphabetic # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; Alphabetic # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; Alphabetic # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A717..A71F ; Alphabetic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK @@ -762,8 +807,9 @@ A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; Alphabetic # Lm MODIFIER LETTER LOW 
CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Alphabetic # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Alphabetic # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Alphabetic # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Alphabetic # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Alphabetic # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; Alphabetic # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; Alphabetic # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -820,6 +866,13 @@ AAC2 ; Alphabetic # Lo TAI VIET TONE MAI SONG AADB..AADC ; Alphabetic # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; Alphabetic # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; Alphabetic # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; Alphabetic # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Alphabetic # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -834,8 +887,7 @@ AC00..D7A3 ; Alphabetic # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Alphabetic # Lo [23] HANGUL 
JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Alphabetic # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Alphabetic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Alphabetic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Alphabetic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Alphabetic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Alphabetic # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Alphabetic # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -894,6 +946,8 @@ 1083F..10855 ; Alphabetic # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Alphabetic # Lo KHAROSHTHI LETTER A 10A01..10A03 ; Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -916,10 +970,33 @@ 110B0..110B2 ; Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110D0..110E8 ; Alphabetic # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11100..11102 ; Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Alphabetic # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Alphabetic # Mc 
CHAKMA VOWEL SIGN E +1112D..11132 ; Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Alphabetic # Mc SHARADA SIGN VISARGA +11183..111B2 ; Alphabetic # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Alphabetic # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; Alphabetic # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; Alphabetic # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 12000..1236E ; Alphabetic # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; Alphabetic # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; Alphabetic # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Alphabetic # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F93..16F9F ; Alphabetic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; Alphabetic # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; Alphabetic # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Alphabetic # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -951,12 +1028,45 @@ 1D78A..1D7A8 ; Alphabetic # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD 
ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Alphabetic # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Alphabetic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Alphabetic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Alphabetic # Lo [4] ARABIC MATHEMATICAL 
STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Alphabetic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Alphabetic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Alphabetic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Alphabetic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Alphabetic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; Alphabetic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Alphabetic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 101539 +# Total code points: 102159 # ================================================ @@ -964,9 +1074,9 @@ # Generated from: Ll + Other_Lowercase 0061..007A ; Lowercase # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Lowercase # L& FEMININE ORDINAL INDICATOR +00AA ; Lowercase # Lo FEMININE ORDINAL INDICATOR 00B5 ; Lowercase # L& MICRO SIGN -00BA ; Lowercase # L& MASCULINE ORDINAL INDICATOR +00BA ; Lowercase # Lo MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Lowercase # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Lowercase # L& [8] LATIN SMALL 
LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Lowercase # L& LATIN SMALL LETTER A WITH MACRON @@ -1237,8 +1347,8 @@ 0527 ; Lowercase # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Lowercase # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Lowercase # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Lowercase # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Lowercase # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Lowercase # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -1386,7 +1496,9 @@ 1FE0..1FE7 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF2..1FF4 ; Lowercase # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FF7 ; Lowercase # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -2090..2094 ; Lowercase # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 210A ; Lowercase # L& SCRIPT SMALL G 210E..210F ; Lowercase # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lowercase # L& SCRIPT SMALL L @@ -1407,8 +1519,8 @@ 2C6C ; Lowercase # L& LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Lowercase # L& LATIN 
SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Lowercase # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Lowercase # L& [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Lowercase # Lm MODIFIER LETTER CAPITAL V +2C76..2C7B ; Lowercase # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C81 ; Lowercase # L& COPTIC SMALL LETTER ALFA 2C83 ; Lowercase # L& COPTIC SMALL LETTER VIDA 2C85 ; Lowercase # L& COPTIC SMALL LETTER GAMMA @@ -1461,7 +1573,10 @@ 2CE3..2CE4 ; Lowercase # L& [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lowercase # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Lowercase # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lowercase # L& GEORGIAN SMALL LETTER YN +2D2D ; Lowercase # L& GEORGIAN SMALL LETTER AEN A641 ; Lowercase # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Lowercase # L& CYRILLIC SMALL LETTER DZELO A645 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -1547,11 +1662,13 @@ A78C ; Lowercase # L& LATIN SMALL LETTER SALTILLO A78E ; Lowercase # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Lowercase # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Lowercase # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Lowercase # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Lowercase # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Lowercase # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE A7A7 ; Lowercase # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A9 ; Lowercase # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Lowercase # L& [7] LATIN 
SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lowercase # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1586,7 +1703,7 @@ 1D7C4..1D7C9 ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1918 +# Total code points: 1934 # ================================================ @@ -1861,6 +1978,8 @@ 0526 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Uppercase # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Uppercase # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN +10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -2077,6 +2196,7 @@ 2CE2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Uppercase # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -2160,11 +2280,13 @@ A78B ; Uppercase # L& LATIN CAPITAL LETTER SALTILLO A78D ; Uppercase # L& LATIN CAPITAL LETTER TURNED H A790 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Uppercase # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Uppercase # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Uppercase # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Uppercase # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Uppercase # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Uppercase # L& LATIN CAPITAL LETTER S WITH OBLIQUE 
STROKE +A7AA ; Uppercase # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -2199,7 +2321,7 @@ 1D790..1D7A8 ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1478 +# Total code points: 1483 # ================================================ @@ -2209,9 +2331,9 @@ 0041..005A ; Cased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Cased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Cased # L& FEMININE ORDINAL INDICATOR +00AA ; Cased # Lo FEMININE ORDINAL INDICATOR 00B5 ; Cased # L& MICRO SIGN -00BA ; Cased # L& MASCULINE ORDINAL INDICATOR +00BA ; Cased # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Cased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Cased # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -2236,9 +2358,11 @@ 0531..0556 ; Cased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0561..0587 ; Cased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Cased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Cased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Cased # L& GEORGIAN CAPITAL LETTER AEN 1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Cased # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Cased # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G 
+1D2C..1D6A ; Cased # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Cased # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Cased # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Cased # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Cased # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -2261,7 +2385,9 @@ 1FE0..1FEC ; Cased # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; Cased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; Cased # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -2090..2094 ; Cased # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Cased # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 2102 ; Cased # L& DOUBLE-STRUCK CAPITAL C 2107 ; Cased # L& EULER CONSTANT 210A..2113 ; Cased # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -2281,19 +2407,23 @@ 24B6..24E9 ; Cased # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; Cased # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Cased # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Cased # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Cased # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Cased # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Cased # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Cased # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; Cased # L& [4] COPTIC 
CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Cased # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Cased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Cased # L& GEORGIAN SMALL LETTER YN +2D2D ; Cased # L& GEORGIAN SMALL LETTER AEN A640..A66D ; Cased # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A680..A697 ; Cased # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Cased # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Cased # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Cased # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Cased # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -2331,7 +2461,7 @@ 1D7AA..1D7C2 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 3427 +# Total code points: 3448 # ================================================ @@ -2377,7 +2507,7 @@ 
05C4..05C5 ; Case_Ignorable # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C7 ; Case_Ignorable # Mn HEBREW POINT QAMATS QATAN 05F4 ; Case_Ignorable # Po HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; Case_Ignorable # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Case_Ignorable # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0610..061A ; Case_Ignorable # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 0640 ; Case_Ignorable # Lm ARABIC TATWEEL 064B..065F ; Case_Ignorable # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW @@ -2403,6 +2533,7 @@ 0828 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER I 0829..082D ; Case_Ignorable # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Case_Ignorable # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Case_Ignorable # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN OE 093C ; Case_Ignorable # Mn DEVANAGARI SIGN NUKTA @@ -2492,7 +2623,7 @@ 1732..1734 ; Case_Ignorable # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Case_Ignorable # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Case_Ignorable # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; Case_Ignorable # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Case_Ignorable # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Case_Ignorable # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Case_Ignorable # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Case_Ignorable # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -2523,6 +2654,7 @@ 1B80..1B81 ; Case_Ignorable # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Case_Ignorable # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Case_Ignorable # Mn [2] SUNDANESE 
VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Case_Ignorable # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Case_Ignorable # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Case_Ignorable # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Case_Ignorable # Mn BATAK VOWEL SIGN KARO O @@ -2534,7 +2666,8 @@ 1CD4..1CE0 ; Case_Ignorable # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Case_Ignorable # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Case_Ignorable # Mn VEDIC SIGN TIRYAK -1D2C..1D61 ; Case_Ignorable # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1CF4 ; Case_Ignorable # Mn VEDIC TONE CANDRA ABOVE +1D2C..1D6A ; Case_Ignorable # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Case_Ignorable # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Case_Ignorable # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1DC0..1DE6 ; Case_Ignorable # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z @@ -2561,14 +2694,14 @@ 20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E2..20E4 ; Case_Ignorable # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 20E5..20F0 ; Case_Ignorable # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE -2C7D ; Case_Ignorable # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Case_Ignorable # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2CEF..2CF1 ; Case_Ignorable # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D6F ; Case_Ignorable # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; Case_Ignorable # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Case_Ignorable # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 2E2F ; Case_Ignorable # Lm VERTICAL TILDE 3005 ; Case_Ignorable # Lm IDEOGRAPHIC ITERATION MARK -302A..302F ; Case_Ignorable # Mn [6] IDEOGRAPHIC 
LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Case_Ignorable # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3031..3035 ; Case_Ignorable # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 303B ; Case_Ignorable # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 3099..309A ; Case_Ignorable # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK @@ -2580,8 +2713,9 @@ A60C ; Case_Ignorable # Lm VAI SYLLABLE LENGTHENER A66F ; Case_Ignorable # Mn COMBINING CYRILLIC VZMET A670..A672 ; Case_Ignorable # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Case_Ignorable # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Case_Ignorable # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; Case_Ignorable # Lm CYRILLIC PAYEROK +A69F ; Case_Ignorable # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Case_Ignorable # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A700..A716 ; Case_Ignorable # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR A717..A71F ; Case_Ignorable # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK @@ -2589,6 +2723,7 @@ A770 ; Case_Ignorable # Lm MODIFIER LETTER US A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA A80B ; Case_Ignorable # Mn SYLOTI NAGRI SIGN ANUSVARA @@ -2614,6 +2749,9 @@ AABE..AABF ; Case_Ignorable # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Case_Ignorable # Mn TAI VIET TONE MAI THO AADD ; Case_Ignorable # 
Lm TAI VIET SYMBOL SAM +AAEC..AAED ; Case_Ignorable # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF3..AAF4 ; Case_Ignorable # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF6 ; Case_Ignorable # Mn MEETEI MAYEK VIRAMA ABE5 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Case_Ignorable # Mn MEETEI MAYEK APUN IYEK @@ -2646,6 +2784,17 @@ 110B3..110B6 ; Case_Ignorable # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Case_Ignorable # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 110BD ; Case_Ignorable # Cf KAITHI NUMBER SIGN +11100..11102 ; Case_Ignorable # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Case_Ignorable # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Case_Ignorable # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Case_Ignorable # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Case_Ignorable # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Case_Ignorable # Mn TAKRI SIGN ANUSVARA +116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D167..1D169 ; Case_Ignorable # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D173..1D17A ; Case_Ignorable # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE 1D17B..1D182 ; Case_Ignorable # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE @@ -2656,7 +2805,7 @@ E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1692 +# Total code points: 1799 # 
================================================ @@ -2932,6 +3081,8 @@ 0526 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Changes_When_Lowercased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Changes_When_Lowercased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -3141,6 +3292,7 @@ 2CE2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -3224,15 +3376,17 @@ A78B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH 
HOOK FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW -# Total code points: 1038 +# Total code points: 1043 # ================================================ @@ -3390,7 +3544,7 @@ 025B ; Changes_When_Uppercased # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED M @@ -3731,7 +3885,10 @@ 2CE3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN WAU 2CEC ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Uppercased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER AEN A641 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZELO A645 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -3814,6 +3971,7 @@ A787 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR T A78C ; Changes_When_Uppercased # L& LATIN SMALL LETTER SALTILLO A791 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH BAR A7A1 ; 
Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -3824,7 +3982,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Changes_When_Uppercased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 1121 +# Total code points: 1126 # ================================================ @@ -3983,7 +4141,7 @@ 025B ; Changes_When_Titlecased # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED M @@ -4324,7 +4482,10 @@ 2CE3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN WAU 2CEC ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Titlecased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER AEN A641 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZELO A645 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -4407,6 +4568,7 @@ A787 ; Changes_When_Titlecased # L& LATIN 
SMALL LETTER INSULAR T A78C ; Changes_When_Titlecased # L& LATIN SMALL LETTER SALTILLO A791 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -4417,7 +4579,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Changes_When_Titlecased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 1094 +# Total code points: 1099 # ================================================ @@ -4700,6 +4862,8 @@ 0531..0556 ; Changes_When_Casefolded # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0587 ; Changes_When_Casefolded # L& ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Changes_When_Casefolded # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -4911,6 +5075,7 @@ 2CE2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -4994,17 +5159,19 @@ 
A78B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH HOOK FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Casefolded # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW -# Total code points: 1102 +# Total code points: 1107 # ================================================ @@ -5033,7 +5200,7 @@ 025B ; Changes_When_Casemapped # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Casemapped # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Casemapped # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Casemapped # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED M @@ -5061,6 +5228,8 @@ 0531..0556 ; Changes_When_Casemapped # L& [38] ARMENIAN CAPITAL LETTER 
AYB..ARMENIAN CAPITAL LETTER FEH 0561..0587 ; Changes_When_Casemapped # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Changes_When_Casemapped # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER AEN 1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Casemapped # L& LATIN SMALL LETTER P WITH STROKE 1E00..1E9B ; Changes_When_Casemapped # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -5098,22 +5267,25 @@ 2C75..2C76 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER HALF H..LATIN SMALL LETTER HALF H 2C7E..2CE3 ; Changes_When_Casemapped # L& [102] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SMALL LETTER OLD NUBIAN WAU 2CEB..2CEE ; Changes_When_Casemapped # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Changes_When_Casemapped # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Casemapped # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER AEN A640..A66D ; Changes_When_Casemapped # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A680..A697 ; Changes_When_Casemapped # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A722..A72F ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CUATRILLO WITH COMMA A732..A76F ; Changes_When_Casemapped # L& [62] LATIN CAPITAL LETTER AA..LATIN SMALL LETTER CON A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR D..LATIN SMALL LETTER INSULAR T A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL 
LETTER TURNED H -A790..A791 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Changes_When_Casemapped # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Changes_When_Casemapped # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK FB00..FB06 ; Changes_When_Casemapped # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casemapped # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF21..FF3A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10400..1044F ; Changes_When_Casemapped # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 2128 +# Total code points: 2138 # ================================================ @@ -5128,9 +5300,9 @@ 0041..005A ; ID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; ID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ID_Start # L& FEMININE ORDINAL INDICATOR +00AA ; ID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; ID_Start # L& MICRO SIGN -00BA ; ID_Start # L& MASCULINE ORDINAL INDICATOR +00BA ; ID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; ID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -5184,6 +5356,8 @@ 0824 ; ID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; ID_Start # Lm SAMARITAN MODIFIER LETTER I 
0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; ID_Start # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ID_Start # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; ID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; ID_Start # Lo DEVANAGARI OM @@ -5291,7 +5465,7 @@ 0EBD ; ID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; ID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; ID_Start # Lm LAO KO LA -0EDC..0EDD ; ID_Start # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; ID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; ID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; ID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -5306,9 +5480,11 @@ 1075..1081 ; ID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; ID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; ID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ID_Start # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ID_Start # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ID_Start # Lo ETHIOPIC SYLLABLE QHWA @@ -5358,16 +5534,17 @@ 1B45..1B4B ; ID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; ID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; ID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; ID_Start # Lo [38] BATAK LETTER A..BATAK LETTER U 
+1BBA..1BE5 ; ID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; ID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; ID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; ID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ID_Start # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ID_Start # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -5415,12 +5592,15 @@ 2185..2188 ; ID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; ID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ID_Start # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ID_Start # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ID_Start # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER 
CAPITAL V 2C7E..2CE4 ; ID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ID_Start # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; ID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -5451,7 +5631,7 @@ 31A0..31BA ; ID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; ID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; ID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; ID_Start # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; ID_Start # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; ID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; ID_Start # Lm YI SYLLABLE WU A016..A48C ; ID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -5473,8 +5653,9 @@ A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ID_Start # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ID_Start # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ID_Start 
# L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ID_Start # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; ID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -5504,6 +5685,9 @@ AAC2 ; ID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; ID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; ID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -5513,8 +5697,7 @@ AC00..D7A3 ; ID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; ID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; ID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; ID_Start # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; ID_Start # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; ID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; ID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; ID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN 
XEH @@ -5572,6 +5755,8 @@ 1083F..10855 ; ID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; ID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; ID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -5583,10 +5768,18 @@ 10C00..10C48 ; ID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; ID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; ID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; ID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; ID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; ID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; ID_Start # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ID_Start # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; ID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; ID_Start # L& [85] MATHEMATICAL BOLD CAPITAL 
A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; ID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -5618,12 +5811,45 @@ 1D78A..1D7A8 ; ID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; ID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; 
ID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; ID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; ID_Start # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 100747 +# Total code points: 101240 # ================================================ @@ -5641,10 +5867,10 @@ 0041..005A ; ID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; ID_Continue # Pc LOW LINE 0061..007A ; ID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ID_Continue # L& FEMININE ORDINAL INDICATOR +00AA ; ID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; 
ID_Continue # L& MICRO SIGN 00B7 ; ID_Continue # Po MIDDLE DOT -00BA ; ID_Continue # L& MASCULINE ORDINAL INDICATOR +00BA ; ID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; ID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -5725,6 +5951,9 @@ 0829..082D ; ID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; ID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08A0 ; ID_Continue # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ID_Continue # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; ID_Continue # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; ID_Continue # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; ID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; ID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -5946,7 +6175,7 @@ 0EC6 ; ID_Continue # Lm LAO KO LA 0EC8..0ECD ; ID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; ID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; ID_Continue # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; ID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; ID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; ID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -5998,9 +6227,11 @@ 109A..109C ; ID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; ID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; ID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN 
CAPITAL LETTER HOE +10C7 ; ID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ID_Continue # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ID_Continue # Lo ETHIOPIC SYLLABLE QHWA @@ -6036,6 +6267,7 @@ 176E..1770 ; ID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; ID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; ID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; ID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; ID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; ID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; ID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -6114,9 +6346,11 @@ 1BA6..1BA7 ; ID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; ID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; ID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB ; ID_Continue # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; ID_Continue # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; ID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; ID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; ID_Continue # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; ID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; ID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; ID_Continue # Mn [2] BATAK VOWEL SIGN 
PAKPAK E..BATAK VOWEL SIGN EE @@ -6142,10 +6376,12 @@ 1CE9..1CEC ; ID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; ID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; ID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; ID_Continue # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; ID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; ID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; ID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ID_Continue # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ID_Continue # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -6200,13 +6436,16 @@ 2185..2188 ; ID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; ID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ID_Continue # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ID_Continue # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER 
CAPITAL V 2C7E..2CE4 ; ID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; ID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; ID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ID_Continue # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; ID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; ID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -6223,7 +6462,8 @@ 3006 ; ID_Continue # Lo IDEOGRAPHIC CLOSING MARK 3007 ; ID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; ID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; ID_Continue # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; ID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; ID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; ID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; ID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; ID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -6241,7 +6481,7 @@ 31A0..31BA ; ID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; ID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; ID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; ID_Continue # Lo [20940] CJK UNIFIED 
IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; ID_Continue # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; ID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; ID_Continue # Lm YI SYLLABLE WU A016..A48C ; ID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -6255,9 +6495,10 @@ A640..A66D ; ID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; ID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; ID_Continue # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; ID_Continue # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; ID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; ID_Continue # Lm CYRILLIC PAYEROK A680..A697 ; ID_Continue # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; ID_Continue # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; ID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; ID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; ID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -6267,8 +6508,9 @@ A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ID_Continue # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ID_Continue # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ID_Continue # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ID_Continue # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL 
LIGATURE OE A7FA ; ID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; ID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA @@ -6337,6 +6579,14 @@ AAC2 ; ID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; ID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; ID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; ID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; ID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; ID_Continue # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -6354,8 +6604,7 @@ AC00..D7A3 ; ID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; ID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; ID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; ID_Continue # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; ID_Continue # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; ID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; ID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST 
FB13..FB17 ; ID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6422,6 +6671,8 @@ 1083F..10855 ; ID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; ID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; ID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -6449,10 +6700,40 @@ 110B3..110B6 ; ID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; ID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; ID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110D0..110E8 ; ID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; ID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; ID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; ID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; ID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; ID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; ID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; ID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11180..11181 ; ID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; ID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; ID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; ID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; ID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL 
SIGN O +111BF..111C0 ; ID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; ID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111D0..111D9 ; ID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; ID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; ID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; ID_Continue # Mc TAKRI SIGN VISARGA +116AD ; ID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; ID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; ID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; ID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA +116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; ID_Continue # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ID_Continue # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; ID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; ID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; ID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; ID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D165..1D166 ; ID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; ID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 @@ -6492,13 +6773,46 @@ 1D7AA..1D7C2 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ID_Continue # L& [8] 
MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; ID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; 
ID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; ID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; ID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 102675 +# Total code points: 103355 # ================================================ @@ -6511,9 +6825,9 @@ 0041..005A ; XID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; XID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; XID_Start # L& FEMININE ORDINAL INDICATOR +00AA ; XID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Start # L& MICRO SIGN -00BA ; XID_Start # L& MASCULINE ORDINAL INDICATOR +00BA ; XID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Start # L& [23] LATIN CAPITAL LETTER A WITH 
GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -6566,6 +6880,8 @@ 0824 ; XID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; XID_Start # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; XID_Start # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; XID_Start # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; XID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; XID_Start # Lo DEVANAGARI OM @@ -6673,7 +6989,7 @@ 0EBD ; XID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; XID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; XID_Start # Lm LAO KO LA -0EDC..0EDD ; XID_Start # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; XID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; XID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; XID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -6688,9 +7004,11 @@ 1075..1081 ; XID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; XID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; XID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Start # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; XID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; XID_Start # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Start # Lo [7] ETHIOPIC 
SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Start # Lo ETHIOPIC SYLLABLE QHWA @@ -6740,16 +7058,17 @@ 1B45..1B4B ; XID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; XID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; XID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; XID_Start # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; XID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; XID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; XID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; XID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; XID_Start # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; XID_Start # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; XID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -6797,12 +7116,15 @@ 2185..2188 ; XID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Start 
# L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; XID_Start # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; XID_Start # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; XID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; XID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; XID_Start # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; XID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; XID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -6832,7 +7154,7 @@ 31A0..31BA ; XID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; XID_Start # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; XID_Start # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; XID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Start # Lm YI SYLLABLE WU A016..A48C ; XID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -6854,8 +7176,9 @@ A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER 
DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; XID_Start # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; XID_Start # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; XID_Start # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; XID_Start # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; XID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -6885,6 +7208,9 @@ AAC2 ; XID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; XID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -6894,8 +7220,7 @@ AC00..D7A3 ; XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; XID_Start # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY 
IDEOGRAPH-FA2D -FA30..FA6D ; XID_Start # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; XID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6958,6 +7283,8 @@ 1083F..10855 ; XID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; XID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; XID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -6969,10 +7296,18 @@ 10C00..10C48 ; XID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; XID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; XID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; XID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; XID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; XID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; XID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; XID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; XID_Start # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; XID_Start # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; XID_Start # Lo 
[1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; XID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; XID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; XID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -7004,19 +7339,51 @@ 1D78A..1D7A8 ; XID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; XID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Start # Lo [3] 
ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Start # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 
2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 100724 +# Total code points: 101217 # ================================================ # Derived Property: XID_Continue # Mod_ID_Continue modified for closure under NFKx # Modified as described in UAX #15 -# NOTE: Cf characters should be filtered out. # NOTE: Does NOT remove the non-NFKx characters. # Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) # NOTE: See UAX #31 for more information @@ -7025,10 +7392,10 @@ 0041..005A ; XID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; XID_Continue # Pc LOW LINE 0061..007A ; XID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; XID_Continue # L& FEMININE ORDINAL INDICATOR +00AA ; XID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Continue # L& MICRO SIGN 00B7 ; XID_Continue # Po MIDDLE DOT -00BA ; XID_Continue # L& MASCULINE ORDINAL INDICATOR +00BA ; XID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -7108,6 +7475,9 @@ 0829..082D ; XID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; XID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08A0 ; XID_Continue # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; XID_Continue # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; XID_Continue # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; XID_Continue # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; 
XID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; XID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -7329,7 +7699,7 @@ 0EC6 ; XID_Continue # Lm LAO KO LA 0EC8..0ECD ; XID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; XID_Continue # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; XID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; XID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -7381,9 +7751,11 @@ 109A..109C ; XID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; XID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; XID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Continue # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; XID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; XID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Continue # Lo ETHIOPIC SYLLABLE QHWA @@ -7419,6 +7791,7 @@ 176E..1770 ; XID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; XID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; XID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; XID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; XID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; XID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN 
UA 17BE..17C5 ; XID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -7497,9 +7870,11 @@ 1BA6..1BA7 ; XID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; XID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; XID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB ; XID_Continue # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; XID_Continue # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; XID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; XID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; XID_Continue # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; XID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; XID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; XID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; XID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -7525,10 +7900,12 @@ 1CE9..1CEC ; XID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; XID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; XID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; XID_Continue # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; XID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; XID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; XID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; XID_Continue # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; XID_Continue # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; XID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 
1D78 ; XID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -7583,13 +7960,16 @@ 2185..2188 ; XID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; XID_Continue # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; XID_Continue # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; XID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; XID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; XID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; XID_Continue # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; XID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; XID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; XID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -7606,7 +7986,8 @@ 3006 ; XID_Continue # Lo IDEOGRAPHIC CLOSING 
MARK 3007 ; XID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; XID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; XID_Continue # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; XID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; XID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; XID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; XID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; XID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -7623,7 +8004,7 @@ 31A0..31BA ; XID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; XID_Continue # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; XID_Continue # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; XID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Continue # Lm YI SYLLABLE WU A016..A48C ; XID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -7637,9 +8018,10 @@ A640..A66D ; XID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; XID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; XID_Continue # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; XID_Continue # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; XID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; XID_Continue # Lm CYRILLIC PAYEROK A680..A697 ; XID_Continue # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; XID_Continue # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; XID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI 
A6E6..A6EF ; XID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; XID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -7649,8 +8031,9 @@ A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; XID_Continue # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; XID_Continue # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; XID_Continue # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; XID_Continue # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; XID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA @@ -7719,6 +8102,14 @@ AAC2 ; XID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; XID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; XID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; XID_Continue # Mn MEETEI MAYEK VIRAMA 
AB01..AB06 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -7736,8 +8127,7 @@ AC00..D7A3 ; XID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; XID_Continue # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; XID_Continue # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; XID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -7810,6 +8200,8 @@ 1083F..10855 ; XID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; XID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; XID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -7837,10 +8229,40 @@ 110B3..110B6 ; XID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; XID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 
110B9..110BA ; XID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110D0..110E8 ; XID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; XID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; XID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; XID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; XID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; XID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; XID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; XID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11180..11181 ; XID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; XID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; XID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; XID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; XID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; XID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; XID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111D0..111D9 ; XID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; XID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; XID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; XID_Continue # Mc TAKRI SIGN VISARGA +116AD ; XID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; XID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; XID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; XID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA +116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; XID_Continue # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; XID_Continue # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD 
ASSYRIAN ONE QUARTER 13000..1342E ; XID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; XID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; XID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; XID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; XID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; XID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 @@ -7880,13 +8302,46 @@ 1D7AA..1D7C2 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; XID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN 
+1EE42 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK 
LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 102656 +# Total code points: 103336 # ================================================ @@ -7897,12 +8352,12 @@ # + Variation_Selector # - White_Space # - FFF9..FFFB (Annotation Characters) -# - 0600..0603, 06DD, 070F, 110BD (exceptional Cf characters that should be visible) +# - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible) 00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN 034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER 115F..1160 ; Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Default_Ignorable_Code_Point # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK 202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE @@ -7956,6 +8411,7 @@ 0825..0827 ; Grapheme_Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Grapheme_Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Grapheme_Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Grapheme_Extend # Mn [27] ARABIC CURLY 
FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Grapheme_Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA @@ -8053,6 +8509,7 @@ 1732..1734 ; Grapheme_Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Grapheme_Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -8080,6 +8537,7 @@ 1B80..1B81 ; Grapheme_Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Grapheme_Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Grapheme_Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Grapheme_Extend # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Grapheme_Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Grapheme_Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Grapheme_Extend # Mn BATAK VOWEL SIGN KARO O @@ -8090,6 +8548,7 @@ 1CD4..1CE0 ; Grapheme_Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Grapheme_Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Grapheme_Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Grapheme_Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Grapheme_Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Grapheme_Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 
@@ -8101,11 +8560,13 @@ 2CEF..2CF1 ; Grapheme_Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Grapheme_Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Grapheme_Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Grapheme_Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Grapheme_Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Grapheme_Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Grapheme_Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Grapheme_Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Grapheme_Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Grapheme_Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Grapheme_Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -8129,6 +8590,8 @@ AAB7..AAB8 ; Grapheme_Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Grapheme_Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Grapheme_Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Grapheme_Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Grapheme_Extend # Mn MEETEI MAYEK VIRAMA ABE5 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Grapheme_Extend # Mn MEETEI MAYEK APUN IYEK @@ -8147,6 +8610,16 @@ 11080..11081 ; Grapheme_Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN 
ANUSVARA 110B3..110B6 ; Grapheme_Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Grapheme_Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Grapheme_Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Grapheme_Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Grapheme_Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Grapheme_Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Grapheme_Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Grapheme_Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Grapheme_Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Grapheme_Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Grapheme_Extend # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Grapheme_Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -8156,7 +8629,7 @@ 1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1234 +# Total code points: 1317 # ================================================ @@ -8195,10 +8668,11 @@ 00A0 ; Grapheme_Base # Zs NO-BREAK SPACE 00A1 ; Grapheme_Base # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Grapheme_Base # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Grapheme_Base # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Grapheme_Base # So BROKEN BAR +00A7 ; Grapheme_Base # Po SECTION SIGN 00A8 ; Grapheme_Base # Sk DIAERESIS 00A9 ; Grapheme_Base # So COPYRIGHT SIGN -00AA ; Grapheme_Base # L& FEMININE ORDINAL INDICATOR +00AA ; Grapheme_Base # Lo FEMININE ORDINAL INDICATOR 00AB ; 
Grapheme_Base # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; Grapheme_Base # Sm NOT SIGN 00AE ; Grapheme_Base # So REGISTERED SIGN @@ -8208,11 +8682,10 @@ 00B2..00B3 ; Grapheme_Base # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; Grapheme_Base # Sk ACUTE ACCENT 00B5 ; Grapheme_Base # L& MICRO SIGN -00B6 ; Grapheme_Base # So PILCROW SIGN -00B7 ; Grapheme_Base # Po MIDDLE DOT +00B6..00B7 ; Grapheme_Base # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; Grapheme_Base # Sk CEDILLA 00B9 ; Grapheme_Base # No SUPERSCRIPT ONE -00BA ; Grapheme_Base # L& MASCULINE ORDINAL INDICATOR +00BA ; Grapheme_Base # Lo MASCULINE ORDINAL INDICATOR 00BB ; Grapheme_Base # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; Grapheme_Base # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; Grapheme_Base # Po INVERTED QUESTION MARK @@ -8261,6 +8734,7 @@ 0561..0587 ; Grapheme_Base # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0589 ; Grapheme_Base # Po ARMENIAN FULL STOP 058A ; Grapheme_Base # Pd ARMENIAN HYPHEN +058F ; Grapheme_Base # Sc ARMENIAN DRAM SIGN 05BE ; Grapheme_Base # Pd HEBREW PUNCTUATION MAQAF 05C0 ; Grapheme_Base # Po HEBREW PUNCTUATION PASEQ 05C3 ; Grapheme_Base # Po HEBREW PUNCTUATION SOF PASUQ @@ -8310,6 +8784,8 @@ 0830..083E ; Grapheme_Base # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; Grapheme_Base # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; Grapheme_Base # Po MANDAIC PUNCTUATION +08A0 ; Grapheme_Base # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Grapheme_Base # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; Grapheme_Base # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093B ; Grapheme_Base # Mc DEVANAGARI VOWEL SIGN OOE @@ -8372,6 +8848,7 @@ 0AD0 ; Grapheme_Base # Lo GUJARATI OM 0AE0..0AE1 ; Grapheme_Base # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER 
VOCALIC LL 0AE6..0AEF ; Grapheme_Base # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Grapheme_Base # Po GUJARATI ABBREVIATION SIGN 0AF1 ; Grapheme_Base # Sc GUJARATI RUPEE SIGN 0B02..0B03 ; Grapheme_Base # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C ; Grapheme_Base # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L @@ -8488,11 +8965,13 @@ 0EC0..0EC4 ; Grapheme_Base # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; Grapheme_Base # Lm LAO KO LA 0ED0..0ED9 ; Grapheme_Base # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; Grapheme_Base # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Grapheme_Base # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Grapheme_Base # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; Grapheme_Base # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; Grapheme_Base # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; Grapheme_Base # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Grapheme_Base # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Grapheme_Base # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; Grapheme_Base # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; Grapheme_Base # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; Grapheme_Base # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -8540,10 +9019,12 @@ 109A..109C ; Grapheme_Base # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109E..109F ; Grapheme_Base # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; Grapheme_Base # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER YN +10CD ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Grapheme_Base # 
Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; Grapheme_Base # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; Grapheme_Base # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; Grapheme_Base # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Grapheme_Base # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE QHWA @@ -8559,8 +9040,7 @@ 12D8..1310 ; Grapheme_Base # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; Grapheme_Base # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; Grapheme_Base # So ETHIOPIC SECTION MARK -1361..1368 ; Grapheme_Base # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Grapheme_Base # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; Grapheme_Base # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; Grapheme_Base # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; Grapheme_Base # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -8652,9 +9132,10 @@ 1BA1 ; Grapheme_Base # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; Grapheme_Base # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; Grapheme_Base # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; Grapheme_Base # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Grapheme_Base # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; Grapheme_Base # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE From noreply at buildbot.pypy.org Tue Jun 3 00:54:13 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Tue, 3 Jun 2014 00:54:13 +0200 (CEST) Subject: [pypy-commit] pypy py3k: kill windows' popen 
funcs Message-ID: <20140602225413.E46A11C015E@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r71885:9c3738d44368 Date: 2014-06-02 15:53 -0700 http://bitbucket.org/pypy/pypy/changeset/9c3738d44368/ Log: kill windows' popen funcs diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -21,9 +21,6 @@ if os.name == 'nt': del appleveldefs['urandom'] # at interp on win32 appleveldefs.update({ - 'popen2': 'app_posix.popen2', - 'popen3': 'app_posix.popen3', - 'popen4': 'app_posix.popen4', 'startfile': 'app_startfile.startfile', }) diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py --- a/pypy/module/posix/app_posix.py +++ b/pypy/module/posix/app_posix.py @@ -122,86 +122,3 @@ return fd.read(n) except (OSError, IOError): raise NotImplementedError("/dev/urandom (or equivalent) not found") - - -else: - # Windows implementations - - def popen2(cmd, mode="t", bufsize=-1): - "" - - cmd = _makecmd_string(cmd) - - if mode not in ('b', 't'): - raise ValueError("invalid mode %r" % (mode,)) - - import subprocess - p = subprocess.Popen(cmd, shell=True, bufsize=bufsize, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - universal_newlines=(mode =='t')) - return (_wrap_close(p.stdin, p), _wrap_close(p.stdout, p)) - - def popen3(cmd, mode="t", bufsize=-1): - "" - - cmd = _makecmd_string(cmd) - - if mode not in ('b', 't'): - raise ValueError("invalid mode %r" % (mode,)) - - import subprocess - p = subprocess.Popen(cmd, shell=True, bufsize=bufsize, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=(mode =='t')) - return (_wrap_close(p.stdin, p), _wrap_close(p.stdout, p), - _wrap_close(p.stderr, p)) - - def popen4(cmd, mode="t", bufsize=-1): - "" - - cmd = _makecmd_string(cmd) - - if mode not in ('b', 't'): - raise ValueError("invalid mode %r" % (mode,)) - - import subprocess - p = 
subprocess.Popen(cmd, shell=True, bufsize=bufsize, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=(mode =='t')) - return (_wrap_close(p.stdin, p), _wrap_close(p.stdout, p)) - - # helper for making popen cmd a string object - def _makecmd_string(cmd): - if isinstance(cmd, unicode): - cmd = cmd.encode('ascii') - - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % - (type(cmd),)) - return cmd - - # A proxy for a file whose close waits for the process - class _wrap_close(object): - def __init__(self, stream, proc): - self._stream = stream - self._proc = proc - def close(self): - self._stream.close() - return self._proc.wait() or None # 0 => None - __del__ = close - - def __enter__(self): - return self - - def __exit__(self, *k): - self.close() - - def __getattr__(self, name): - return getattr(self._stream, name) - def __iter__(self): - return iter(self._stream) From noreply at buildbot.pypy.org Tue Jun 3 00:54:15 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Tue, 3 Jun 2014 00:54:15 +0200 (CEST) Subject: [pypy-commit] pypy py3k: adapt to py3 Message-ID: <20140602225415.563381C015E@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r71886:36b2ee07a173 Date: 2014-06-02 15:53 -0700 http://bitbucket.org/pypy/pypy/changeset/36b2ee07a173/ Log: adapt to py3 diff --git a/pypy/module/posix/app_startfile.py b/pypy/module/posix/app_startfile.py --- a/pypy/module/posix/app_startfile.py +++ b/pypy/module/posix/app_startfile.py @@ -25,18 +25,18 @@ # if operation is None: operation = w.NULL - if isinstance(filepath, str): - if isinstance(operation, unicode): + if isinstance(filepath, bytes): + if isinstance(operation, str): operation = operation.encode("ascii") rc = w.lib.ShellExecuteA(w.NULL, operation, filepath, w.NULL, w.NULL, w.SW_SHOWNORMAL) - elif isinstance(filepath, unicode): - if isinstance(operation, str): + elif isinstance(filepath, str): + if 
isinstance(operation, bytes): operation = operation.decode("ascii") rc = w.lib.ShellExecuteW(w.NULL, operation, filepath, w.NULL, w.NULL, w.SW_SHOWNORMAL) else: - raise TypeError("argument 1 must be str or unicode") + raise TypeError("argument 1 must be str or bytes") rc = int(w.cast("uintptr_t", rc)) if rc <= 32: code, msg = w.getwinerror() From noreply at buildbot.pypy.org Tue Jun 3 02:19:26 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Tue, 3 Jun 2014 02:19:26 +0200 (CEST) Subject: [pypy-commit] pypy None-consistency: make getattr(None, ...) an annotator error Message-ID: <20140603001926.A85CE1D2D65@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: None-consistency Changeset: r71887:d4de40e2c467 Date: 2014-06-03 01:18 +0100 http://bitbucket.org/pypy/pypy/changeset/d4de40e2c467/ Log: make getattr(None, ...) an annotator error diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -760,10 +760,6 @@ def bind_callables_under(self, classdef, name): return self - def getattr(self, s_attr): - return s_ImpossibleValue - getattr.can_only_throw = [] - def setattr(self, s_attr, s_value): return None From noreply at buildbot.pypy.org Mon Jun 9 13:31:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 13:31:09 +0200 (CEST) Subject: [pypy-commit] pypy default: Add a warning Message-ID: <20140609113109.5C8341C104D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r71995:1b719225dde5 Date: 2014-06-09 13:30 +0200 http://bitbucket.org/pypy/pypy/changeset/1b719225dde5/ Log: Add a warning diff --git a/pypy/module/micronumpy/tool/numready/page.html b/pypy/module/micronumpy/tool/numready/page.html --- a/pypy/module/micronumpy/tool/numready/page.html +++ b/pypy/module/micronumpy/tool/numready/page.html @@ -40,6 +40,7 @@

numpy compatibility test results, generated automatically by running
pypy/module/micronumpy/tool/numready/main.py <path-to-latest-pypy>

Overall: {{ msg }}

+

Warning: a positive result does not mean the function is actually working! It only means that the function/module/constant is present. It may be missing other things.

From noreply at buildbot.pypy.org Mon Jun 9 15:21:36 2014 From: noreply at buildbot.pypy.org (hgattic) Date: Mon, 9 Jun 2014 15:21:36 +0200 (CEST) Subject: [pypy-commit] pypy llvm-translation-backend: working Message-ID: <20140609132136.D8E161C104D@cobra.cs.uni-duesseldorf.de> Author: hgattic Branch: llvm-translation-backend Changeset: r71996:4ea859050a7f Date: 2014-06-09 15:21 +0200 http://bitbucket.org/pypy/pypy/changeset/4ea859050a7f/ Log: working From noreply at buildbot.pypy.org Mon Jun 9 16:28:34 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 16:28:34 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Fix: always do this, even if not found-early-to-be-virtual Message-ID: <20140609142834.DF9D91C104D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r71998:e0b4b9346cfb Date: 2014-06-09 16:27 +0200 http://bitbucket.org/pypy/pypy/changeset/e0b4b9346cfb/ Log: Fix: always do this, even if not found-early-to-be-virtual diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -342,7 +342,7 @@ @staticmethod @always_inline def ll_append_multiple_char(ll_builder, char, times): - if jit.isvirtual(ll_builder): + if jit.we_are_jitted(ll_builder): if BaseStringBuilderRepr._ll_jit_try_append_multiple_char( ll_builder, char, times): return From noreply at buildbot.pypy.org Mon Jun 9 16:31:56 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 16:31:56 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Fix. Remove more isvirtual()s. Message-ID: <20140609143156.AE83C1C104D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r71999:181e95dfdf21 Date: 2014-06-09 16:30 +0200 http://bitbucket.org/pypy/pypy/changeset/181e95dfdf21/ Log: Fix. Remove more isvirtual()s. 
diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -342,7 +342,7 @@ @staticmethod @always_inline def ll_append_multiple_char(ll_builder, char, times): - if jit.we_are_jitted(ll_builder): + if jit.we_are_jitted(): if BaseStringBuilderRepr._ll_jit_try_append_multiple_char( ll_builder, char, times): return @@ -417,7 +417,6 @@ return ll_builder.total_size - num_chars_missing_from_last_piece @classmethod - @jit.look_inside_iff(lambda cls, ll_builder: jit.isvirtual(ll_builder)) def ll_build(cls, ll_builder): buf = ll_builder.current_buf if buf: From noreply at buildbot.pypy.org Mon Jun 9 16:39:10 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 16:39:10 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Tentatively get rid of all isvirtual()s Message-ID: <20140609143910.D40C11C104D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72000:5588dd1ec91c Date: 2014-06-09 16:37 +0200 http://bitbucket.org/pypy/pypy/changeset/5588dd1ec91c/ Log: Tentatively get rid of all isvirtual()s diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -219,7 +219,7 @@ @staticmethod @always_inline def ll_append_char(ll_builder, char): - if jit.isvirtual(ll_builder): + if jit.we_are_jitted(): BaseStringBuilderRepr._ll_jit_append_char(ll_builder, char) else: BaseStringBuilderRepr._ll_append_char(ll_builder, char) @@ -250,7 +250,7 @@ @staticmethod def ll_append_char_2(ll_builder, char0, char1): - if jit.isvirtual(ll_builder): + if jit.we_are_jitted(): BaseStringBuilderRepr._ll_jit_append_char_2(ll_builder, char0,char1) else: BaseStringBuilderRepr._ll_append_char_2(ll_builder, char0, char1) @@ -325,7 +325,7 @@ ll_str.chars[start], ll_str.chars[start + 1]) return 
True - if jit.isvirtual(ll_builder) and bool(ll_builder.current_buf): + if bool(ll_builder.current_buf): ofs = ll_builder.current_ofs end = ofs + size * ll_builder.charsize if end <= ll_builder.current_end: From noreply at buildbot.pypy.org Mon Jun 9 16:20:30 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 16:20:30 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Test and fix Message-ID: <20140609142030.EDBA11C08F6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r71997:99a38a918a30 Date: 2014-06-09 16:19 +0200 http://bitbucket.org/pypy/pypy/changeset/99a38a918a30/ Log: Test and fix diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py --- a/rpython/jit/metainterp/test/test_string.py +++ b/rpython/jit/metainterp/test/test_string.py @@ -690,6 +690,26 @@ assert res == 0 self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + def test_stringbuilder_append_len2_2(self): + jitdriver = JitDriver(reds=['n', 'str1'], greens=[]) + def f(n): + str1 = str(n) + while n > 0: + jitdriver.jit_merge_point(n=n, str1=str1) + sb = StringBuilder(4) + sb.append("a") + sb.append(str1) + s = sb.build() + if len(s) != 3: raise ValueError + if s[0] != "a": raise ValueError + if s[1] != "1": raise ValueError + if s[2] != "0": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + def test_stringbuilder_append_slice_1(self): jitdriver = JitDriver(reds=['n'], greens=[]) def f(n): @@ -764,6 +784,26 @@ assert res == 0 self.check_resops(call=4) # (append, build) * 2 unroll + def test_stringbuilder_bug1(self): + jitdriver = JitDriver(reds=['n', 's1'], greens=[]) + @dont_look_inside + def escape(x): + pass + def f(n): + s1 = unicode(str(n) * 16) + while n > 0: + jitdriver.jit_merge_point(n=n, s1=s1) + sb = UnicodeBuilder(32) + sb.append(s1) + sb.append(u"\n\n") + s = 
sb.build() + if len(s) != 34: raise ValueError + n -= 1 + return n + f(10) + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + def test_shrink_array(self): jitdriver = JitDriver(reds=['result', 'n'], greens=[]) _str, _StringBuilder = self._str, self._StringBuilder diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -262,6 +262,7 @@ end = ofs + 2 * ll_builder.charsize if end > ll_builder.current_end: ofs = ll_builder.grow(ll_builder, 2) + end = ofs + 2 * ll_builder.charsize ll_builder.current_ofs = end # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) From noreply at buildbot.pypy.org Mon Jun 9 17:03:32 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 17:03:32 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Use unsigned numbers, needed on 32-bit in case a raw buffer straddles Message-ID: <20140609150332.7631C1C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72001:e0d8eae14284 Date: 2014-06-09 17:02 +0200 http://bitbucket.org/pypy/pypy/changeset/e0d8eae14284/ Log: Use unsigned numbers, needed on 32-bit in case a raw buffer straddles the 2**31 middle-of-memory limit diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -1,6 +1,6 @@ from rpython.rlib import rgc, jit from rpython.rlib.objectmodel import enforceargs, specialize -from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask from rpython.rlib.debug import ll_assert from rpython.rlib.rgc import must_be_light_finalizer from rpython.rtyper.rptr import PtrRepr @@ -49,15 +49,21 @@ # new_piece = lltype.malloc(STRINGPIECE) charsize = ll_builder.charsize - new_piece.piece_lgt = 
needed * charsize + try: + new_piece.piece_lgt = ovfcheck(needed * charsize) + except OverflowError: + raise MemoryError raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed * charsize, flavor='raw') new_piece.raw_ptr = raw_ptr new_piece.prev_piece = ll_builder.extra_pieces ll_builder.extra_pieces = new_piece - ll_builder.current_ofs = rffi.cast(lltype.Signed, raw_ptr) - ll_builder.current_end = (rffi.cast(lltype.Signed, raw_ptr) + - needed * charsize) - ll_builder.total_size += needed + ll_builder.current_ofs = rffi.cast(lltype.Unsigned, raw_ptr) + ll_builder.current_end = (rffi.cast(lltype.Unsigned, raw_ptr) + + r_uint(needed * charsize)) + try: + ll_builder.total_size = ovfcheck(ll_builder.total_size + needed) + except OverflowError: + raise MemoryError if ll_builder.current_buf: STRTYPE = lltype.typeOf(ll_builder.current_buf).TO ll_builder.initial_buf = ll_builder.current_buf @@ -67,10 +73,10 @@ def stringbuilder_append_overflow(ll_builder, ll_str): # First, the part that still fits in the current piece ofs = ll_builder.current_ofs - part1 = ll_builder.current_end - ofs # in bytes, not (uni)chars + part1 = intmask(ll_builder.current_end - ofs) # in bytes, not (uni)chars # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), + rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), ll_str2raw(ll_str, 0), part1) # --- end --- @@ -79,11 +85,11 @@ part2 = len(ll_str.chars) - part1 # in (uni)chars ll_assert(part2 > 0, "append_overflow: no overflow") ofs = stringbuilder_grow(ll_builder, part2) - ll_builder.current_ofs = ofs + part2 * ll_builder.charsize + ll_builder.current_ofs = ofs + r_uint(part2 * ll_builder.charsize) # --- no GC! 
--- ll_assert(not ll_builder.current_buf, "after grow(), current_buf!=NULL") raw = lltype.nullptr(rffi.CCHARP.TO) - rffi.c_memcpy(rffi.ptradd(raw, ofs), + rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), ll_str2raw(ll_str, part1), part2 * ll_builder.charsize) # --- end --- @@ -113,8 +119,8 @@ STRINGBUILDER = lltype.GcStruct('stringbuilder', ('current_buf', lltype.Ptr(STR)), - ('current_ofs', lltype.Signed), - ('current_end', lltype.Signed), + ('current_ofs', lltype.Unsigned), + ('current_end', lltype.Unsigned), ('total_size', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), ('initial_buf', lltype.Ptr(STR)), @@ -128,8 +134,8 @@ UNICODEBUILDER = lltype.GcStruct('unicodebuilder', ('current_buf', lltype.Ptr(UNICODE)), - ('current_ofs', lltype.Signed), # position measured in *bytes* - ('current_end', lltype.Signed), # position measured in *bytes* + ('current_ofs', lltype.Unsigned), # position measured in *bytes* + ('current_end', lltype.Unsigned), # position measured in *bytes* ('total_size', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), ('initial_buf', lltype.Ptr(UNICODE)), @@ -145,7 +151,7 @@ def ll_baseofs(ll_str): STRTYPE = lltype.typeOf(ll_str).TO ofs = rffi.offsetof(STRTYPE, 'chars') + rffi.itemoffsetof(STRTYPE.chars, 0) - return llmemory.raw_malloc_usage(ofs) # for direct run + return r_uint(llmemory.raw_malloc_usage(ofs)) # for direct run ll_baseofs._always_inline_ = True def ll_str2raw(ll_str, charoffset): @@ -158,7 +164,7 @@ ll_str2raw._always_inline_ = True def ll_rawsetitem(raw, byteoffset, char): - raw = rffi.ptradd(raw, byteoffset) + raw = rffi.ptradd(raw, intmask(byteoffset)) if lltype.typeOf(char) == lltype.Char: raw[0] = char else: @@ -204,14 +210,14 @@ def _ll_append(ll_builder, ll_str): lgt = len(ll_str.chars) * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + lgt + newofs = ofs + r_uint(lgt) if newofs > ll_builder.current_end: ll_builder.append_overflow(ll_builder, ll_str) else: ll_builder.current_ofs = 
newofs # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), + rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), ll_str2raw(ll_str, 0), lgt) # --- end --- @@ -298,7 +304,7 @@ def _ll_append_slice(ll_builder, ll_str, start, end): lgt = (end - start) * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + lgt + newofs = ofs + r_uint(lgt) if newofs > ll_builder.current_end: ll_str = rstr.LLHelpers.ll_stringslice_startstop(ll_str, start, end) ll_builder.append_overflow(ll_builder, ll_str) @@ -306,7 +312,7 @@ ll_builder.current_ofs = newofs # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), + rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), ll_str2raw(ll_str, start), lgt) # --- end --- @@ -327,11 +333,11 @@ return True if bool(ll_builder.current_buf): ofs = ll_builder.current_ofs - end = ofs + size * ll_builder.charsize + end = ofs + r_uint(size * ll_builder.charsize) if end <= ll_builder.current_end: ll_builder.current_ofs = end buf = ll_builder.current_buf - index = (ofs - ll_baseofs(buf)) // ll_builder.charsize + index = intmask(ofs - ll_baseofs(buf)) // ll_builder.charsize if lltype.typeOf(buf).TO.chars.OF == lltype.Char: rstr.copy_string_contents(ll_str, buf, start, index, size) else: @@ -353,7 +359,7 @@ def _ll_append_multiple_char(ll_builder, char, times): lgt = times * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + lgt + newofs = ofs + r_uint(lgt) if newofs > ll_builder.current_end: ll_str = rstr.LLHelpers.ll_char_mul(char, times) ll_builder.append_overflow(ll_builder, ll_str) @@ -392,7 +398,7 @@ def ll_append_charpsize(ll_builder, charp, size): lgt = size * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + lgt + newofs = ofs + r_uint(lgt) if newofs > ll_builder.current_end: if ll_builder.charsize == 1: ll_str = llstr(rffi.charpsize2str(charp, size)) @@ -403,7 +409,7 @@ 
ll_builder.current_ofs = newofs # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), + rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), rffi.cast(rffi.CCHARP, charp), lgt) # --- end --- @@ -411,7 +417,7 @@ @staticmethod @always_inline def ll_getlength(ll_builder): - num_chars_missing_from_last_piece = ( + num_chars_missing_from_last_piece = intmask( (ll_builder.current_end - ll_builder.current_ofs) // ll_builder.charsize) return ll_builder.total_size - num_chars_missing_from_last_piece @@ -430,8 +436,8 @@ buf = rgc.ll_shrink_array(buf, final_size) ll_builder.total_size = final_size ll_builder.current_buf = buf - ll_builder.current_ofs = 0 - ll_builder.current_end = 0 + ll_builder.current_ofs = r_uint(0) + ll_builder.current_end = r_uint(0) return buf else: return BaseStringBuilderRepr._ll_build_extra(cls, ll_builder) @@ -445,15 +451,15 @@ ll_assert(bool(extra), "build() twice on a StringBuilder") ll_builder.extra_pieces = lltype.nullptr(STRINGPIECE) result = cls.mallocfn(final_size) - piece_lgt = ll_builder.current_ofs - rffi.cast(lltype.Signed, # in bytes - extra.raw_ptr) - ll_assert(piece_lgt == extra.piece_lgt - (ll_builder.current_end - - ll_builder.current_ofs), + piece_lgt = intmask( # in bytes + ll_builder.current_ofs - rffi.cast(lltype.Unsigned, extra.raw_ptr)) + ll_assert(piece_lgt == intmask(extra.piece_lgt - + (ll_builder.current_end - ll_builder.current_ofs)), "bogus last piece_lgt") ll_builder.total_size = final_size ll_builder.current_buf = result - ll_builder.current_ofs = 0 - ll_builder.current_end = 0 + ll_builder.current_ofs = r_uint(0) + ll_builder.current_end = r_uint(0) # --- no GC! 
--- dst = ll_str2raw(result, final_size) From noreply at buildbot.pypy.org Mon Jun 9 17:09:33 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 17:09:33 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Add an XXX comment Message-ID: <20140609150933.ACCE51C08F6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72002:1bc7e315768b Date: 2014-06-09 17:08 +0200 http://bitbucket.org/pypy/pypy/changeset/1bc7e315768b/ Log: Add an XXX comment diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -28,6 +28,13 @@ # - The data is copied at most twice, and only once in case it fits # into the initial size (and the GC supports shrinking the STR). # +# XXX too much a mess to handle the case where the JIT sees this code. +# Think about an easier alternative, like using raw_store(current_buf, ..) +# uniformly, where current_buf is a GC pointer that can be NULL. We'd +# need support in the JIT to map that to virtual string index. We'd also +# need a way to express c_memcpy() below --- similar to copystrcontent, +# but without the assumption that it's about a string (or unicode). 
+# # ------------------------------------------------------------ From noreply at buildbot.pypy.org Mon Jun 9 17:17:46 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 9 Jun 2014 17:17:46 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Don't use buffers as the 2nd paramter to rstring.(r)split Message-ID: <20140609151746.899471C08F6@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r72003:b2b630192e51 Date: 2014-06-09 09:25 -0500 http://bitbucket.org/pypy/pypy/changeset/b2b630192e51/ Log: Don't use buffers as the 2nd paramter to rstring.(r)split diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -561,16 +561,10 @@ res = split(value, maxsplit=maxsplit) return self._newlist_unwrapped(space, res) - if self._use_rstr_ops(space, w_sep): - by = self._op_val(space, w_sep) - if len(by) == 0: - raise oefmt(space.w_ValueError, "empty separator") - res = split(value, by, maxsplit) - else: - by = _get_buffer(space, w_sep) - if len(by) == 0: - raise oefmt(space.w_ValueError, "empty separator") - res = split(value, by, maxsplit) + by = self._op_val(space, w_sep) + if len(by) == 0: + raise oefmt(space.w_ValueError, "empty separator") + res = split(value, by, maxsplit) return self._newlist_unwrapped(space, res) @@ -582,16 +576,10 @@ res = rsplit(value, maxsplit=maxsplit) return self._newlist_unwrapped(space, res) - if self._use_rstr_ops(space, w_sep): - by = self._op_val(space, w_sep) - if len(by) == 0: - raise oefmt(space.w_ValueError, "empty separator") - res = rsplit(value, by, maxsplit) - else: - by = _get_buffer(space, w_sep) - if len(by) == 0: - raise oefmt(space.w_ValueError, "empty separator") - res = rsplit(value, by, maxsplit) + by = self._op_val(space, w_sep) + if len(by) == 0: + raise oefmt(space.w_ValueError, "empty separator") + res = rsplit(value, by, maxsplit) return 
self._newlist_unwrapped(space, res) @@ -629,10 +617,7 @@ end)) def _startswith(self, space, value, w_prefix, start, end): - if self._use_rstr_ops(space, w_prefix): - return startswith(value, self._op_val(space, w_prefix), start, end) - else: - return startswith(value, _get_buffer(space, w_prefix), start, end) + return startswith(value, self._op_val(space, w_prefix), start, end) def descr_endswith(self, space, w_suffix, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end, @@ -646,10 +631,7 @@ end)) def _endswith(self, space, value, w_prefix, start, end): - if self._use_rstr_ops(space, w_prefix): - return endswith(value, self._op_val(space, w_prefix), start, end) - else: - return endswith(value, _get_buffer(space, w_prefix), start, end) + return endswith(value, self._op_val(space, w_prefix), start, end) def _strip(self, space, w_chars, left, right): "internal function called by str_xstrip methods" diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -56,6 +56,13 @@ i = j + 1 return res + if isinstance(value, unicode): + assert isinstance(by, unicode) + if isinstance(value, str): + assert isinstance(by, str) + if isinstance(value, list): + assert isinstance(by, str) + bylen = len(by) if bylen == 0: raise ValueError("empty separator") @@ -131,6 +138,13 @@ res.reverse() return res + if isinstance(value, unicode): + assert isinstance(by, unicode) + if isinstance(value, str): + assert isinstance(by, str) + if isinstance(value, list): + assert isinstance(by, str) + if maxsplit > 0: res = newlist_hint(min(maxsplit + 1, len(value))) else: diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -10,14 +10,11 @@ def check_split(value, sub, *args, **kwargs): result = kwargs['res'] assert split(value, sub, *args) == result - assert split(value, 
buffer(sub), *args) == result list_result = [list(i) for i in result] assert split(list(value), sub, *args) == list_result - assert split(list(value), buffer(sub), *args) == list_result assert split(buffer(value), sub, *args) == result - assert split(buffer(value), buffer(sub), *args) == result check_split("", 'x', res=['']) check_split("a", "a", 1, res=['', '']) @@ -50,14 +47,11 @@ def check_rsplit(value, sub, *args, **kwargs): result = kwargs['res'] assert rsplit(value, sub, *args) == result - assert rsplit(value, buffer(sub), *args) == result list_result = [list(i) for i in result] assert rsplit(list(value), sub, *args) == list_result - assert rsplit(list(value), buffer(sub), *args) == list_result assert rsplit(buffer(value), sub, *args) == result - assert rsplit(buffer(value), buffer(sub), *args) == result check_rsplit("a", "a", 1, res=['', '']) check_rsplit(" ", " ", 1, res=['', '']) @@ -87,10 +81,8 @@ def check_replace(value, sub, *args, **kwargs): result = kwargs['res'] assert replace(value, sub, *args) == result - assert replace(value, buffer(sub), *args) == result assert replace(list(value), sub, *args) == list(result) - assert replace(list(value), buffer(sub), *args) == list(result) check_replace('one!two!three!', '!', '@', 1, res='one at two!three!') check_replace('one!two!three!', '!', '', res='onetwothree') @@ -256,9 +248,6 @@ def test_buffer_parameter(self): def fn(): res = True - res = res and split('a//b//c//d', StringBuffer('//')) == ['a', 'b', 'c', 'd'] - res = res and split(u'a//b//c//d', StringBuffer('//')) == [u'a', u'b', u'c', u'd'] - res = res and rsplit('a//b//c//d', StringBuffer('//')) == ['a', 'b', 'c', 'd'] res = res and find('a//b//c//d', StringBuffer('//'), 0, 10) != -1 res = res and rfind('a//b//c//d', StringBuffer('//'), 0, 10) != -1 res = res and count('a//b//c//d', StringBuffer('//'), 0, 10) != 0 From noreply at buildbot.pypy.org Mon Jun 9 17:17:47 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 9 Jun 2014 17:17:47 +0200 
(CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Document new magic method support Message-ID: <20140609151747.DEB0D1C08F6@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r72004:7faf6b6d4368 Date: 2014-06-09 10:16 -0500 http://bitbucket.org/pypy/pypy/changeset/7faf6b6d4368/ Log: Document new magic method support diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -348,8 +348,11 @@ **objects** - Normal rules apply. Special methods are not honoured, except ``__init__``, - ``__del__`` and ``__iter__``. + Normal rules apply. The only special methods that are honoured are + ``__init__``, ``__del__``, ``__len__``, ``__getitem__``, ``__setitem__``, + ``__getslice__``, ``__setslice__``, and ``__iter__``. To handle slicing, + ``__getslice__`` and ``__setslice__`` must be used; using ``__getitem__`` and + ``__setitem__`` for slicing isn't supported. This layout makes the number of types to take care about quite limited. From noreply at buildbot.pypy.org Mon Jun 9 17:42:38 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 9 Jun 2014 17:42:38 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Update whatsnew Message-ID: <20140609154238.609DE1C08F6@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r72005:c1a8bc453186 Date: 2014-06-09 10:41 -0500 http://bitbucket.org/pypy/pypy/changeset/c1a8bc453186/ Log: Update whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -12,3 +12,9 @@ .. branch: release-2.3.x .. branch: unify-call-ops + +.. 
branch: fix-bytearray-complexity +Bytearray operations no longer copy the bytearray unnecessarily + +Added support for ``__getitem__``, ``__setitem__``, ``__getslice__``, +``__setslice__``, and ``__len__`` to RPython From noreply at buildbot.pypy.org Mon Jun 9 19:57:21 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 19:57:21 +0200 (CEST) Subject: [pypy-commit] pypy default: Tests and fix (thanks defnull): handle zero-width matches differently in Message-ID: <20140609175721.790541C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72006:c4a666833c26 Date: 2014-06-09 19:56 +0200 http://bitbucket.org/pypy/pypy/changeset/c4a666833c26/ Log: Tests and fix (thanks defnull): handle zero-width matches differently in greedy repetition operators, in what is hopefully the same way as CPython. diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -418,32 +418,33 @@ marks = p.marks enum = p.enum.move_to_next_result(ctx) # - # zero-width match protection min = ctx.pat(ppos+1) - if self.num_pending >= min: - while enum is not None and ptr == ctx.match_end: - enum = enum.move_to_next_result(ctx) - # matched marks for zero-width assertions - marks = ctx.match_marks - # if enum is not None: # matched one more 'item'. record it and continue. + last_match_length = ctx.match_end - ptr self.pending = Pending(ptr, marks, enum, self.pending) self.num_pending += 1 ptr = ctx.match_end marks = ctx.match_marks - match_more = True - else: - # 'item' no longer matches. 
- if self.num_pending >= min: - # try to match 'tail' if we have enough 'item' - result = sre_match(ctx, tailppos, ptr, marks) - if result is not None: - self.subresult = result - self.cur_ptr = ptr - self.cur_marks = marks - return self - match_more = False + if last_match_length == 0 and self.num_pending >= min: + # zero-width protection: after an empty match, if there + # are enough matches, don't try to match more. Instead, + # fall through to trying to match 'tail'. + pass + else: + match_more = True + continue + + # 'item' no longer matches. + if self.num_pending >= min: + # try to match 'tail' if we have enough 'item' + result = sre_match(ctx, tailppos, ptr, marks) + if result is not None: + self.subresult = result + self.cur_ptr = ptr + self.cur_marks = marks + return self + match_more = False class MinUntilMatchResult(AbstractUntilMatchResult): diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -149,8 +149,11 @@ def test_empty_maxuntil(self): r_code, r = get_code_and_re(r'(a?)+y') assert r.match('y') + assert r.match('aaayaaay').span() == (0, 4) res = rsre_core.match(r_code, 'y') assert res + res = rsre_core.match(r_code, 'aaayaaay') + assert res and res.span() == (0, 4) # r_code, r = get_code_and_re(r'(a?){4,6}y') assert r.match('y') @@ -162,6 +165,14 @@ res = rsre_core.match(r_code, 'y') assert res + def test_empty_maxuntil_2(self): + r_code, r = get_code_and_re(r'X(.*?)+X') + assert r.match('XfooXbarX').span() == (0, 5) + assert r.match('XfooXbarX').span(1) == (4, 4) + res = rsre_core.match(r_code, 'XfooXbarX') + assert res.span() == (0, 5) + assert res.span(1) == (4, 4) + def test_empty_minuntil(self): r_code, r = get_code_and_re(r'(a?)+?y') #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory From noreply at buildbot.pypy.org Mon Jun 9 20:01:10 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 
Jun 2014 20:01:10 +0200 (CEST) Subject: [pypy-commit] pypy default: Add the original failure as a test. Message-ID: <20140609180110.591751C3434@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72007:b91e7ae78e03 Date: 2014-06-09 20:00 +0200 http://bitbucket.org/pypy/pypy/changeset/b91e7ae78e03/ Log: Add the original failure as a test. diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py --- a/rpython/rlib/rsre/test/test_match.py +++ b/rpython/rlib/rsre/test/test_match.py @@ -267,3 +267,8 @@ match = rsre_core.match(r, "abbbbbbbbbcdef") assert match assert match.match_end == 11 + + def test_empty_maxuntil(self): + r = get_code("\\{\\{((?:.*?)+)\\}\\}") + match = rsre_core.match(r, "{{a}}{{b}}") + assert match.group(1) == "a" From noreply at buildbot.pypy.org Mon Jun 9 20:10:30 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 9 Jun 2014 20:10:30 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Revert e0d8eae14284, keeping signed integers around the code, but Message-ID: <20140609181030.9CBD21C08F6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72008:c484932efa95 Date: 2014-06-09 20:09 +0200 http://bitbucket.org/pypy/pypy/changeset/c484932efa95/ Log: Revert e0d8eae14284, keeping signed integers around the code, but simply use uint_lt() functions that perform an unsigned comparison. 
diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -1,6 +1,6 @@ from rpython.rlib import rgc, jit from rpython.rlib.objectmodel import enforceargs, specialize -from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask +from rpython.rlib.rarithmetic import ovfcheck, r_uint from rpython.rlib.debug import ll_assert from rpython.rlib.rgc import must_be_light_finalizer from rpython.rtyper.rptr import PtrRepr @@ -42,6 +42,13 @@ func._always_inline_ = True return func +def uint_lt(a, b): + return r_uint(a) < r_uint(b) +def uint_le(a, b): + return r_uint(a) <= r_uint(b) +def uint_gt(a, b): + return r_uint(a) > r_uint(b) + def new_grow_funcs(name, mallocfn): @@ -57,18 +64,19 @@ new_piece = lltype.malloc(STRINGPIECE) charsize = ll_builder.charsize try: - new_piece.piece_lgt = ovfcheck(needed * charsize) + needed_chars = needed * charsize except OverflowError: raise MemoryError + new_piece.piece_lgt = needed_chars raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed * charsize, flavor='raw') new_piece.raw_ptr = raw_ptr new_piece.prev_piece = ll_builder.extra_pieces ll_builder.extra_pieces = new_piece - ll_builder.current_ofs = rffi.cast(lltype.Unsigned, raw_ptr) - ll_builder.current_end = (rffi.cast(lltype.Unsigned, raw_ptr) + - r_uint(needed * charsize)) + ll_builder.current_ofs = rffi.cast(lltype.Signed, raw_ptr) + ll_builder.current_end = (rffi.cast(lltype.Signed, raw_ptr) + + needed_chars) try: - ll_builder.total_size = ovfcheck(ll_builder.total_size + needed) + ll_builder.total_size = ll_builder.total_size + needed except OverflowError: raise MemoryError if ll_builder.current_buf: @@ -80,10 +88,10 @@ def stringbuilder_append_overflow(ll_builder, ll_str): # First, the part that still fits in the current piece ofs = ll_builder.current_ofs - part1 = intmask(ll_builder.current_end - ofs) # in bytes, not (uni)chars + part1 = 
ll_builder.current_end - ofs # in bytes, not (uni)chars # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), + rffi.c_memcpy(rffi.ptradd(raw, ofs), ll_str2raw(ll_str, 0), part1) # --- end --- @@ -92,11 +100,11 @@ part2 = len(ll_str.chars) - part1 # in (uni)chars ll_assert(part2 > 0, "append_overflow: no overflow") ofs = stringbuilder_grow(ll_builder, part2) - ll_builder.current_ofs = ofs + r_uint(part2 * ll_builder.charsize) + ll_builder.current_ofs = ofs + part2 * ll_builder.charsize # --- no GC! --- ll_assert(not ll_builder.current_buf, "after grow(), current_buf!=NULL") raw = lltype.nullptr(rffi.CCHARP.TO) - rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), + rffi.c_memcpy(rffi.ptradd(raw, ofs), ll_str2raw(ll_str, part1), part2 * ll_builder.charsize) # --- end --- @@ -126,8 +134,8 @@ STRINGBUILDER = lltype.GcStruct('stringbuilder', ('current_buf', lltype.Ptr(STR)), - ('current_ofs', lltype.Unsigned), - ('current_end', lltype.Unsigned), + ('current_ofs', lltype.Signed), + ('current_end', lltype.Signed), ('total_size', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), ('initial_buf', lltype.Ptr(STR)), @@ -141,8 +149,8 @@ UNICODEBUILDER = lltype.GcStruct('unicodebuilder', ('current_buf', lltype.Ptr(UNICODE)), - ('current_ofs', lltype.Unsigned), # position measured in *bytes* - ('current_end', lltype.Unsigned), # position measured in *bytes* + ('current_ofs', lltype.Signed), # position measured in *bytes* + ('current_end', lltype.Signed), # position measured in *bytes* ('total_size', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), ('initial_buf', lltype.Ptr(UNICODE)), @@ -158,7 +166,7 @@ def ll_baseofs(ll_str): STRTYPE = lltype.typeOf(ll_str).TO ofs = rffi.offsetof(STRTYPE, 'chars') + rffi.itemoffsetof(STRTYPE.chars, 0) - return r_uint(llmemory.raw_malloc_usage(ofs)) # for direct run + return llmemory.raw_malloc_usage(ofs) # for direct run ll_baseofs._always_inline_ = True def 
ll_str2raw(ll_str, charoffset): @@ -171,7 +179,7 @@ ll_str2raw._always_inline_ = True def ll_rawsetitem(raw, byteoffset, char): - raw = rffi.ptradd(raw, intmask(byteoffset)) + raw = rffi.ptradd(raw, byteoffset) if lltype.typeOf(char) == lltype.Char: raw[0] = char else: @@ -217,14 +225,14 @@ def _ll_append(ll_builder, ll_str): lgt = len(ll_str.chars) * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + r_uint(lgt) - if newofs > ll_builder.current_end: + newofs = ofs + lgt + if uint_gt(newofs, ll_builder.current_end): ll_builder.append_overflow(ll_builder, ll_str) else: ll_builder.current_ofs = newofs # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), + rffi.c_memcpy(rffi.ptradd(raw, ofs), ll_str2raw(ll_str, 0), lgt) # --- end --- @@ -253,7 +261,8 @@ @staticmethod def _ll_jit_append_char(ll_builder, char): ofs = ll_builder.current_ofs - if bool(ll_builder.current_buf) and ofs < ll_builder.current_end: + if bool(ll_builder.current_buf) and uint_lt(ofs, + ll_builder.current_end): ll_builder.current_ofs = ofs + ll_builder.charsize buf = ll_builder.current_buf index = (ofs - ll_baseofs(buf)) // ll_builder.charsize @@ -273,7 +282,7 @@ def _ll_append_char_2(ll_builder, char0, char1): ofs = ll_builder.current_ofs end = ofs + 2 * ll_builder.charsize - if end > ll_builder.current_end: + if uint_gt(end, ll_builder.current_end): ofs = ll_builder.grow(ll_builder, 2) end = ofs + 2 * ll_builder.charsize ll_builder.current_ofs = end @@ -287,7 +296,8 @@ def _ll_jit_append_char_2(ll_builder, char0, char1): ofs = ll_builder.current_ofs end = ofs + 2 * ll_builder.charsize - if bool(ll_builder.current_buf) and end <= ll_builder.current_end: + if bool(ll_builder.current_buf) and uint_le(end, + ll_builder.current_end): ll_builder.current_ofs = end buf = ll_builder.current_buf index = (ofs - ll_baseofs(buf)) // ll_builder.charsize @@ -311,15 +321,15 @@ def _ll_append_slice(ll_builder, ll_str, 
start, end): lgt = (end - start) * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + r_uint(lgt) - if newofs > ll_builder.current_end: + newofs = ofs + lgt + if uint_gt(newofs, ll_builder.current_end): ll_str = rstr.LLHelpers.ll_stringslice_startstop(ll_str, start, end) ll_builder.append_overflow(ll_builder, ll_str) else: ll_builder.current_ofs = newofs # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), + rffi.c_memcpy(rffi.ptradd(raw, ofs), ll_str2raw(ll_str, start), lgt) # --- end --- @@ -340,11 +350,11 @@ return True if bool(ll_builder.current_buf): ofs = ll_builder.current_ofs - end = ofs + r_uint(size * ll_builder.charsize) - if end <= ll_builder.current_end: + end = ofs + size * ll_builder.charsize + if uint_le(end, ll_builder.current_end): ll_builder.current_ofs = end buf = ll_builder.current_buf - index = intmask(ofs - ll_baseofs(buf)) // ll_builder.charsize + index = (ofs - ll_baseofs(buf)) // ll_builder.charsize if lltype.typeOf(buf).TO.chars.OF == lltype.Char: rstr.copy_string_contents(ll_str, buf, start, index, size) else: @@ -366,15 +376,15 @@ def _ll_append_multiple_char(ll_builder, char, times): lgt = times * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + r_uint(lgt) - if newofs > ll_builder.current_end: + newofs = ofs + lgt + if uint_gt(newofs, ll_builder.current_end): ll_str = rstr.LLHelpers.ll_char_mul(char, times) ll_builder.append_overflow(ll_builder, ll_str) else: ll_builder.current_ofs = newofs # --- no GC! 
--- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - while ofs < newofs: + while uint_lt(ofs, newofs): ll_rawsetitem(raw, ofs, char) ofs += ll_builder.charsize # --- end --- @@ -405,8 +415,8 @@ def ll_append_charpsize(ll_builder, charp, size): lgt = size * ll_builder.charsize # in bytes ofs = ll_builder.current_ofs - newofs = ofs + r_uint(lgt) - if newofs > ll_builder.current_end: + newofs = ofs + lgt + if uint_gt(newofs, ll_builder.current_end): if ll_builder.charsize == 1: ll_str = llstr(rffi.charpsize2str(charp, size)) else: @@ -416,7 +426,7 @@ ll_builder.current_ofs = newofs # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)), + rffi.c_memcpy(rffi.ptradd(raw, ofs), rffi.cast(rffi.CCHARP, charp), lgt) # --- end --- @@ -424,7 +434,7 @@ @staticmethod @always_inline def ll_getlength(ll_builder): - num_chars_missing_from_last_piece = intmask( + num_chars_missing_from_last_piece = ( (ll_builder.current_end - ll_builder.current_ofs) // ll_builder.charsize) return ll_builder.total_size - num_chars_missing_from_last_piece @@ -443,8 +453,8 @@ buf = rgc.ll_shrink_array(buf, final_size) ll_builder.total_size = final_size ll_builder.current_buf = buf - ll_builder.current_ofs = r_uint(0) - ll_builder.current_end = r_uint(0) + ll_builder.current_ofs = 0 + ll_builder.current_end = 0 return buf else: return BaseStringBuilderRepr._ll_build_extra(cls, ll_builder) @@ -458,15 +468,15 @@ ll_assert(bool(extra), "build() twice on a StringBuilder") ll_builder.extra_pieces = lltype.nullptr(STRINGPIECE) result = cls.mallocfn(final_size) - piece_lgt = intmask( # in bytes - ll_builder.current_ofs - rffi.cast(lltype.Unsigned, extra.raw_ptr)) - ll_assert(piece_lgt == intmask(extra.piece_lgt - - (ll_builder.current_end - ll_builder.current_ofs)), + piece_lgt = ll_builder.current_ofs - rffi.cast(lltype.Signed, # in bytes + extra.raw_ptr) + ll_assert(piece_lgt == extra.piece_lgt - (ll_builder.current_end - + 
ll_builder.current_ofs), "bogus last piece_lgt") ll_builder.total_size = final_size ll_builder.current_buf = result - ll_builder.current_ofs = r_uint(0) - ll_builder.current_end = r_uint(0) + ll_builder.current_ofs = 0 + ll_builder.current_end = 0 # --- no GC! --- dst = ll_str2raw(result, final_size) From noreply at buildbot.pypy.org Mon Jun 9 23:44:59 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 9 Jun 2014 23:44:59 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: merge default into branch Message-ID: <20140609214459.81E5A1C08F6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72009:7292e1a96b0a Date: 2014-06-07 23:35 +0300 http://bitbucket.org/pypy/pypy/changeset/7292e1a96b0a/ Log: merge default into branch diff too long, truncating to 2000 out of 19349 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -10,3 +10,7 @@ 20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 0000000000000000000000000000000000000000 release-2.3.0 394146e9bb673514c61f0150ab2013ccf78e8de7 release-2.3 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.2=3.1 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.3.1 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.2=3.1 +0000000000000000000000000000000000000000 release-2.2=3.1 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -128,6 +128,7 @@ Stian Andreassen Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume Oscar Nierstrasz @@ -212,7 +213,9 @@ Alejandro J. 
Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner anatoly techtonik Lutz Paelike @@ -245,6 +248,7 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe Laurens Van Houtven Akira Li @@ -274,6 +278,8 @@ Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr jiaaro opassembler.py Antony Lee diff --git a/lib-python/2.7/imputil.py b/lib-python/2.7/imputil.py --- a/lib-python/2.7/imputil.py +++ b/lib-python/2.7/imputil.py @@ -422,7 +422,8 @@ saved back to the filesystem for future imports. The source file's modification timestamp must be provided as a Long value. """ - codestring = open(pathname, 'rU').read() + with open(pathname, 'rU') as fp: + codestring = fp.read() if codestring and codestring[-1] != '\n': codestring = codestring + '\n' code = __builtin__.compile(codestring, pathname, 'exec') @@ -603,8 +604,8 @@ self.desc = desc def import_file(self, filename, finfo, fqname): - fp = open(filename, self.desc[1]) - module = imp.load_module(fqname, fp, filename, self.desc) + with open(filename, self.desc[1]) as fp: + module = imp.load_module(fqname, fp, filename, self.desc) module.__file__ = filename return 0, module, { } diff --git a/lib-python/2.7/modulefinder.py b/lib-python/2.7/modulefinder.py --- a/lib-python/2.7/modulefinder.py +++ b/lib-python/2.7/modulefinder.py @@ -109,16 +109,16 @@ def run_script(self, pathname): self.msg(2, "run_script", pathname) - fp = open(pathname, READ_MODE) - stuff = ("", "r", imp.PY_SOURCE) - self.load_module('__main__', fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) def load_file(self, pathname): dir, name = os.path.split(pathname) name, ext = os.path.splitext(name) - fp = open(pathname, READ_MODE) - stuff = (ext, "r", imp.PY_SOURCE) - self.load_module(name, fp, pathname, stuff) + with 
open(pathname, READ_MODE) as fp: + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) def import_hook(self, name, caller=None, fromlist=None, level=-1): self.msg(3, "import_hook", name, caller, fromlist, level) @@ -461,6 +461,8 @@ fp, buf, stuff = self.find_module("__init__", m.__path__) self.load_module(fqname, fp, buf, stuff) self.msgout(2, "load_package ->", m) + if fp: + fp.close() return m def add_module(self, fqname): diff --git a/lib-python/2.7/test/test_argparse.py b/lib-python/2.7/test/test_argparse.py --- a/lib-python/2.7/test/test_argparse.py +++ b/lib-python/2.7/test/test_argparse.py @@ -48,6 +48,9 @@ def tearDown(self): os.chdir(self.old_dir) + import gc + # Force a collection which should close FileType() options + gc.collect() for root, dirs, files in os.walk(self.temp_dir, topdown=False): for name in files: os.chmod(os.path.join(self.temp_dir, name), stat.S_IWRITE) diff --git a/lib_pypy/_tkinter/license.terms b/lib_pypy/_tkinter/license.terms new file mode 100644 --- /dev/null +++ b/lib_pypy/_tkinter/license.terms @@ -0,0 +1,39 @@ +This software is copyrighted by the Regents of the University of +California, Sun Microsystems, Inc., and other parties. The following +terms apply to all files associated with the software unless explicitly +disclaimed in individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, provided +that existing copyright notices are retained in all copies and that this +notice is included verbatim in any distributions. No written agreement, +license, or royalty fee is required for any of the authorized uses. +Modifications to this software may be copyrighted by their authors +and need not follow the licensing terms described here, provided that +the new terms are clearly indicated on the first page of each file where +they apply. 
+ +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE +IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE +NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" +in the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you +are acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -99,6 +99,7 @@ Stian Andreassen Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume Oscar Nierstrasz @@ -183,7 +184,9 @@ Alejandro J. 
Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner anatoly techtonik Lutz Paelike @@ -216,6 +219,7 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe Laurens Van Houtven Akira Li @@ -245,6 +249,8 @@ Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr jiaaro opassembler.py Antony Lee diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-2.3.1.rst release-2.3.0.rst release-2.2.1.rst release-2.2.0.rst diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -40,7 +40,7 @@ * `FAQ`_: some frequently asked questions. -* `Release 2.3.0`_: the latest official release +* `Release 2.3.1`_: the latest official release * `PyPy Blog`_: news and status info about PyPy @@ -110,7 +110,7 @@ .. _`Getting Started`: getting-started.html .. _`Papers`: extradoc.html .. _`Videos`: video-index.html -.. _`Release 2.3.0`: http://pypy.org/download.html +.. _`Release 2.3.1`: http://pypy.org/download.html .. _`speed.pypy.org`: http://speed.pypy.org .. _`RPython toolchain`: translation.html .. _`potential project ideas`: project-ideas.html diff --git a/pypy/doc/release-2.3.1.rst b/pypy/doc/release-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-2.3.1.rst @@ -0,0 +1,81 @@ +================================================= +PyPy 2.3.1 - Terrestrial Arthropod Trap Revisited +================================================= + +We're pleased to announce PyPy 2.3.1, a feature-and-bugfix improvement over our +recent release last month. + +This release contains several bugfixes and enhancements. 
+ +You can download the PyPy 2.3.1 release here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project, and for those who donate to our three sub-projects. +We've shown quite a bit of progress +but we're slowly running out of funds. +Please consider donating more, or even better convince your employer to donate, +so we can finish those projects! The three sub-projects are: + +* `Py3k`_ (supporting Python 3.x): the release PyPy3 2.3 is imminent. + +* `STM`_ (software transactional memory): a preview will be released very soon, + once we fix a few bugs + +* `NumPy`_ which requires installation of our fork of upstream numpy, available `on bitbucket`_ + +.. _`Py3k`: http://pypy.org/py3donate.html +.. _`STM`: http://pypy.org/tmdonate2.html +.. _`NumPy`: http://pypy.org/numpydonate.html +.. _`on bitbucket`: https://www.bitbucket.org/pypy/numpy + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7. It's fast (`pypy 2.3 and cpython 2.7.x`_ performance comparison; +note that cpython's speed has not changed since 2.7.2) +due to its integrated tracing JIT compiler. + +This release supports x86 machines running Linux 32/64, Mac OS X 64, Windows, +and OpenBSD, +as well as newer ARM hardware (ARMv6 or ARMv7, with VFPv3) running Linux. + +While we support 32 bit python on Windows, work on the native Windows 64 +bit python is still stalling, we would welcome a volunteer +to `handle that`_. + +.. _`pypy 2.3 and cpython 2.7.x`: http://speed.pypy.org +.. _`handle that`: http://doc.pypy.org/en/latest/windows.html#what-is-missing-for-a-full-64-bit-translation + +Highlights +========== + +Issues with the 2.3 release were resolved after being reported by users to +our new issue tracker at https://bitbucket.org/pypy/pypy/issues or on IRC at +#pypy. 
Here is a summary of the user-facing changes; +for more information see `whats-new`_: + +* The built-in ``struct`` module was renamed to ``_struct``, solving issues + with IDLE and other modules. + +* Support for compilation with gcc-4.9 + +* A rewrite of packaging.py which produces our downloadable packages to + modernize command line argument handling and to document third-party + contributions in our LICENSE file + +* A CFFI-based version of the gdbm module is now included in our downloads + +* Many issues were resolved_ since the 2.3 release on May 8 + +.. _`whats-new`: http://doc.pypy.org/en/latest/whatsnew-2.3.1.html +.. _resolved: https://bitbucket.org/pypy/pypy/issues?status=resolved +Please try it out and let us know what you think. We especially welcome +success stories, we know you are using PyPy, please tell us about it! + +Cheers + +The PyPy Team + diff --git a/pypy/doc/whatsnew-2.3.1.rst b/pypy/doc/whatsnew-2.3.1.rst --- a/pypy/doc/whatsnew-2.3.1.rst +++ b/pypy/doc/whatsnew-2.3.1.rst @@ -9,5 +9,16 @@ Support compilation with gcc-4.9 -Fixes for issues #1769, #1764, #1762, #1752 +Added support for the stdlib gdbm module via cffi +Annotator cleanups + +.. branch: release-2.3.x + +.. branch: unify-call-ops + +.. branch packaging +Use argparse for packaging.py, and add third-party components to LICENSE file. +Also mention that gdbm is GPL. +Do not crash the packaging process on failure in CFFI or license-building, +rather complete the build step and return -1. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,11 +3,6 @@ ======================= .. this is a revision shortly after release-2.3.x -.. startrev: b2cc67adbaad +.. startrev: 87fdc76bccb4 -Added support for the stdlib gdbm module via cffi -Annotator cleanups - -.. 
branch: release-2.3.x - diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -10,8 +10,14 @@ 64bit Windows. See at the end of this page for what is missing for a full 64bit translation. -To build pypy-c you need a C compiler. Microsoft Visual Studio is -preferred, but can also use the mingw32 port of gcc. +To build pypy-c you need a working python environment, and a C compiler. +It is possible to translate with a CPython 2.6 or later, but this is not +the preferred way, because it will take a lot longer to run – depending +on your architecture, between two and three times as long. So head to +`our downloads`_ and get the latest stable version. + +Microsoft Visual Studio is preferred as a compiler, but there are reports +of success with the mingw32 port of gcc. Translating PyPy with Visual Studio @@ -34,10 +40,20 @@ **Note:** PyPy is currently not supported for 64 bit Windows, and translation will fail in this case. -The compiler is all you need to build pypy-c, but it will miss some +Python and a C compiler are all you need to build pypy, but it will miss some modules that relies on third-party libraries. See below how to get and build them. +Please see the `non-windows instructions`_ for more information, especially note +that translation is RAM-hungry. A standard translation requires around 4GB, so +special preparations are necessary, or you may want to use the method in the +notes of the `build instructions`_ to reduce memory usage at the price of a +slower translation:: + + set PYPY_GC_MAX_DELTA=200MB + pypy --jit loop_longevity=300 ../../rpython/bin/rpython -Ojit targetpypystandalone + set PYPY_GC_MAX_DELTA= + Preping Windows for the Large Build ----------------------------------- @@ -52,9 +68,10 @@ Then you need to execute:: - editbin /largeaddressaware pypy.exe + editbin /largeaddressaware translator.exe -on the pypy.exe file you compiled. 
+where ``translator.exe`` is the pypy.exe or cpython.exe you will use to +translate with. Installing external packages ---------------------------- @@ -244,7 +261,9 @@ .. _`msys for mingw`: http://sourceforge.net/projects/mingw-w64/files/External%20binary%20packages%20%28Win64%20hosted%29/MSYS%20%2832-bit%29 .. _`libffi source files`: http://sourceware.org/libffi/ .. _`RPython translation toolchain`: translation.html - +.. _`our downloads`: http://pypy.org/download.html +.. _`non-windows instructions`: getting-started-python.html#translating-the-pypy-python-interpreter +.. _`build instructions`: http://pypy.org/download.html#building-from-source What is missing for a full 64-bit translation --------------------------------------------- diff --git a/pypy/module/__builtin__/app_io.py b/pypy/module/__builtin__/app_io.py --- a/pypy/module/__builtin__/app_io.py +++ b/pypy/module/__builtin__/app_io.py @@ -4,6 +4,7 @@ """ import sys +from _ast import PyCF_ACCEPT_NULL_BYTES def execfile(filename, glob=None, loc=None): """execfile(filename[, globals[, locals]]) @@ -24,7 +25,8 @@ finally: f.close() #Don't exec the source directly, as this loses the filename info - co = compile(source.rstrip()+"\n", filename, 'exec') + co = compile(source.rstrip()+"\n", filename, 'exec', + PyCF_ACCEPT_NULL_BYTES) exec co in glob, loc def _write_prompt(stdout, prompt): diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -1,7 +1,10 @@ import sys +from rpython.tool.udir import udir + class AppTestBuiltinApp: def setup_class(cls): + space = cls.space class X(object): def __eq__(self, other): raise OverflowError @@ -11,18 +14,25 @@ try: d[X()] except OverflowError: - cls.w_sane_lookup = cls.space.wrap(True) + cls.w_sane_lookup = space.wrap(True) except KeyError: - cls.w_sane_lookup = cls.space.wrap(False) + cls.w_sane_lookup = 
space.wrap(False) # starting with CPython 2.6, when the stack is almost out, we # can get a random error, instead of just a RuntimeError. # For example if an object x has a __getattr__, we can get # AttributeError if attempting to call x.__getattr__ runs out # of stack. That's annoying, so we just work around it. if cls.runappdirect: - cls.w_safe_runtimerror = cls.space.wrap(True) + cls.w_safe_runtimerror = space.wrap(True) else: - cls.w_safe_runtimerror = cls.space.wrap(sys.version_info < (2, 6)) + cls.w_safe_runtimerror = space.wrap(sys.version_info < (2, 6)) + + emptyfile = udir.join('emptyfile.py') + emptyfile.write('') + nullbytes = udir.join('nullbytes.py') + nullbytes.write('#abc\x00def\n') + cls.w_emptyfile = space.wrap(str(emptyfile)) + cls.w_nullbytes = space.wrap(str(nullbytes)) def test_builtin_names(self): import __builtin__ @@ -431,7 +441,7 @@ assert setattr(x, 'x', 11) == None assert delattr(x, 'x') == None # To make this test, we need autopath to work in application space. 
- #self.assertEquals(execfile('emptyfile.py'), None) + assert execfile(self.emptyfile) == None def test_divmod(self): assert divmod(15,10) ==(1,5) @@ -611,14 +621,21 @@ assert firstlineno == 2 def test_compile_null_bytes(self): - import _ast raises(TypeError, compile, '\x00', 'mymod', 'exec', 0) - raises(SyntaxError, compile, '\x00', 'mymod', 'exec', - _ast.PyCF_ACCEPT_NULL_BYTES) src = "#abc\x00def\n" raises(TypeError, compile, src, 'mymod', 'exec') raises(TypeError, compile, src, 'mymod', 'exec', 0) - compile(src, 'mymod', 'exec', _ast.PyCF_ACCEPT_NULL_BYTES) # works + execfile(self.nullbytes) # works + + def test_compile_null_bytes_flag(self): + try: + from _ast import PyCF_ACCEPT_NULL_BYTES + except ImportError: + skip('PyPy only (requires _ast.PyCF_ACCEPT_NULL_BYTES)') + raises(SyntaxError, compile, '\x00', 'mymod', 'exec', + PyCF_ACCEPT_NULL_BYTES) + src = "#abc\x00def\n" + compile(src, 'mymod', 'exec', PyCF_ACCEPT_NULL_BYTES) # works def test_print_function(self): import __builtin__ @@ -720,7 +737,6 @@ class TestInternal: def test_execfile(self, space): - from rpython.tool.udir import udir fn = str(udir.join('test_execfile')) f = open(fn, 'w') print >>f, "i=42" diff --git a/pypy/module/cpyext/pystate.py b/pypy/module/cpyext/pystate.py --- a/pypy/module/cpyext/pystate.py +++ b/pypy/module/cpyext/pystate.py @@ -208,6 +208,9 @@ @cpython_api([], PyGILState_STATE, error=CANNOT_FAIL) def PyGILState_Ensure(space): + # XXX XXX XXX THIS IS A VERY MINIMAL IMPLEMENTATION THAT WILL HAPPILY + # DEADLOCK IF CALLED TWICE ON THE SAME THREAD, OR CRASH IF CALLED IN A + # NEW THREAD. We should very carefully follow what CPython does instead. if rffi.aroundstate.after: # After external call is before entering Python rffi.aroundstate.after() @@ -215,6 +218,7 @@ @cpython_api([PyGILState_STATE], lltype.Void) def PyGILState_Release(space, state): + # XXX XXX XXX We should very carefully follow what CPython does instead. 
if rffi.aroundstate.before: # Before external call is after running Python rffi.aroundstate.before() diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -748,11 +748,11 @@ self.lockcounter = 0 def lock_held_by_someone_else(self): - return self.lockowner is not None and not self.lock_held() + me = self.space.getexecutioncontext() # used as thread ident + return self.lockowner is not None and self.lockowner is not me - def lock_held(self): - me = self.space.getexecutioncontext() # used as thread ident - return self.lockowner is me + def lock_held_by_anyone(self): + return self.lockowner is not None def acquire_lock(self): # this function runs with the GIL acquired so there is no race diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py --- a/pypy/module/imp/interp_imp.py +++ b/pypy/module/imp/interp_imp.py @@ -165,7 +165,7 @@ def lock_held(space): if space.config.objspace.usemodules.thread: - return space.wrap(importing.getimportlock(space).lock_held()) + return space.wrap(importing.getimportlock(space).lock_held_by_anyone()) else: return space.w_False diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py --- a/pypy/module/micronumpy/__init__.py +++ b/pypy/module/micronumpy/__init__.py @@ -15,6 +15,7 @@ 'empty': 'ctors.zeros', 'empty_like': 'ctors.empty_like', 'fromstring': 'ctors.fromstring', + 'frombuffer': 'ctors.frombuffer', 'concatenate': 'arrayops.concatenate', 'count_nonzero': 'arrayops.count_nonzero', diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -1,5 +1,6 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import unwrap_spec, WrappedDefault +from rpython.rlib.buffer import SubBuffer from rpython.rlib.rstring import strip_spaces from rpython.rtyper.lltypesystem import 
lltype, rffi from pypy.module.micronumpy import descriptor, loop @@ -192,3 +193,62 @@ return _fromstring_bin(space, s, count, length, dtype) else: return _fromstring_text(space, s, count, sep, length, dtype) + + +def _getbuffer(space, w_buffer): + try: + return space.writebuf_w(w_buffer) + except OperationError as e: + if not e.match(space, space.w_TypeError): + raise + return space.readbuf_w(w_buffer) + + + at unwrap_spec(count=int, offset=int) +def frombuffer(space, w_buffer, w_dtype=None, count=-1, offset=0): + dtype = space.interp_w(descriptor.W_Dtype, + space.call_function(space.gettypefor(descriptor.W_Dtype), w_dtype)) + if dtype.elsize == 0: + raise oefmt(space.w_ValueError, "itemsize cannot be zero in type") + + try: + buf = _getbuffer(space, w_buffer) + except OperationError as e: + if not e.match(space, space.w_TypeError): + raise + w_buffer = space.getattr(w_buffer, space.wrap('__buffer__')) + buf = _getbuffer(space, w_buffer) + + ts = buf.getlength() + if offset < 0 or offset > ts: + raise oefmt(space.w_ValueError, + "offset must be non-negative and no greater than " + "buffer length (%d)", ts) + + s = ts - offset + if offset: + buf = SubBuffer(buf, offset, s) + + n = count + itemsize = dtype.elsize + assert itemsize > 0 + if n < 0: + if s % itemsize != 0: + raise oefmt(space.w_ValueError, + "buffer size must be a multiple of element size") + n = s / itemsize + else: + if s < n * itemsize: + raise oefmt(space.w_ValueError, + "buffer is smaller than requested size") + + try: + storage = buf.get_raw_address() + except ValueError: + a = W_NDimArray.from_shape(space, [n], dtype=dtype) + loop.fromstring_loop(space, a, dtype, itemsize, buf.as_str()) + return a + else: + writable = not buf.readonly + return W_NDimArray.from_shape_and_storage(space, [n], storage, dtype=dtype, + w_base=w_buffer, writable=writable) diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py --- 
a/pypy/module/micronumpy/test/test_ndarray.py +++ b/pypy/module/micronumpy/test/test_ndarray.py @@ -3132,6 +3132,8 @@ class AppTestSupport(BaseNumpyAppTest): + spaceconfig = {'usemodules': ['micronumpy', 'array']} + def setup_class(cls): import struct BaseNumpyAppTest.setup_class.im_func(cls) @@ -3142,6 +3144,44 @@ cls.w_float64val = cls.space.wrap(struct.pack('d', 300.4)) cls.w_ulongval = cls.space.wrap(struct.pack('L', 12)) + def test_frombuffer(self): + import numpy as np + exc = raises(AttributeError, np.frombuffer, None) + assert str(exc.value) == "'NoneType' object has no attribute '__buffer__'" + exc = raises(AttributeError, np.frombuffer, memoryview(self.data)) + assert str(exc.value) == "'memoryview' object has no attribute '__buffer__'" + exc = raises(ValueError, np.frombuffer, self.data, 'S0') + assert str(exc.value) == "itemsize cannot be zero in type" + exc = raises(ValueError, np.frombuffer, self.data, offset=-1) + assert str(exc.value) == "offset must be non-negative and no greater than buffer length (32)" + exc = raises(ValueError, np.frombuffer, self.data, count=100) + assert str(exc.value) == "buffer is smaller than requested size" + for data in [self.data, buffer(self.data)]: + a = np.frombuffer(data) + for i in range(4): + assert a[i] == i + 1 + + import array + data = array.array('c', 'testing') + a = np.frombuffer(data, 'c') + assert a.base is data + a[2] = 'Z' + assert data.tostring() == 'teZting' + + data = buffer(data) + a = np.frombuffer(data, 'c') + assert a.base is data + exc = raises(ValueError, "a[2] = 'Z'") + assert str(exc.value) == "assignment destination is read-only" + + class A(object): + __buffer__ = 'abc' + + data = A() + a = np.frombuffer(data, 'c') + #assert a.base is data.__buffer__ + assert a.tostring() == 'abc' + def test_fromstring(self): import sys from numpypy import fromstring, dtype diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py --- a/pypy/module/sys/__init__.py +++ 
b/pypy/module/sys/__init__.py @@ -13,7 +13,7 @@ """NOT_RPYTHON""" # because parent __init__ isn't if space.config.translating: del self.__class__.interpleveldefs['pypy_getudir'] - super(Module, self).__init__(space, w_name) + super(Module, self).__init__(space, w_name) self.recursionlimit = 100 self.w_default_encoder = None self.defaultencoding = "ascii" @@ -21,13 +21,13 @@ self.debug = True interpleveldefs = { - '__name__' : '(space.wrap("sys"))', - '__doc__' : '(space.wrap("PyPy sys module"))', + '__name__' : '(space.wrap("sys"))', + '__doc__' : '(space.wrap("PyPy sys module"))', - 'platform' : 'space.wrap(sys.platform)', + 'platform' : 'space.wrap(sys.platform)', 'maxint' : 'space.wrap(sys.maxint)', 'maxsize' : 'space.wrap(sys.maxint)', - 'byteorder' : 'space.wrap(sys.byteorder)', + 'byteorder' : 'space.wrap(sys.byteorder)', 'maxunicode' : 'space.wrap(vm.MAXUNICODE)', 'stdin' : 'state.getio(space).w_stdin', '__stdin__' : 'state.getio(space).w_stdin', @@ -36,35 +36,35 @@ 'stderr' : 'state.getio(space).w_stderr', '__stderr__' : 'state.getio(space).w_stderr', 'pypy_objspaceclass' : 'space.wrap(repr(space))', - #'prefix' : # added by pypy_initial_path() when it + #'prefix' : # added by pypy_initial_path() when it #'exec_prefix' : # succeeds, pointing to trunk or /usr 'path' : 'state.get(space).w_path', - 'modules' : 'state.get(space).w_modules', + 'modules' : 'state.get(space).w_modules', 'argv' : 'state.get(space).w_argv', 'py3kwarning' : 'space.w_False', - 'warnoptions' : 'state.get(space).w_warnoptions', + 'warnoptions' : 'state.get(space).w_warnoptions', 'builtin_module_names' : 'space.w_None', 'pypy_getudir' : 'state.pypy_getudir', # not translated 'pypy_find_stdlib' : 'initpath.pypy_find_stdlib', 'pypy_find_executable' : 'initpath.pypy_find_executable', 'pypy_resolvedirof' : 'initpath.pypy_resolvedirof', - '_getframe' : 'vm._getframe', - '_current_frames' : 'currentframes._current_frames', - 'setrecursionlimit' : 'vm.setrecursionlimit', - 'getrecursionlimit' : 
'vm.getrecursionlimit', - 'setcheckinterval' : 'vm.setcheckinterval', - 'getcheckinterval' : 'vm.getcheckinterval', - 'exc_info' : 'vm.exc_info', - 'exc_clear' : 'vm.exc_clear', + '_getframe' : 'vm._getframe', + '_current_frames' : 'currentframes._current_frames', + 'setrecursionlimit' : 'vm.setrecursionlimit', + 'getrecursionlimit' : 'vm.getrecursionlimit', + 'setcheckinterval' : 'vm.setcheckinterval', + 'getcheckinterval' : 'vm.getcheckinterval', + 'exc_info' : 'vm.exc_info', + 'exc_clear' : 'vm.exc_clear', 'settrace' : 'vm.settrace', 'gettrace' : 'vm.gettrace', 'setprofile' : 'vm.setprofile', 'getprofile' : 'vm.getprofile', 'call_tracing' : 'vm.call_tracing', 'getsizeof' : 'vm.getsizeof', - - 'executable' : 'space.wrap("py.py")', + + 'executable' : 'space.wrap("py.py")', 'api_version' : 'version.get_api_version(space)', 'version_info' : 'version.get_version_info(space)', 'version' : 'version.get_version(space)', @@ -73,14 +73,14 @@ '_mercurial' : 'version.get_repo_info(space)', 'hexversion' : 'version.get_hexversion(space)', - 'displayhook' : 'hook.displayhook', - '__displayhook__' : 'hook.__displayhook__', + 'displayhook' : 'hook.displayhook', + '__displayhook__' : 'hook.__displayhook__', 'meta_path' : 'space.wrap([])', 'path_hooks' : 'space.wrap([])', 'path_importer_cache' : 'space.wrap({})', 'dont_write_bytecode' : 'space.w_False', - - 'getdefaultencoding' : 'interp_encoding.getdefaultencoding', + + 'getdefaultencoding' : 'interp_encoding.getdefaultencoding', 'setdefaultencoding' : 'interp_encoding.setdefaultencoding', 'getfilesystemencoding' : 'interp_encoding.getfilesystemencoding', @@ -119,21 +119,21 @@ w_modules = self.get('modules') try: return space.getitem(w_modules, space.wrap(name)) - except OperationError, e: - if not e.match(space, space.w_KeyError): - raise - return None + except OperationError, e: + if not e.match(space, space.w_KeyError): + raise + return None - def setmodule(self, w_module): + def setmodule(self, w_module): space = self.space 
w_name = self.space.getattr(w_module, space.wrap('__name__')) w_modules = self.get('modules') self.space.setitem(w_modules, w_name, w_module) def getdictvalue(self, space, attr): - """ specialize access to dynamic exc_* attributes. """ - value = MixedModule.getdictvalue(self, space, attr) - if value is not None: + """ specialize access to dynamic exc_* attributes. """ + value = MixedModule.getdictvalue(self, space, attr) + if value is not None: return value if attr == 'exc_type': operror = space.getexecutioncontext().sys_exc_info() @@ -153,7 +153,7 @@ return space.w_None else: return space.wrap(operror.get_traceback()) - return None + return None def get_w_default_encoder(self): if self.w_default_encoder is not None: diff --git a/pypy/module/thread/test/test_import_lock.py b/pypy/module/thread/test/test_import_lock.py --- a/pypy/module/thread/test/test_import_lock.py +++ b/pypy/module/thread/test/test_import_lock.py @@ -62,6 +62,28 @@ self.waitfor(lambda: done) assert done + def test_lock_held_by_another_thread(self): + import thread, imp + lock_held = thread.allocate_lock() + test_complete = thread.allocate_lock() + lock_released = thread.allocate_lock() + def other_thread(): + imp.acquire_lock() # 3 + assert imp.lock_held() + lock_held.release() # 4 + test_complete.acquire() # 7 + imp.release_lock() # 8 + lock_released.release() # 9 + lock_held.acquire() + test_complete.acquire() + lock_released.acquire() + # + thread.start_new_thread(other_thread, ()) # 1 + lock_held.acquire() # 2 + assert imp.lock_held() # 5 + test_complete.release() # 6 + lock_released.acquire() # 10 + class TestImportLock: def test_lock(self, space, monkeypatch): from pypy.module.imp.importing import getimportlock, importhook diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -1,23 +1,25 @@ #!/usr/bin/env python -""" A sample script that packages PyPy, provided that it's already built. 
+""" packages PyPy, provided that it's already built. It uses 'pypy/goal/pypy-c' and parts of the rest of the working copy. Usage: - package.py [--nostrip] [--without-tk] root-pypy-dir [name-of-archive] [name-of-pypy-c] [destination-for-tarball] [pypy-c-path] + package.py [--options] -Usually you would do: package.py ../../.. pypy-VER-PLATFORM -The output is found in the directory /tmp/usession-YOURNAME/build/. +Usually you would do: package.py --version-name pypy-VER-PLATFORM +The output is found in the directory from --builddir, +by default /tmp/usession-YOURNAME/build/. """ import shutil import sys import os #Add toplevel repository dir to sys.path -sys.path.insert(0,os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) +basedir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.insert(0,basedir) import py import fnmatch -from rpython.tool.udir import udir import subprocess +import glob if sys.version_info < (2,6): py.test.skip("requires 2.6 so far") @@ -40,17 +42,119 @@ class PyPyCNotFound(Exception): pass -def fix_permissions(basedir): +class MissingDependenciesError(Exception): + pass + +def fix_permissions(dirname): if sys.platform != 'win32': - os.system("chmod -R a+rX %s" % basedir) - os.system("chmod -R g-w %s" % basedir) + os.system("chmod -R a+rX %s" % dirname) + os.system("chmod -R g-w %s" % dirname) -def package(basedir, name='pypy-nightly', rename_pypy_c='pypy', - copy_to_dir=None, override_pypy_c=None, nostrip=False, - withouttk=False): - assert '/' not in rename_pypy_c +sep_template = "\nThis copy of PyPy includes a copy of %s, which is licensed under the following terms:\n\n" + +def generate_license_linux(basedir, options): + base_file = str(basedir.join('LICENSE')) + with open(base_file) as fid: + txt = fid.read() + searches = [("bzip2","libbz2-*", "copyright", '---------'), + ("openssl", "openssl*", "copyright", 'LICENSE ISSUES'), + ] + if not 
options.no_tk: + name = 'Tcl/Tk' + txt += "License for '%s'" %name + txt += '\n' + "="*(14 + len(name)) + '\n' + txt += sep_template % name + base_file = str(basedir.join('lib_pypy/_tkinter/license.terms')) + with open(base_file, 'r') as fid: + txt += fid.read() + for name, pat, fname, first_line in searches: + txt += "License for '" + name + "'" + txt += '\n' + "="*(14 + len(name)) + '\n' + txt += sep_template % name + dirs = glob.glob(options.license_base + "/" +pat) + if not dirs: + raise ValueError, "Could not find "+ options.license_base + "/" + pat + if len(dirs) > 2: + raise ValueError, "Multiple copies of "+pat + dir = dirs[0] + with open(os.path.join(dir, fname)) as fid: + # Read up to the line dividing the packaging header from the actual copyright + for line in fid: + if first_line in line: + break + txt += line + for line in fid: + txt += line + if len(line.strip())<1: + txt += '\n' + txt += third_party_header + # Do something for gdbm, which is GPL + txt += gdbm_bit + return txt + +def generate_license_windows(basedir, options): + base_file = str(basedir.join('LICENSE')) + with open(base_file) as fid: + txt = fid.read() + # shutil.copyfileobj(open("crtlicense.txt"), out) # We do not ship msvc runtime files + if not options.no_tk: + name = 'Tcl/Tk' + txt += "License for '%s'" %name + txt += '\n' + "="*(14 + len(name)) + '\n' + txt += sep_template % name + base_file = str(basedir.join('lib_pypy/_tkinter/license.terms')) + with open(base_file, 'r') as fid: + txt += fid.read() + for name, pat, file in (("bzip2","bzip2-*", "LICENSE"), + ("openssl", "openssl-*", "LICENSE")): + txt += sep_template % name + dirs = glob.glob(options.license_base + "/" +pat) + if not dirs: + raise ValueError, "Could not find "+ options.license_base + "/" + pat + if len(dirs) > 2: + raise ValueError, "Multiple copies of "+pat + dir = dirs[0] + with open(os.path.join(dir, file)) as fid: + txt += fid.read() + return txt + +def generate_license_darwin(basedir, options): + # where 
are copyright files on macos? + return generate_license_linux(basedir, options) + +if sys.platform == 'win32': + generate_license = generate_license_windows +elif sys.platform == 'darwin': + generate_license = generate_license_darwin +else: + generate_license = generate_license_linux + +def create_cffi_import_libraries(pypy_c, options): + modules = ['_sqlite3'] + subprocess.check_call([str(pypy_c), '-c', 'import _sqlite3']) + if not sys.platform == 'win32': + modules += ['_curses', 'syslog', 'gdbm', '_sqlite3'] + if not options.no_tk: + modules.append(('_tkinter')) + for module in modules: + try: + subprocess.check_call([str(pypy_c), '-c', 'import ' + module]) + except subprocess.CalledProcessError: + print >>sys.stderr, """Building {0} bindings failed. +You can either install development headers package or +add --without-{0} option to skip packaging binary CFFI extension.""".format(module) + raise MissingDependenciesError(module) + +def create_package(basedir, options): + retval = 0 + name = options.name + if not name: + name = 'pypy-nightly' + rename_pypy_c = options.pypy_c + override_pypy_c = options.override_pypy_c + basedir = py.path.local(basedir) - if override_pypy_c is None: + if not override_pypy_c: basename = 'pypy-c' if sys.platform == 'win32': basename += '.exe' @@ -68,28 +172,18 @@ raise PyPyCNotFound( 'Bogus path: %r does not exist (see docstring for more info)' % (os.path.dirname(str(pypy_c)),)) - win_extras = ['libpypy-c.dll', 'libexpat.dll', 'sqlite3.dll', - 'libeay32.dll', 'ssleay32.dll'] - subprocess.check_call([str(pypy_c), '-c', 'import _sqlite3']) - if not sys.platform == 'win32': - subprocess.check_call([str(pypy_c), '-c', 'import _curses']) - subprocess.check_call([str(pypy_c), '-c', 'import syslog']) - subprocess.check_call([str(pypy_c), '-c', 'import gdbm']) - if not withouttk: + if not options.no_cffi: try: - subprocess.check_call([str(pypy_c), '-c', 'import _tkinter']) - except subprocess.CalledProcessError: - print >>sys.stderr, 
"""Building Tk bindings failed. -You can either install Tk development headers package or -add --without-tk option to skip packaging binary CFFI extension.""" - sys.exit(1) - #Can the dependencies be found from cffi somehow? - win_extras += ['tcl85.dll', 'tk85.dll'] + create_cffi_import_libraries(pypy_c, options) + except MissingDependenciesError: + # This is a non-fatal error + retval = -1 + if sys.platform == 'win32' and not rename_pypy_c.lower().endswith('.exe'): rename_pypy_c += '.exe' binaries = [(pypy_c, rename_pypy_c)] # - builddir = udir.ensure("build", dir=True) + builddir = options.builddir pypydir = builddir.ensure(name, dir=True) includedir = basedir.join('include') # Recursively copy all headers, shutil has only ignore @@ -102,10 +196,11 @@ pypydir.ensure('include', dir=True) if sys.platform == 'win32': - #Don't include a mscvrXX.dll, users should get their own. - #Instructions are provided on the website. - # Can't rename a DLL: it is always called 'libpypy-c.dll' + win_extras = ['libpypy-c.dll', 'libexpat.dll', 'sqlite3.dll', + 'libeay32.dll', 'ssleay32.dll'] + if not options.no_tk: + win_extras += ['tcl85.dll', 'tk85.dll'] for extra in win_extras: p = pypy_c.dirpath().join(extra) @@ -116,7 +211,7 @@ continue print "Picking %s" % p binaries.append((p, p.basename)) - importlib_name = 'python27.lib' + importlib_name = 'python27.lib' if pypy_c.dirpath().join(importlib_name).check(): shutil.copyfile(str(pypy_c.dirpath().join(importlib_name)), str(pypydir.join('include/python27.lib'))) @@ -127,7 +222,7 @@ # XXX users will complain that they cannot compile cpyext # modules for windows, has the lib moved or are there no # exported functions in the dll so no import library is created? 
- if not withouttk: + if not options.no_tk: try: p = pypy_c.dirpath().join('tcl85.dll') if not p.check(): @@ -139,7 +234,7 @@ tk85.dll and tcl85.dll found, expecting to find runtime in ..\\lib directory next to the dlls, as per build instructions.""" import traceback;traceback.print_exc() - sys.exit(1) + raise MissingDependenciesError('Tk runtime') # Careful: to copy lib_pypy, copying just the hg-tracked files # would not be enough: there are also ctypes_config_cache/_*_cache.py. @@ -150,11 +245,24 @@ str(pypydir.join('lib_pypy')), ignore=ignore_patterns('.svn', 'py', '*.pyc', '*~', '*.c', '*.o')) - for file in ['LICENSE', 'README.rst']: + for file in ['README.rst',]: shutil.copy(str(basedir.join(file)), str(pypydir)) for file in ['_testcapimodule.c', '_ctypes_test.c']: - shutil.copyfile(str(basedir.join('lib_pypy', file)), + shutil.copyfile(str(basedir.join('lib_pypy', file)), str(pypydir.join('lib_pypy', file))) + try: + license = generate_license(basedir, options) + with open(str(pypydir.join('LICENSE')), 'w') as LICENSE: + LICENSE.write(license) + except: + # Non-fatal error, use original LICENCE file + import traceback;traceback.print_exc() + base_file = str(basedir.join('LICENSE')) + with open(base_file) as fid: + license = fid.read() + with open(str(pypydir.join('LICENSE')), 'w') as LICENSE: + LICENSE.write(license) + retval = -1 # spdir = pypydir.ensure('site-packages', dir=True) shutil.copy(str(basedir.join('site-packages', 'README')), str(spdir)) @@ -167,17 +275,17 @@ for source, target in binaries: archive = bindir.join(target) shutil.copy(str(source), str(archive)) + fix_permissions(builddir) + old_dir = os.getcwd() - fix_permissions(builddir) try: os.chdir(str(builddir)) - # - # 'strip' fun: see issue #587 - if not nostrip: + if not options.nostrip: for source, target in binaries: if sys.platform == 'win32': pass elif sys.platform == 'darwin': + # 'strip' fun: see issue #587 for why -x os.system("strip -x " + str(bindir.join(target))) # ignore errors 
else: os.system("strip " + str(bindir.join(target))) # ignore errors @@ -208,41 +316,91 @@ raise OSError('"tar" returned exit status %r' % e) finally: os.chdir(old_dir) - if copy_to_dir is not None: - print "Copying %s to %s" % (archive, copy_to_dir) - shutil.copy(archive, str(copy_to_dir)) + if options.targetdir: + print "Copying %s to %s" % (archive, options.targetdir) + shutil.copy(archive, options.targetdir) else: print "Ready in %s" % (builddir,) - return builddir # for tests + return retval, builddir # for tests +def package(*args): + try: + import argparse + except ImportError: + import imp + argparse = imp.load_source('argparse', 'lib-python/2.7/argparse.py') + if sys.platform == 'win32': + pypy_exe = 'pypy.exe' + license_base = os.path.join(basedir, r'..\..\..\local') # as on buildbot YMMV + else: + pypy_exe = 'pypy' + license_base = '/usr/share/doc' + parser = argparse.ArgumentParser() + args = list(args) + args[0] = str(args[0]) + parser.add_argument('--without-tk', dest='no_tk', action='store_true', + help='build and package the cffi tkinter module') + parser.add_argument('--without-cffi', dest='no_cffi', action='store_true', + help='do not pre-import any cffi modules') + parser.add_argument('--nostrip', dest='nostrip', action='store_true', + help='do not strip the exe, making it ~10MB larger') + parser.add_argument('--rename_pypy_c', dest='pypy_c', type=str, default=pypy_exe, + help='target executable name, defaults to "pypy"') + parser.add_argument('--archive-name', dest='name', type=str, default='', + help='pypy-VER-PLATFORM') + parser.add_argument('--license_base', type=str, default=license_base, + help='where to start looking for third party upstream licensing info') + parser.add_argument('--builddir', type=str, default='', + help='tmp dir for packaging') + parser.add_argument('--targetdir', type=str, default='', + help='destination dir for archive') + parser.add_argument('--override_pypy_c', type=str, default='', + help='use as pypy exe instead of 
pypy/goal/pypy-c') + # Positional arguments, for backward compatability with buldbots + parser.add_argument('extra_args', help='optional interface to positional arguments', nargs=argparse.REMAINDER, + metavar='[root-pypy-dir] [name-of-archive] [name-of-pypy-c] [destination-for-tarball] [pypy-c-path]', + ) + options = parser.parse_args(args) -def print_usage(): - print >>sys.stderr, __doc__ - sys.exit(1) + # Handle positional arguments, choke if both methods are used + for i,target, default in ([1, 'name', ''], [2, 'pypy_c', pypy_exe], + [3, 'targetdir', ''], [4,'override_pypy_c', '']): + if len(options.extra_args)>i: + if getattr(options, target) != default: + print 'positional argument',i,target,'already has value',getattr(options, target) + parser.print_help() + return + setattr(options, target, options.extra_args[i]) + if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): + options.nostrip = True + + if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): + options.tk = True + if not options.builddir: + # The import actually creates the udir directory + from rpython.tool.udir import udir + options.builddir = udir.ensure("build", dir=True) + assert '/' not in options.pypy_c + return create_package(basedir, options) + + +third_party_header = '''\n\nLicenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and acknowledgements +for third-party software incorporated in the PyPy distribution. + +''' + +gdbm_bit = '''gdbm +---- + +The gdbm module includes code from gdbm.h, which is distributed under the terms +of the GPL license version 2 or any later version. 
+''' if __name__ == '__main__': - if len(sys.argv) == 1: - print_usage() - - args = sys.argv[1:] - kw = {} - - for i, arg in enumerate(args): - if arg == '--nostrip': - kw['nostrip'] = True - elif arg == '--without-tk': - kw['withouttk'] = True - elif not arg.startswith('--'): - break - else: - print_usage() - - if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): - kw['nostrip'] = True - - if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): - kw['withouttk'] = True - - args = args[i:] - package(*args, **kw) + import sys + retval, _ = package(*sys.argv[1:]) + sys.exit(retval) diff --git a/pypy/tool/release/test/test_package.py b/pypy/tool/release/test/test_package.py --- a/pypy/tool/release/test/test_package.py +++ b/pypy/tool/release/test/test_package.py @@ -1,7 +1,7 @@ import py from pypy.conftest import pypydir -from pypy.tool.release import package +from pypy.tool.release import package, package from pypy.module.sys.version import CPYTHON_VERSION import tarfile, zipfile, sys @@ -25,8 +25,9 @@ else: fake_pypy_c = False try: - builddir = package.package(py.path.local(pypydir).dirpath(), test, + retval, builddir = package.package(py.path.local(pypydir).dirpath(), test, rename_pypy_c) + assert retval == 0 prefix = builddir.join(test) cpyver = '%d.%d' % CPYTHON_VERSION[:2] assert prefix.join('lib-python', cpyver, 'test').check() @@ -74,7 +75,6 @@ pypy_c.remove() def test_with_zipfile_module(): - from pypy.tool.release import package prev = package.USE_ZIPFILE_MODULE try: package.USE_ZIPFILE_MODULE = True @@ -106,3 +106,22 @@ check(file1, 0644) check(file2, 0644) check(pypy, 0755) + +def test_generate_license(): + from os.path import dirname, abspath + class Options(object): + pass + options = Options() + basedir = dirname(dirname(dirname(dirname(dirname(abspath(__file__)))))) + options.no_tk = False + if sys.platform == 'win32': + # Following recommended build setup at + # http://doc.pypy.org/en/latest/windows.html#abridged-method-for-ojit-builds-using-visual-studio-2008 + 
options.license_base = dirname(basedir) + '/local' + else: + options.license_base = '/usr/share/doc' + license = package.generate_license(py.path.local(basedir), options) + assert 'bzip2' in license + assert 'openssl' in license + assert 'Tcl' in license + diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -2,15 +2,14 @@ Binary operations between SomeValues. """ -import py import operator from rpython.tool.pairtype import pair, pairtype from rpython.annotator.model import ( SomeObject, SomeInteger, SomeBool, s_Bool, SomeString, SomeChar, SomeList, - SomeDict, SomeOrderedDict, SomeUnicodeCodePoint, SomeUnicodeString, + SomeDict, SomeUnicodeCodePoint, SomeUnicodeString, SomeTuple, SomeImpossibleValue, s_ImpossibleValue, SomeInstance, - SomeBuiltinMethod, SomeIterator, SomePBC, SomeFloat, s_None, SomeByteArray, - SomeWeakRef, SomeSingleFloat, + SomeBuiltinMethod, SomeIterator, SomePBC, SomeNone, SomeFloat, s_None, + SomeByteArray, SomeWeakRef, SomeSingleFloat, SomeLongFloat, SomeType, SomeConstantType, unionof, UnionError, read_can_only_throw, add_knowntypedata, merge_knowntypedata,) @@ -768,57 +767,51 @@ # mixing Nones with other objects -def _make_none_union(classname, constructor_args='', glob=None): - if glob is None: - glob = globals() - loc = locals() - source = py.code.Source(""" - class __extend__(pairtype(%(classname)s, SomePBC)): - def union((obj, pbc)): - if pbc.isNone(): - return %(classname)s(%(constructor_args)s) - else: - raise UnionError(pbc, obj) +class __extend__(pairtype(SomeObject, SomeNone)): + def union((obj, none)): + return obj.noneify() - class __extend__(pairtype(SomePBC, %(classname)s)): - def union((pbc, obj)): - if pbc.isNone(): - return %(classname)s(%(constructor_args)s) - else: - raise UnionError(pbc, obj) - """ % loc) - exec source.compile() in glob +class __extend__(pairtype(SomeNone, SomeObject)): + def union((none, obj)): + return 
obj.noneify() -_make_none_union('SomeInstance', 'classdef=obj.classdef, can_be_None=True') -_make_none_union('SomeString', 'no_nul=obj.no_nul, can_be_None=True') -_make_none_union('SomeUnicodeString', 'can_be_None=True') -_make_none_union('SomeList', 'obj.listdef') -_make_none_union('SomeOrderedDict', 'obj.dictdef') -_make_none_union('SomeDict', 'obj.dictdef') -_make_none_union('SomeWeakRef', 'obj.classdef') +class __extend__(pairtype(SomeImpossibleValue, SomeNone)): + def union((imp1, none)): + return s_None -# getitem on SomePBCs, in particular None fails +class __extend__(pairtype(SomeNone, SomeImpossibleValue)): + def union((none, imp2)): + return s_None + class __extend__(pairtype(SomePBC, SomeObject)): def getitem((pbc, o)): - if not pbc.isNone(): - raise AnnotatorError("getitem on %r" % pbc) + raise AnnotatorError("getitem on %r" % pbc) + + def setitem((pbc, o), s_value): + raise AnnotatorError("setitem on %r" % pbc) + +class __extend__(pairtype(SomeNone, SomeObject)): + def getitem((none, o)): return s_ImpossibleValue - def setitem((pbc, o), s_value): - if not pbc.isNone(): - raise AnnotatorError("setitem on %r" % pbc) + def setitem((none, o), s_value): + return None class __extend__(pairtype(SomePBC, SomeString)): def add((pbc, o)): - if not pbc.isNone(): - raise AnnotatorError('add on %r' % pbc) + raise AnnotatorError('add on %r' % pbc) + +class __extend__(pairtype(SomeNone, SomeString)): + def add((none, o)): return s_ImpossibleValue class __extend__(pairtype(SomeString, SomePBC)): def add((o, pbc)): - if not pbc.isNone(): - raise AnnotatorError('add on %r' % pbc) + raise AnnotatorError('add on %r' % pbc) + +class __extend__(pairtype(SomeString, SomeNone)): + def add((o, none)): return s_ImpossibleValue #_________________________________________ diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -18,7 +18,7 @@ from rpython.annotator.dictdef import 
DictDef from rpython.annotator import description from rpython.annotator.signature import annotationoftype -from rpython.annotator.argument import simple_args, complex_args +from rpython.annotator.argument import simple_args from rpython.rlib.objectmodel import r_dict, Symbolic from rpython.tool.algo.unionfind import UnionFind from rpython.rtyper import extregistry @@ -103,8 +103,9 @@ self.consider_call_site(call_op) for pbc, args_s in self.emulated_pbc_calls.itervalues(): - self.consider_call_site_for_pbc(pbc, 'simple_call', - args_s, s_ImpossibleValue, None) + args = simple_args(args_s) + self.consider_call_site_for_pbc(pbc, args, + s_ImpossibleValue, None) self.emulated_pbc_calls = {} finally: self.leave() @@ -152,16 +153,14 @@ args_s = [lltype_to_annotation(adtmeth.ll_ptrtype)] + args_s if isinstance(s_callable, SomePBC): s_result = binding(call_op.result, s_ImpossibleValue) - self.consider_call_site_for_pbc(s_callable, call_op.opname, args_s, + args = call_op.build_args(args_s) + self.consider_call_site_for_pbc(s_callable, args, s_result, call_op) - def consider_call_site_for_pbc(self, s_callable, opname, args_s, s_result, + def consider_call_site_for_pbc(self, s_callable, args, s_result, call_op): descs = list(s_callable.descriptions) - if not descs: - return family = descs[0].getcallfamily() - args = self.build_args(opname, args_s) s_callable.getKind().consider_call_site(self, family, descs, args, s_result, call_op) @@ -452,9 +451,6 @@ attr = s_attr.const descs = list(pbc.descriptions) - if not descs: - return s_ImpossibleValue - first = descs[0] if len(descs) == 1: return first.s_read_attribute(attr) @@ -495,8 +491,6 @@ annotations). 
""" descs = list(pbc.descriptions) - if not descs: - return s_ImpossibleValue first = descs[0] first.mergecallfamilies(*descs[1:]) @@ -562,12 +556,6 @@ assert self.annotator.binding(op.args[pos]) == s_type return op - def build_args(self, op, args_s): - if op == "simple_call": - return simple_args(args_s) - elif op == "call_args": - return complex_args(args_s) - def ondegenerated(self, what, s_value, where=None, called_from_graph=None): self.annotator.ondegenerated(what, s_value, where=where, called_from_graph=called_from_graph) diff --git a/rpython/annotator/classdef.py b/rpython/annotator/classdef.py --- a/rpython/annotator/classdef.py +++ b/rpython/annotator/classdef.py @@ -1,8 +1,9 @@ """ Type inference for user-defined classes. """ -from rpython.annotator.model import SomePBC, s_ImpossibleValue, unionof -from rpython.annotator.model import SomeInteger, SomeTuple, SomeString, AnnotatorError +from rpython.annotator.model import ( + SomePBC, s_ImpossibleValue, unionof, s_None, SomeInteger, + SomeTuple, SomeString, AnnotatorError) from rpython.annotator import description @@ -105,8 +106,7 @@ # check for method demotion and after-the-fact method additions if isinstance(s_newvalue, SomePBC): attr = self.name - if (not s_newvalue.isNone() and - s_newvalue.getKind() == description.MethodDesc): + if s_newvalue.getKind() == description.MethodDesc: # is method if homedef.classdesc.read_attribute(attr, None) is None: if not homedef.check_missing_attribute_update(attr): @@ -351,8 +351,10 @@ if uplookup is not None: d.append(updesc.bind_self(self, flags)) - if d or pbc.can_be_None: + if d: return SomePBC(d, can_be_None=pbc.can_be_None) + elif pbc.can_be_None: + return s_None else: return s_ImpossibleValue diff --git a/rpython/annotator/model.py b/rpython/annotator/model.py --- a/rpython/annotator/model.py +++ b/rpython/annotator/model.py @@ -122,6 +122,9 @@ def can_be_none(self): return True + def noneify(self): + raise UnionError(self, s_None) + def nonnoneify(self): 
return self @@ -258,11 +261,17 @@ "Stands for an object which is known to be a string." knowntype = str + def noneify(self): + return SomeString(can_be_None=True, no_nul=self.no_nul) + class SomeUnicodeString(SomeStringOrUnicode): "Stands for an object which is known to be an unicode string" knowntype = unicode + def noneify(self): + return SomeUnicodeString(can_be_None=True, no_nul=self.no_nul) + class SomeByteArray(SomeStringOrUnicode): immutable = False @@ -313,6 +322,9 @@ def can_be_none(self): return True + def noneify(self): + return SomeList(self.listdef) + class SomeTuple(SomeObject): "Stands for a tuple of known length." @@ -358,6 +370,9 @@ else: return '{...%s...}' % (len(const),) + def noneify(self): + return type(self)(self.dictdef) + class SomeOrderedDict(SomeDict): try: from collections import OrderedDict as knowntype @@ -417,6 +432,9 @@ def nonnoneify(self): return SomeInstance(self.classdef, can_be_None=False) + def noneify(self): + return SomeInstance(self.classdef, can_be_None=True) + class SomePBC(SomeObject): """Stands for a global user instance, built prior to the analysis, @@ -424,36 +442,32 @@ immutable = True def __init__(self, descriptions, can_be_None=False, subset_of=None): + assert descriptions # descriptions is a set of Desc instances descriptions = set(descriptions) self.descriptions = descriptions self.can_be_None = can_be_None self.subset_of = subset_of self.simplify() - if self.isNone(): - self.knowntype = type(None) - self.const = None - else: - knowntype = reduce(commonbase, - [x.knowntype for x in descriptions]) - if knowntype == type(Exception): - knowntype = type - if knowntype != object: - self.knowntype = knowntype - if len(descriptions) == 1 and not can_be_None: - # hack for the convenience of direct callers to SomePBC(): - # only if there is a single object in descriptions - desc, = descriptions - if desc.pyobj is not None: - self.const = desc.pyobj - elif len(descriptions) > 1: - from rpython.annotator.description import 
ClassDesc - if self.getKind() is ClassDesc: - # a PBC of several classes: enforce them all to be - # built, without support for specialization. See - # rpython/test/test_rpbc.test_pbc_of_classes_not_all_used - for desc in descriptions: - desc.getuniqueclassdef() + knowntype = reduce(commonbase, [x.knowntype for x in descriptions]) + if knowntype == type(Exception): + knowntype = type + if knowntype != object: + self.knowntype = knowntype + if len(descriptions) == 1 and not can_be_None: + # hack for the convenience of direct callers to SomePBC(): + # only if there is a single object in descriptions + desc, = descriptions + if desc.pyobj is not None: + self.const = desc.pyobj + elif len(descriptions) > 1: + from rpython.annotator.description import ClassDesc + if self.getKind() is ClassDesc: + # a PBC of several classes: enforce them all to be + # built, without support for specialization. See + # rpython/test/test_rpbc.test_pbc_of_classes_not_all_used + for desc in descriptions: + desc.getuniqueclassdef() def any_description(self): return iter(self.descriptions).next() @@ -466,32 +480,26 @@ kinds.add(x.__class__) if len(kinds) > 1: raise AnnotatorError("mixing several kinds of PBCs: %r" % kinds) - if not kinds: - raise ValueError("no 'kind' on the 'None' PBC") return kinds.pop() def simplify(self): - if self.descriptions: - # We check that the set only contains a single kind of Desc instance - kind = self.getKind() - # then we remove unnecessary entries in self.descriptions: - # some MethodDescs can be 'shadowed' by others - if len(self.descriptions) > 1: - kind.simplify_desc_set(self.descriptions) - else: - assert self.can_be_None, "use s_ImpossibleValue" - - def isNone(self): - return len(self.descriptions) == 0 + # We check that the set only contains a single kind of Desc instance + kind = self.getKind() + # then we remove unnecessary entries in self.descriptions: + # some MethodDescs can be 'shadowed' by others + if len(self.descriptions) > 1: + 
kind.simplify_desc_set(self.descriptions) def can_be_none(self): return self.can_be_None def nonnoneify(self): - if self.isNone(): - return s_ImpossibleValue - else: - return SomePBC(self.descriptions, can_be_None=False) + return SomePBC(self.descriptions, can_be_None=False, + subset_of=self.subset_of) + + def noneify(self): + return SomePBC(self.descriptions, can_be_None=True, + subset_of=self.subset_of) def fmt_descriptions(self, pbis): if hasattr(self, 'const'): @@ -505,6 +513,23 @@ else: return kt.__name__ +class SomeNone(SomeObject): + knowntype = type(None) + const = None + + def __init__(self): + pass + + def is_constant(self): + return True + + def is_immutable_constant(self): + return True + + def nonnoneify(self): + return s_ImpossibleValue + + class SomeConstantType(SomePBC): can_be_None = False subset_of = None @@ -557,7 +582,7 @@ return False -s_None = SomePBC([], can_be_None=True) +s_None = SomeNone() s_Bool = SomeBool() s_Int = SomeInteger() s_ImpossibleValue = SomeImpossibleValue() @@ -576,6 +601,9 @@ # 'classdef' is None for known-to-be-dead weakrefs. 
self.classdef = classdef + def noneify(self): + return SomeWeakRef(self.classdef) + # ____________________________________________________________ @@ -647,7 +675,7 @@ def not_const(s_obj): - if s_obj.is_constant() and not isinstance(s_obj, SomePBC): + if s_obj.is_constant() and not isinstance(s_obj, (SomePBC, SomeNone)): new_s_obj = SomeObject.__new__(s_obj.__class__) dic = new_s_obj.__dict__ = s_obj.__dict__.copy() if 'const' in dic: diff --git a/rpython/annotator/signature.py b/rpython/annotator/signature.py --- a/rpython/annotator/signature.py +++ b/rpython/annotator/signature.py @@ -106,12 +106,11 @@ for i, argtype in enumerate(self.argtypes): if isinstance(argtype, (types.FunctionType, types.MethodType)): argtype = argtype(*inputcells) - if isinstance(argtype, lltype.LowLevelType) and\ - argtype is lltype.Void: + if argtype is lltype.Void: # XXX the mapping between Void and annotation # is not quite well defined s_input = inputcells[i] - assert isinstance(s_input, annmodel.SomePBC) + assert isinstance(s_input, (annmodel.SomePBC, annmodel.SomeNone)) assert s_input.is_constant() args_s.append(s_input) elif argtype is None: diff --git a/rpython/annotator/test/test_model.py b/rpython/annotator/test/test_model.py --- a/rpython/annotator/test/test_model.py +++ b/rpython/annotator/test/test_model.py @@ -130,8 +130,9 @@ py.test.raises(AnnotatorError, compile_function, blocked_inference) -if __name__ == '__main__': - for name, value in globals().items(): - if name.startswith('test_'): - value() - +def test_not_const(): + s_int = SomeInteger() + s_int.const = 2 + assert s_int != SomeInteger() + assert not_const(s_int) == SomeInteger() + assert not_const(s_None) == s_None diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -8,7 +8,7 @@ from rpython.annotator.model import (SomeObject, SomeInteger, SomeBool, SomeString, SomeChar, SomeList, SomeDict, SomeTuple, SomeImpossibleValue, 
SomeUnicodeCodePoint, SomeInstance, SomeBuiltin, SomeBuiltinMethod, - SomeFloat, SomeIterator, SomePBC, SomeType, s_ImpossibleValue, + SomeFloat, SomeIterator, SomePBC, SomeNone, SomeType, s_ImpossibleValue, s_Bool, s_None, unionof, add_knowntypedata, HarmlesslyBlocked, SomeWeakRef, SomeUnicodeString, SomeByteArray) from rpython.annotator.bookkeeper import getbookkeeper, immutablevalue @@ -739,8 +739,7 @@ getattr.can_only_throw = [] def setattr(self, s_attr, s_value): - if not self.isNone(): - raise AnnotatorError("Cannot modify attribute of a pre-built constant") + raise AnnotatorError("Cannot modify attribute of a pre-built constant") def call(self, args): bookkeeper = getbookkeeper() @@ -751,19 +750,33 @@ return SomePBC(d, can_be_None=self.can_be_None) def bool_behavior(self, s): - if self.isNone(): - s.const = False - elif not self.can_be_None: + if not self.can_be_None: s.const = True def len(self): - if self.isNone(): - # this None could later be generalized into an empty list, - # whose length is the constant 0; so let's tentatively answer 0. - return immutablevalue(0) - else: - # This should probably never happen - raise AnnotatorError("Cannot call len on a pbc") + raise AnnotatorError("Cannot call len on a pbc") + +class __extend__(SomeNone): + def bind_callables_under(self, classdef, name): + return self + + def getattr(self, s_attr): + return s_ImpossibleValue + getattr.can_only_throw = [] + + def setattr(self, s_attr, s_value): + return None + + def call(self, args): + return s_ImpossibleValue + + def bool_behavior(self, s): + s.const = False + + def len(self): + # XXX: this None could later be generalized into an empty list, + # whose length is the constant 0; so let's tentatively answer 0. 
+ return immutablevalue(0) #_________________________________________ # weakrefs diff --git a/rpython/flowspace/operation.py b/rpython/flowspace/operation.py --- a/rpython/flowspace/operation.py +++ b/rpython/flowspace/operation.py @@ -14,6 +14,7 @@ SpaceOperation) from rpython.flowspace.specialcase import register_flow_sc from rpython.annotator.model import SomeTuple +from rpython.annotator.argument import ArgumentsForTranslation from rpython.flowspace.specialcase import SPECIAL_CASES @@ -511,6 +512,9 @@ return sc(ctx, *args_w) return ctx.do_op(self) + def build_args(self, args_s): + return ArgumentsForTranslation(list(args_s)) + class CallArgs(SingleDispatchMixin, CallOp): opname = 'call_args' @@ -529,6 +533,10 @@ "should not call %r with keyword arguments" % (fn,)) return ctx.do_op(self) + def build_args(self, args_s): + return ArgumentsForTranslation.fromshape(args_s[0].const, + list(args_s[1:])) + # Other functions that get directly translated to SpaceOperators func2op[type] = op.type diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py --- a/rpython/jit/backend/detect_cpu.py +++ b/rpython/jit/backend/detect_cpu.py @@ -63,7 +63,10 @@ 'AMD64': MODEL_X86, # win64 'armv7l': MODEL_ARM, 'armv6l': MODEL_ARM, - }[mach] + }.get(mach) + + if result is None: + raise ProcessorAutodetectError, "unknown machine name %s" % mach # if result.startswith('x86'): if sys.maxint == 2**63-1: @@ -78,7 +81,9 @@ # if result.startswith('arm'): from rpython.jit.backend.arm.detect import detect_float - assert detect_float(), 'the JIT-compiler requires a vfp unit' + if not detect_float(): + raise ProcessorAutodetectError( + 'the JIT-compiler requires a vfp unit') # return result diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -232,8 +232,8 @@ self.emitting_an_operation_that_can_collect() op = 
ResOperation(rop.CALL_MALLOC_GC, args, v_result, descr) self.newops.append(op) - # mark 'v_result' as freshly malloced, so not needing a write barrier - self.write_barrier_applied[v_result] = None + # In general, don't add v_result to write_barrier_applied: + # v_result might be a large young array. def gen_malloc_fixedsize(self, size, typeid, v_result): """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, ...). @@ -251,6 +251,9 @@ args = [ConstInt(addr), ConstInt(size)] descr = self.gc_ll_descr.malloc_fixedsize_descr self._gen_call_malloc_gc(args, v_result, descr) + # mark 'v_result' as freshly malloced, so not needing a write barrier + # (this is always true because it's a fixed-size object) + self.write_barrier_applied[v_result] = None def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result): """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) for Boehm.""" @@ -316,7 +319,9 @@ [ConstInt(kind), ConstInt(itemsize), v_length], v_result, descr=arraydescr) self.newops.append(op) - self.write_barrier_applied[v_result] = None + # don't record v_result into self.write_barrier_applied: + # it can be a large, young array with card marking, and then + # the GC relies on the write barrier being called return True def gen_malloc_nursery_varsize_frame(self, sizebox, v_result): diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -435,16 +435,19 @@ nonstd_descr.itemsize = 8 nonstd_descr_gcref = 123 From noreply at buildbot.pypy.org Mon Jun 9 23:45:00 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 9 Jun 2014 23:45:00 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: different approach - implement frompyfunc, extended for pypy-only kwargs. 
Message-ID: <20140609214500.CC4211C08F6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72010:9f583aac6370 Date: 2014-06-10 00:32 +0300 http://bitbucket.org/pypy/pypy/changeset/9f583aac6370/ Log: different approach - implement frompyfunc, extended for pypy-only kwargs. then implement app-level calls with cffi functions to frompyfunc diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py --- a/pypy/module/micronumpy/__init__.py +++ b/pypy/module/micronumpy/__init__.py @@ -34,6 +34,7 @@ appleveldefs = {} interpleveldefs = { 'FLOATING_POINT_SUPPORT': 'space.wrap(1)', + 'frompyfunc': 'ufuncs.frompyfunc', } # ufuncs for exposed, impl in [ diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -2,6 +2,8 @@ from pypy.module.micronumpy.ufuncs import (find_binop_result_dtype, find_unaryop_result_dtype) from pypy.module.micronumpy.descriptor import get_dtype_cache +from pypy.interpreter.gateway import interp2app +from pypy.conftest import option try: @@ -144,28 +146,43 @@ # promote bools, happens with sign ufunc assert find_unaryop_result_dtype(space, bool_dtype, promote_bools=True) is int8_dtype -class TestUfuncFromCFunc(object): - def test_fromcfunc(self,space): - if not cfuncs: - skip('no cffi available') - from pypy.module.micronumpy.ufuncs import ufunc_from_func_and_data_and_signature as from_cfunc - from pypy.module.micronumpy.ctors import array - int32_dtype = get_dtype_cache(space).w_int32dtype - float64_dtype = get_dtype_cache(space).w_float64dtype - data = ffi.new('char *[2]') - func = from_cfunc([cfuncs.double_times2, cfuncs.int_times2], data, - [float64_dtype, float64_dtype, int32_dtype, int32_dtype], - 1, 1, 0, 'times2', 'times2_doc', 0, '()->()', - ) - def get(i): - return w_result.getitem(space, [i]).value - for d in [int32_dtype, float64_dtype]: - w_array = 
array(space, space.wrap(range(10)), w_dtype=d) - w_result = func.call(space, [w_array]) - for i in 10: - assert get(i) == 2*i +class AppTestUfuncs(BaseNumpyAppTest): + def setup_class(cls): + BaseNumpyAppTest.setup_class.im_func(cls) + if cfuncs: + def int_times2(space, __args__): + args, kwargs = __args__.unpack() + arr = map(space.unwrap, args) + # Assume arr is contiguous + addr = cfuncs.new('char *[2]') + addr[0] = arr[0].data + addr[1] = arr[1].data + dims = cfuncs.new('int *[1]') + dims[0] = arr[0].size + steps = cfuncs.new('int *[1]') + steps[0] = arr[0].strides[-1] + cfuncs.int_times2(addr, dims, steps, 0) + def double_times2(space, __args__): + args, kwargs = __args__.unpack() + arr = map(space.unwrap, args) + # Assume arr is contiguous + addr = cfuncs.new('char *[2]') + addr[0] = arr[0].data + addr[1] = arr[1].data + dims = cfuncs.new('int *[1]') + dims[0] = arr[0].size + steps = cfuncs.new('int *[1]') + steps[0] = arr[0].strides[-1] + cfuncs.double_times2(addr, dims, steps, 0) + if option.runappdirect: + times2 = cls.space.wrap([double_times2, int_times2]) + else: + times2 = cls.space.wrap([interp2app(double_times2), + interp2app(int_times2)]) + else: + times2 = None + cls.w_times2 = cls.space.wrap(times2) -class AppTestUfuncs(BaseNumpyAppTest): def test_constants(self): import numpy as np assert np.FLOATING_POINT_SUPPORT == 1 @@ -180,27 +197,46 @@ raises(TypeError, ufunc) def test_frompyfunc(self): - try: - from numpy import frompyfunc - except ImportError: - skip('frompyfunc not available') from numpy import ufunc, frompyfunc, arange, dtype def adder(a, b): return a+b - myufunc = frompyfunc(adder, 2, 1) + try: + myufunc = frompyfunc(adder, 2, 1) + int_func22 = frompyfunc(int, 2, 2) + int_func12 = frompyfunc(int, 1, 2) + retype = dtype(object) + except NotImplementedError as e: + assert 'object' in str(e) + # Use pypy specific extension for out_dtype + myufunc = frompyfunc(adder, 2, 1, out_dtype='match') + int_func22 = frompyfunc(int, 2, 2, 
out_dtype='match') + int_func12 = frompyfunc(int, 1, 2, out_dtype='match') + retype = dtype(int) assert isinstance(myufunc, ufunc) res = myufunc(arange(10), arange(10)) - assert res.dtype == dtype(object) + assert res.dtype == retype assert all(res == arange(10) + arange(10)) raises(TypeError, frompyfunc, 1, 2, 3) - int_func22 = frompyfunc(int, 2, 2) raises (ValueError, int_func22, arange(10)) - int_func12 = frompyfunc(int, 1, 2) res = int_func12(arange(10)) assert len(res) == 2 assert isinstance(res, tuple) assert (res[0] == arange(10)).all() + def test_from_cffi_func(self): + import sys + if '__pypy__' not in sys.builtin_module_names: + skip('pypy-only test') + from numpy import frompyfunc, dtype, arange + if self.times2 is None: + skip('cffi not available') + ufunc = frompyfunc(self.times2, 1, 1, signature='()->()', + dtypes=[dtype(float), dtype(float), dtype(int), dtype(int)], + ) + f = arange(10, dtype=int) + f2 = ufunc(f) + assert f2 + def test_ufunc_attrs(self): from numpy import add, multiply, sin diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -1,7 +1,8 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter.gateway import interp2app, unwrap_spec +from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty +from pypy.interpreter.argument import Arguments from rpython.rlib import jit from rpython.rlib.rarithmetic import LONG_BIT, maxint from rpython.tool.sourcetools import func_with_new_name @@ -464,25 +465,36 @@ class W_UfuncGeneric(W_Ufunc): + ''' + Handle a number of python functions, each with a signature and dtypes. + The signature can specify how to create the inner loop, i.e. 
+ (i,j),(j,k)->(i,k) for a dot-like matrix multiplication, and the dtypes + can specify the input, output args for the function. When called, the actual + function used will be resolved by examining the input arg's dtypes. + + If dtypes == 'match', only one argument is provided and the output dtypes + will match the input dtype (not cpython numpy compatible) + ''' _immutable_fields_ = ["funcs", "signature", "nin", "nout", "nargs", "dtypes", "data"] - def __init__(self, funcs, name, identity, data, nin, nout, dtypes, signature): + def __init__(self, space, funcs, name, identity, nin, nout, dtypes, signature): # XXX make sure funcs, signature, dtypes, nin, nout are consistent # These don't matter, we use the signature and dtypes for determining # output dtype promote_to_largest = promote_to_float = promote_bools = False - int_only = allow_bool = allow_complex = complex_to_float = False + allow_bool = allow_complex = True + int_only = complex_to_float = False W_Ufunc.__init__(self, name, promote_to_largest, promote_to_float, promote_bools, identity, int_only, allow_bool, allow_complex, complex_to_float) self.funcs = funcs self.dtypes = dtypes self.nin = nin self.nout = nout - self.data = data self.nargs = nin + max(nout, 1) # ufuncs can always be called with an out=<> kwarg - if len(dtypes) % len(funcs) != 0 or len(dtypes) / len(funcs) != self.nargs: + if dtypes != 'match' and (len(dtypes) % len(funcs) != 0 or + len(dtypes) / len(funcs) != self.nargs): raise oefmt(space.w_ValueError, "generic ufunc with %d functions, %d arguments, but %d dtypes", len(funcs), self.nargs, len(dtypes)) @@ -494,8 +506,6 @@ def call(self, space, args_w): #from pypy.module._cffi_backend import newtype, func as _func - from rpython.rlib.rawstorage import alloc_raw_storage, raw_storage_setitem - from rpython.rtyper.lltypesystem import rffi, lltype out = None inargs = [] if len(args_w) < self.nin: @@ -508,34 +518,17 @@ for i in range(min(self.nout, len(args_w)-self.nin)): out = 
args_w[i+self.nin] if space.is_w(out, space.w_None) or out is None: - outargs.append(None) + continue else: if not isinstance(out, W_NDimArray): raise oefmt(space.w_TypeError, 'output arg %d must be an array, not %s', i+self.nin, str(args_w[i+self.nin])) - outargs.append(out) + outargs[i] = out index = self.type_resolver(space, inargs, outargs) self.alloc_outargs(space, index, inargs, outargs) - func, dims, steps = self.prep_call(space, index, inargs, outargs) - psize = rffi.sizeof(rffi.VOIDP) - lsize = rffi.sizeof(rffi.LONG) - data = alloc_raw_storage(psize*self.nargs) - dims_p = alloc_raw_storage(lsize * len(dims)) - steps_p = alloc_raw_storage(lsize * len(steps)) - for i in range(len(inargs)): - pdata = inargs[i].implementation.get_storage_as_int(space) - raw_storage_setitem(data, i * psize, pdata) - for j in range(len(outargs)): - pdata = outargs[j].implementation.get_storage_as_int(space) - raw_storage_setitem(data, (i + j) * psize, pdata) - for i in range(len(dims)): - raw_storage_setitem(dims_p, i * lsize, dims[i]) - raw_storage_setitem(steps_p, i * lsize, steps[i]) - print 'calling',func, hex(rffi.cast(lltype.Signed, func)) - func(rffi.cast(rffi.CArrayPtr(rffi.CCHARP), data), rffi.cast(rffi.LONGP, dims_p), rffi.cast(rffi.LONGP, steps_p), rffi.cast(rffi.VOIDP, 0)) - if len(outargs)>1: - return outargs - return outargs[0] + # XXX handle inner-loop indexing + # XXX JIT_me + raise oefmt(space.w_NotImplementedError, 'not implemented yet') def type_resolver(self, space, index, outargs): # Find a match for the inargs.dtype in self.dtypes, like @@ -887,7 +880,108 @@ def get(space): return space.fromcache(UfuncState) -def ufunc_from_func_and_data_and_signature(funcs, data, dtypes, nin, nout, - identity, name, doc, check_return, signature): - return W_UfuncGeneric(funcs, name, identity, data, nin, nout, dtypes, signature) - pass + at unwrap_spec(nin=int, nout=int, signature=str, w_identity=WrappedDefault(None), + name=str, doc=str) +def frompyfunc(space, w_func, nin, 
nout, w_dtypes=None, signature='', + w_identity=None, name='', doc=''): + ''' frompyfunc(func, nin, nout) #cpython numpy compatible + frompyfunc(func, nin, nout, dtypes=None, signature='', + identity=None, name='', doc='') + + Takes an arbitrary Python function and returns a ufunc. + + Can be used, for example, to add broadcasting to a built-in Python + function (see Examples section). + + Parameters + ---------- + func : Python function object + An arbitrary Python function or list of functions (if dtypes is specified). + nin : int + The number of input arguments. + nout : int + The number of arrays returned by `func`. + dtypes: None or [dtype, ...] of the input, output args for each function, + or 'match' to force output to exactly match input dtype + signature*: str, default='' + The mapping of input args to output args, defining the + inner-loop indexing + identity*: None (default) or int + For reduce-type ufuncs, the default value + name: str, default='' + doc: str, default='' + + only one of out_dtype or signature may be specified + + Returns + ------- + out : ufunc + Returns a Numpy universal function (``ufunc``) object. + + Notes + ----- + If the signature and out_dtype are both missing, the returned ufunc always + returns PyObject arrays (cpython numpy compatability). 
+ + Examples + -------- + Use frompyfunc to add broadcasting to the Python function ``oct``: + + >>> oct_obj_array = np.frompyfunc(oct, 1, 1) + >>> oct_obj_array(np.array((10, 30, 100))) + array([012, 036, 0144], dtype=object) + >>> np.array((oct(10), oct(30), oct(100))) # for comparison + array(['012', '036', '0144'], + dtype='|S4') + >>> oct_array = np.frompyfunc(oct, 1, 1, out_dtype=str) + >>> oct_obj_array(np.array((10, 30, 100))) + array([012, 036, 0144], dtype='|S4') + ''' + if (space.isinstance_w(w_func, space.w_tuple) or + space.isinstance_w(w_func, space.w_list)): + func = space.listview(w_func) + for w_f in func: + if not space.is_true(space.callable(w_f)): + raise oefmt(space.w_TypeError, 'func must be callable') + else: + if not space.is_true(space.callable(w_func)): + raise oefmt(space.w_TypeError, 'func must be callable') + func = [w_func] + + if space.is_none(w_dtypes) and not signature: + raise oefmt(space.w_NotImplementedError, + 'object dtype requested but not implemented') + #dtypes=[descriptor.get_dtype_cache(space).w_objectdtype] + elif space.isinstance_w(w_dtypes, space.w_str): + if not space.str_w(w_dtypes) == 'match': + raise oefmt(space.w_ValueError, + 'unknown out_dtype value "%s"', space.str_w(w_dtypes)) + dtypes = 'match' + elif (space.isinstance_w(w_dtypes, space.w_tuple) or + space.isinstance_w(w_dtypes, space.w_list)): + dtypes = space.listview(w_dtypes) + for i in range(len(dtypes)): + dtypes[i] = descriptor.decode_w_dtype(space, dtypes[i]) + else: + raise oefmt(space.w_ValueError, + 'dtypes must be None or a list of dtypes') + + if space.is_none(w_identity): + identity = None + elif space.isinstance_w(w_identity, space.int_w): + identity = space.int_w(w_identity) + else: + raise oefmt(space.w_ValueError, + 'identity must be 0, 1, or None') + if nin==1 and nout==1 and dtypes == 'match': + w_ret = W_Ufunc1(func[0], name) + elif nin==2 and nout==1 and dtypes == 'match': + def _func(calc_dtype, w_left, w_right): + arglist = 
space.wrap([w_left, w_right]) + return space.call_args(func[0], Arguments.frompacked(space, arglist)) + w_ret = W_Ufunc2(_func, name) + else: + w_ret = W_UfuncGeneric(space, func, name, identity, nin, nout, dtypes, signature) + if doc: + w_ret.w_doc = space.wrap(doc) + return w_ret From noreply at buildbot.pypy.org Tue Jun 10 00:13:13 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Tue, 10 Jun 2014 00:13:13 +0200 (CEST) Subject: [pypy-commit] pypy py3k: now an IOError subclass Message-ID: <20140609221313.9775E1C3382@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72011:b9f9b88bc64f Date: 2014-06-09 10:36 -0700 http://bitbucket.org/pypy/pypy/changeset/b9f9b88bc64f/ Log: now an IOError subclass diff --git a/lib_pypy/_gdbm.py b/lib_pypy/_gdbm.py --- a/lib_pypy/_gdbm.py +++ b/lib_pypy/_gdbm.py @@ -47,7 +47,7 @@ # failure must be due to missing gdbm dev libs raise ImportError('%s: %s' %(e.__class__.__name__, e)) -class error(Exception): +class error(IOError): pass def _fromstr(key): From noreply at buildbot.pypy.org Tue Jun 10 13:23:05 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 10 Jun 2014 13:23:05 +0200 (CEST) Subject: [pypy-commit] pypy default: It seems this syntax fails to compile on 2.7.3, but works on 2.7.6. Message-ID: <20140610112305.CB55F1C3395@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72016:49e9a74b7c9e Date: 2014-06-10 13:22 +0200 http://bitbucket.org/pypy/pypy/changeset/49e9a74b7c9e/ Log: It seems this syntax fails to compile on 2.7.3, but works on 2.7.6. 
diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -1,3 +1,4 @@ +import re, py from rpython.rlib.rsre import rsre_core from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re @@ -166,7 +167,10 @@ assert res def test_empty_maxuntil_2(self): - r_code, r = get_code_and_re(r'X(.*?)+X') + try: + r_code, r = get_code_and_re(r'X(.*?)+X') + except re.error, e: + py.test.skip("older version of the stdlib: %s" % (e,)) assert r.match('XfooXbarX').span() == (0, 5) assert r.match('XfooXbarX').span(1) == (4, 4) res = rsre_core.match(r_code, 'XfooXbarX') From noreply at buildbot.pypy.org Tue Jun 10 18:49:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 10 Jun 2014 18:49:15 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Isolated the next jit-only stringbuilder bug Message-ID: <20140610164915.F020E1D2A7F@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72017:ea18dbb1e055 Date: 2014-06-10 18:48 +0200 http://bitbucket.org/pypy/pypy/changeset/ea18dbb1e055/ Log: Isolated the next jit-only stringbuilder bug diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py --- a/rpython/jit/metainterp/test/test_string.py +++ b/rpython/jit/metainterp/test/test_string.py @@ -804,6 +804,25 @@ res = self.meta_interp(f, [10], backendopt=True) assert res == 0 + def test_stringbuilder_bug3(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + IN = ['a' * 37, 'b' * 38, '22', '1', '333'] + JOINED = ''.join(IN) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = StringBuilder(36) + for s in IN: + sb.append(s) + s = sb.build() + if s != JOINED: + raise ValueError + n -= 1 + return n + f(10) + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + def test_shrink_array(self): jitdriver = JitDriver(reds=['result', 'n'], 
greens=[]) _str, _StringBuilder = self._str, self._StringBuilder From noreply at buildbot.pypy.org Tue Jun 10 18:58:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 10 Jun 2014 18:58:03 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Fix the test Message-ID: <20140610165803.A02931D2B17@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72018:a919095552d2 Date: 2014-06-10 18:57 +0200 http://bitbucket.org/pypy/pypy/changeset/a919095552d2/ Log: Fix the test diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -283,8 +283,9 @@ ofs = ll_builder.current_ofs end = ofs + 2 * ll_builder.charsize if uint_gt(end, ll_builder.current_end): - ofs = ll_builder.grow(ll_builder, 2) - end = ofs + 2 * ll_builder.charsize + BaseStringBuilderRepr._ll_append_char(ll_builder, char0) + BaseStringBuilderRepr._ll_append_char(ll_builder, char1) + return ll_builder.current_ofs = end # --- no GC! --- raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) From noreply at buildbot.pypy.org Tue Jun 10 21:49:53 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 10 Jun 2014 21:49:53 +0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: update the values Message-ID: <20140610194953.0F5261C3395@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r507:3817e5347637 Date: 2014-05-08 17:28 +0200 http://bitbucket.org/pypy/pypy.org/changeset/3817e5347637/ Log: update the values diff --git a/don1.html b/don1.html --- a/don1.html +++ b/don1.html @@ -9,13 +9,13 @@ - $50852 of $105000 (48.4%) + $51043 of $105000 (48.6%)
diff --git a/don3.html b/don3.html --- a/don3.html +++ b/don3.html @@ -15,7 +15,7 @@ - $48121 of $60000 (80.2%) + $48130 of $60000 (80.2%)
From noreply at buildbot.pypy.org Tue Jun 10 21:49:54 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 10 Jun 2014 21:49:54 +0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: merge heads Message-ID: <20140610194954.2F21C1C3395@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r508:7385180beb5b Date: 2014-06-10 21:49 +0200 http://bitbucket.org/pypy/pypy.org/changeset/7385180beb5b/ Log: merge heads diff --git a/don1.html b/don1.html --- a/don1.html +++ b/don1.html @@ -9,13 +9,13 @@ - $50852 of $105000 (48.4%) + $51043 of $105000 (48.6%)
diff --git a/don3.html b/don3.html --- a/don3.html +++ b/don3.html @@ -15,7 +15,7 @@ - $48121 of $60000 (80.2%) + $48130 of $60000 (80.2%)
From noreply at buildbot.pypy.org Wed Jun 11 22:42:16 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:16 +0200 (CEST) Subject: [pypy-commit] pypy default: fix test, raise instead of assert Message-ID: <20140611204216.837B81D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72019:6046a1811363 Date: 2014-06-11 19:06 +0300 http://bitbucket.org/pypy/pypy/changeset/6046a1811363/ Log: fix test, raise instead of assert diff --git a/rpython/config/test/test_translationoption.py b/rpython/config/test/test_translationoption.py --- a/rpython/config/test/test_translationoption.py +++ b/rpython/config/test/test_translationoption.py @@ -1,3 +1,4 @@ +import sys import py from rpython.config.translationoption import get_combined_translation_config from rpython.config.translationoption import set_opt_level @@ -10,8 +11,8 @@ config.translation.gcrootfinder = "shadowstack" py.test.raises(ConflictConfigError, set_opt_level, config, '0') -if compiler.name == 'msvc': +if compiler.name == 'msvc' or sys.platform == 'darwin': def test_no_asmgcrot_on_msvc(): config = get_combined_translation_config() - py.test.raises(ConfigError, config.translation.setoption, - 'gcrootfinder', 'asmgcc', 'user') + config.translation.gcrootfinder = "asmgcc" + py.test.raises(ConfigError, set_opt_level, config, 'jit') diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -369,8 +369,8 @@ # disallow asmgcc on OS/X and on Win32 if config.translation.gcrootfinder == "asmgcc": - assert sys.platform != "darwin", "'asmgcc' not supported on OS/X" - assert sys.platform != "win32", "'asmgcc' not supported on Win32" + if sys.platform == "darwin" or sys.platform =="win32": + raise ConfigError("'asmgcc' not supported on this platform") # ---------------------------------------------------------------- From noreply at buildbot.pypy.org Wed Jun 11 22:42:22 
2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:22 +0200 (CEST) Subject: [pypy-commit] pypy default: check and raise when shell = True Message-ID: <20140611204222.BF7DB1D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72024:eed854395434 Date: 2014-06-11 21:56 +0300 http://bitbucket.org/pypy/pypy/changeset/eed854395434/ Log: check and raise when shell = True diff --git a/rpython/tool/runsubprocess.py b/rpython/tool/runsubprocess.py --- a/rpython/tool/runsubprocess.py +++ b/rpython/tool/runsubprocess.py @@ -35,6 +35,10 @@ pipe = Popen(args, stdout=PIPE, stderr=PIPE, shell=shell, env=env, cwd=cwd) stdout, stderr = pipe.communicate() + if (sys.platform == 'win32' and pipe.returncode == 1 and + 'is not recognized' in stderr): + # Setting shell=True on windows messes up expected exceptions + raise EnvironmentError(stderr) return pipe.returncode, stdout, stderr From noreply at buildbot.pypy.org Wed Jun 11 22:42:17 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:17 +0200 (CEST) Subject: [pypy-commit] pypy default: prevent opening error dialog box on windows when not using testrunner/runner Message-ID: <20140611204217.BA0C01D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72020:8493b2b0a91b Date: 2014-06-11 20:57 +0300 http://bitbucket.org/pypy/pypy/changeset/8493b2b0a91b/ Log: prevent opening error dialog box on windows when not using testrunner/runner diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py --- a/rpython/translator/c/genc.py +++ b/rpython/translator/c/genc.py @@ -313,8 +313,25 @@ def cmdexec(self, args='', env=None, err=False, expect_crash=False): assert self._compiled + if expect_crash and sys.platform == 'win32': + #Prevent opening a dialog box + import ctypes + winapi = ctypes.windll.kernel32 + SetErrorMode = winapi.SetErrorMode + SetErrorMode.argtypes=[ctypes.c_int] + + SEM_FAILCRITICALERRORS = 1 + SEM_NOGPFAULTERRORBOX = 2 + 
SEM_NOOPENFILEERRORBOX = 0x8000 + flags = SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX \ + | SEM_NOOPENFILEERRORBOX + #Since there is no GetErrorMode, do a double Set + old_mode = SetErrorMode(flags) + SetErrorMode(old_mode | flags) res = self.translator.platform.execute(self.executable_name, args, env=env) + if expect_crash and sys.platform == 'win32': + SetErrorMode(old_mode) if res.returncode != 0: if expect_crash: return res.out, res.err From noreply at buildbot.pypy.org Wed Jun 11 22:42:23 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:23 +0200 (CEST) Subject: [pypy-commit] pypy default: copy log_errors handling from base class Message-ID: <20140611204223.E035A1D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72025:3c313890cbea Date: 2014-06-11 22:06 +0300 http://bitbucket.org/pypy/pypy/changeset/3c313890cbea/ Log: copy log_errors handling from base class diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -240,9 +240,12 @@ stderr = stdout + stderr errorfile = outname.new(ext='errors') errorfile.write(stderr, mode='wb') - stderrlines = stderr.splitlines() - for line in stderrlines: - log.ERROR(line) + if self.log_errors: + stderrlines = stderr.splitlines() + for line in stderrlines: + log.Error(line) + # ^^^ don't use ERROR, because it might actually be fine. + # Also, ERROR confuses lib-python/conftest.py. 
raise CompilationError(stdout, stderr) From noreply at buildbot.pypy.org Wed Jun 11 22:42:19 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:19 +0200 (CEST) Subject: [pypy-commit] pypy default: document more of the test environment Message-ID: <20140611204219.0F3131D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72021:cf7a32ba3aa5 Date: 2014-06-11 21:33 +0300 http://bitbucket.org/pypy/pypy/changeset/cf7a32ba3aa5/ Log: document more of the test environment diff --git a/testrunner/runner.py b/testrunner/runner.py --- a/testrunner/runner.py +++ b/testrunner/runner.py @@ -233,6 +233,8 @@ run_param.startup() N = run_param.parallel_runs + if N > 1: + out.write("running %d parallel test workers") failure = False for testname in testdirs: @@ -353,7 +355,7 @@ help="configuration python file (optional)") parser.add_option("--root", dest="root", default=".", help="root directory for the run") - parser.add_option("--parallel-runs", dest="parallel_runs", default=0, + parser.add_option("--parallel-runs", dest="parallel_runs", default=1, type="int", help="number of parallel test runs") parser.add_option("--dry-run", dest="dry_run", default=False, @@ -389,6 +391,8 @@ if py.path.local(config_py_file).check(file=1): print >>out, "using config", config_py_file execfile(config_py_file, run_param.__dict__) + else: + print >>out, "ignoring non-existant config", config_py_file if run_param.cherrypick: for p in run_param.cherrypick: From noreply at buildbot.pypy.org Wed Jun 11 22:42:25 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:25 +0200 (CEST) Subject: [pypy-commit] pypy default: skip and massage until MSVC passes tests Message-ID: <20140611204225.1C4001D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72026:535d835f3a8a Date: 2014-06-11 23:41 +0300 http://bitbucket.org/pypy/pypy/changeset/535d835f3a8a/ Log: skip and massage until MSVC passes tests diff --git 
a/rpython/rtyper/tool/test/test_rffi_platform.py b/rpython/rtyper/tool/test/test_rffi_platform.py --- a/rpython/rtyper/tool/test/test_rffi_platform.py +++ b/rpython/rtyper/tool/test/test_rffi_platform.py @@ -121,10 +121,11 @@ assert value == 1.5 value = rffi_platform.getdefineddouble('BLAH', '#define BLAH 1.0e20') assert value == 1.0e20 - value = rffi_platform.getdefineddouble('BLAH', '#define BLAH 1.0e50000') - assert value == float("inf") - value = rffi_platform.getdefineddouble('BLAH', '#define BLAH (double)0/0') - assert isnan(value) + if platform.name != 'msvc': + value = rffi_platform.getdefineddouble('BLAH', '#define BLAH 1.0e50000') + assert value == float("inf") + value = rffi_platform.getdefineddouble('BLAH', '#define BLAH (double)0/0') + assert isnan(value) def test_defined_constant_string(): value = rffi_platform.getdefinedstring('MCDONC', '') @@ -135,14 +136,18 @@ def test_getintegerfunctionresult(): func = 'int sum(int a, int b) {return a + b;}' + if platform.name == 'msvc': + func = '__declspec(dllexport) ' + func value = rffi_platform.getintegerfunctionresult('sum', [6, 7], func) assert value == 13 - value = rffi_platform.getintegerfunctionresult('lround', [6.7], + if not platform.name == 'msvc': + # MSVC gets lround in VS2013! 
+ value = rffi_platform.getintegerfunctionresult('lround', [6.7], '#include ') - assert value == 7 - value = rffi_platform.getintegerfunctionresult('lround', [9.1], + assert value == 7 + value = rffi_platform.getintegerfunctionresult('lround', [9.1], includes=['math.h']) - assert value == 9 + assert value == 9 def test_configure(): test_h = udir.join('test_ctypes_platform.h') @@ -295,7 +300,7 @@ return (a + b); } """ - if platform.name == 'mscv': + if platform.name == 'msvc': c_source = '__declspec(dllexport) ' + c_source libname = 'libc_lib' else: From noreply at buildbot.pypy.org Wed Jun 11 22:42:20 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:20 +0200 (CEST) Subject: [pypy-commit] pypy default: skip test on windows Message-ID: <20140611204220.51A9D1D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72022:a5b405d11bc3 Date: 2014-06-11 21:41 +0300 http://bitbucket.org/pypy/pypy/changeset/a5b405d11bc3/ Log: skip test on windows diff --git a/rpython/translator/c/test/test_extfunc.py b/rpython/translator/c/test/test_extfunc.py --- a/rpython/translator/c/test/test_extfunc.py +++ b/rpython/translator/c/test/test_extfunc.py @@ -537,6 +537,8 @@ def test_kill_to_send_sigusr1(): import signal from rpython.rlib import rsignal + if not 'SIGUSR1' in dir(signal): + py.test.skip("no SIGUSR1 available") def does_stuff(): rsignal.pypysig_setflag(signal.SIGUSR1) os.kill(os.getpid(), signal.SIGUSR1) From noreply at buildbot.pypy.org Wed Jun 11 22:42:21 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:42:21 +0200 (CEST) Subject: [pypy-commit] pypy default: fix translation of test Message-ID: <20140611204221.964B21D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72023:18f696e31514 Date: 2014-06-11 21:44 +0300 http://bitbucket.org/pypy/pypy/changeset/18f696e31514/ Log: fix translation of test diff --git a/rpython/translator/c/test/test_extfunc.py 
b/rpython/translator/c/test/test_extfunc.py --- a/rpython/translator/c/test/test_extfunc.py +++ b/rpython/translator/c/test/test_extfunc.py @@ -185,6 +185,7 @@ os.stat("nonexistentdir/nonexistentfile") except WindowsError, e: return e.winerror + return 0 f = compile(call_stat, []) res = f() expected = call_stat() From noreply at buildbot.pypy.org Wed Jun 11 22:43:17 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 11 Jun 2014 22:43:17 +0200 (CEST) Subject: [pypy-commit] pypy gc-two-end-nursery: add malloc_fixed() for obj that don't need zeroing memory before allocation Message-ID: <20140611204317.11A311D29E4@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-two-end-nursery Changeset: r72027:46de6de2efd2 Date: 2014-06-11 16:38 -0400 http://bitbucket.org/pypy/pypy/changeset/46de6de2efd2/ Log: add malloc_fixed() for obj that don't need zeroing memory before allocation diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -291,7 +291,23 @@ self.nursery = NULL self.nursery_free = NULL self.nursery_top = NULL - self.nursery_real_top = NULL + # _______ nursery ________________ + # / \ + # first-part second-part + # +----------------------------------+ + # | | | | | | | | | + # | | | | | | | | | + # |o|o|o|o| | |o|o| + # |b|b|b|b|zeroed|non-zeroed mem |b|b| + # |j|j|j|j|---> | <-------|j|j| + # | | | | | | | | | + # +--------------+-------------------+ + # ^ ^ ^ ^ + # free top free real-top + #new:add the pointer for objects don't need zero memory before allocation + self.nursery_second_part_free = NULL + self.nursery_second_part_top = NULL + self.nursery_real_top = NULL self.debug_tiny_nursery = -1 self.debug_rotating_nurseries = lltype.nullptr(NURSARRAY) self.extra_threshold = 0 @@ -464,6 +480,8 @@ # the end of the nursery: self.nursery_top = self.nursery + self.nursery_size self.nursery_real_top = self.nursery_top + 
self.nursery_second_part_free = self.nursery_real_top + self.nursery_second_part_top = self.nursery_top # initialize the threshold self.min_heap_size = max(self.min_heap_size, self.nursery_size * self.major_collection_threshold) @@ -596,6 +614,31 @@ # return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF) + def malloc_fixedsize(self, typeid, length, size, + needs_finalizer=False, + is_finalizer_light=False, + contains_weakptr=False): + size_gc_header = self.gcheaderbuilder.size_gc_header + totalsize = size_gc_header + size + rawtotalsize = raw_malloc_usage(totalsize) + min_size = raw_malloc_usage(self.minimal_size_in_nursery) + if rawtotalsize < min_size: + #round up the raw totalsize to min_size + totalsize = rawtotalsize = min_size + result = self.nursery_second_part_free + + #allocate the obj in the opposite direction as obj in malloc_fixedsize_clear() + self.nursery_second_part_free = result - totalsize + #make sure the new object won't overwrite existing objects + if self.nursery_second_part_free < self.nursery_free: + ##TODO:deal with different GC states + result = self.minor_collection() + #move the pointer + result -= totalsize + llarena.arena_reserve(result, totalsize) + #real object beginning address + obj = result + size_gc_header + return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF) def malloc_varsize_clear(self, typeid, length, size, itemsize, offset_to_length): @@ -671,6 +714,7 @@ "totalsize > nursery_cleanup") llarena.arena_reset(self.nursery_top, size, 2) self.nursery_top += size + self.nursery_second_part_top = self.nursery_top move_nursery_top._always_inline_ = True def collect_and_reserve(self, prev_result, totalsize): @@ -1457,6 +1501,8 @@ self.nursery_free = self.nursery self.nursery_top = self.nursery + self.initial_cleanup self.nursery_real_top = self.nursery + self.nursery_size + self.nursery_second_part_free = self.nursery_real_top + self.nursery_second_part_top = self.nursery_top # debug_print("minor collect, total memory used:", 
self.get_total_memory_used()) From noreply at buildbot.pypy.org Wed Jun 11 22:43:18 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 11 Jun 2014 22:43:18 +0200 (CEST) Subject: [pypy-commit] pypy gc-two-end-nursery: add llop do_malloc_fixedsize Message-ID: <20140611204318.543F51D29E4@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-two-end-nursery Changeset: r72028:bdac1cc04cda Date: 2014-06-11 16:40 -0400 http://bitbucket.org/pypy/pypy/changeset/bdac1cc04cda/ Log: add llop do_malloc_fixedsize diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -450,6 +450,7 @@ 'jit_conditional_call': LLOp(), 'get_exception_addr': LLOp(), 'get_exc_value_addr': LLOp(), + 'do_malloc_fixedsize': LLOp(canmallocgc=True), 'do_malloc_fixedsize_clear':LLOp(canmallocgc=True), 'do_malloc_varsize_clear': LLOp(canmallocgc=True), 'get_write_barrier_failing_case': LLOp(sideeffects=False), From noreply at buildbot.pypy.org Wed Jun 11 22:48:46 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 22:48:46 +0200 (CEST) Subject: [pypy-commit] pypy default: MSVC requires explicit export Message-ID: <20140611204846.1E0D71D29E4@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72029:01934af31a64 Date: 2014-06-11 23:48 +0300 http://bitbucket.org/pypy/pypy/changeset/01934af31a64/ Log: MSVC requires explicit export diff --git a/rpython/rtyper/tool/test/test_mkrffi.py b/rpython/rtyper/tool/test/test_mkrffi.py --- a/rpython/rtyper/tool/test/test_mkrffi.py +++ b/rpython/rtyper/tool/test/test_mkrffi.py @@ -60,16 +60,20 @@ from rpython.translator.platform import platform from rpython.translator.tool.cbuild import ExternalCompilationInfo + if platform.name == 'msvc': + export = '__declspec(dllexport) ' + else: + export = '' c_source = """ - void *int_to_void_p(int arg) {} + {0} void *int_to_void_p(int arg) {{}} 
- struct random_strucutre { + {0} struct random_strucutre {{ int one; int *two; - }; + }}; - struct random_structure* int_int_to_struct_p(int one, int two) {} - """ + {0} struct random_structure* int_int_to_struct_p(int one, int two) {{}} + """.format(export) c_file = udir.join('rffilib.c') c_file.write(c_source) From noreply at buildbot.pypy.org Wed Jun 11 23:06:19 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 23:06:19 +0200 (CEST) Subject: [pypy-commit] pypy default: pypy gets os.stat().st_mtime from win32api, cpython from crt Message-ID: <20140611210619.4620E1D2B48@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72030:ca96d56fe60e Date: 2014-06-12 00:00 +0300 http://bitbucket.org/pypy/pypy/changeset/ca96d56fe60e/ Log: pypy gets os.stat().st_mtime from win32api, cpython from crt diff --git a/rpython/rtyper/module/test/test_ll_os_stat.py b/rpython/rtyper/module/test/test_ll_os_stat.py --- a/rpython/rtyper/module/test/test_ll_os_stat.py +++ b/rpython/rtyper/module/test/test_ll_os_stat.py @@ -22,13 +22,14 @@ stat = ll_os_stat.make_win32_stat_impl('stat', ll_os.StringTraits()) wstat = ll_os_stat.make_win32_stat_impl('stat', ll_os.UnicodeTraits()) def check(f): - expected = os.stat(f).st_mtime - assert stat(f).st_mtime == expected - assert wstat(unicode(f)).st_mtime == expected + # msec resolution + expected = int(os.stat(f).st_mtime*1000) + assert int(stat(f).st_mtime*1000) == expected + assert int(wstat(unicode(f)).st_mtime*1000) == expected check('c:/') check(os.environ['TEMP']) - check('c:/pagefile.sys') + check(sys.executable) def test_fstat(self): fstat = ll_os_stat.make_win32_stat_impl('fstat', ll_os.StringTraits()) From noreply at buildbot.pypy.org Wed Jun 11 23:06:20 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 11 Jun 2014 23:06:20 +0200 (CEST) Subject: [pypy-commit] pypy default: pypy and cpython on windows both raise a KeyError Message-ID: <20140611210620.8F3E01D2B48@cobra.cs.uni-duesseldorf.de> 
Author: mattip Branch: Changeset: r72031:7c92b1fd565a Date: 2014-06-12 00:05 +0300 http://bitbucket.org/pypy/pypy/changeset/7c92b1fd565a/ Log: pypy and cpython on windows both raise a KeyError diff --git a/rpython/rtyper/module/test/test_ll_os_environ.py b/rpython/rtyper/module/test/test_ll_os_environ.py --- a/rpython/rtyper/module/test/test_ll_os_environ.py +++ b/rpython/rtyper/module/test/test_ll_os_environ.py @@ -20,7 +20,7 @@ del os.environ['TEST'] try: del os.environ['key='] - except OSError: + except (KeyError, OSError): return 1 return 2 else: From noreply at buildbot.pypy.org Thu Jun 12 14:03:58 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 12 Jun 2014 14:03:58 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Update the test Message-ID: <20140612120358.0F7DC1D2D4D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72032:ef6d3281e46e Date: 2014-06-12 14:03 +0200 http://bitbucket.org/pypy/pypy/changeset/ef6d3281e46e/ Log: Update the test diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -102,38 +102,42 @@ assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) assert loop.match(""" - i7 = int_gt(i4, 0) - guard_true(i7, descr=...) + i82 = int_gt(i77, 0) + guard_true(i82, descr=...) guard_not_invalidated(descr=...) - p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=) + p83 = call(ConstClass(ll_int2dec__Signed), i77, descr=) guard_no_exception(descr=...) - i10 = strlen(p9) - i11 = int_is_true(i10) - guard_true(i11, descr=...) - i13 = strgetitem(p9, 0) - i15 = int_eq(i13, 45) - guard_false(i15, descr=...) - i17 = int_neg(i10) - i19 = int_gt(i10, 23) - guard_false(i19, descr=...) - p21 = newstr(23) - copystrcontent(p9, p21, 0, 0, i10) - i25 = int_add(1, i10) - i26 = int_gt(i25, 23) - guard_false(i26, descr=...) 
- strsetitem(p21, i10, 32) - i30 = int_add(i10, i25) - i31 = int_gt(i30, 23) - guard_false(i31, descr=...) - copystrcontent(p9, p21, 0, i25, i10) - i33 = int_lt(i30, 23) - guard_true(i33, descr=...) - p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=) + i84 = strlen(p83) + i85 = int_is_true(i84) + guard_true(i85, descr=...) + i86 = strgetitem(p83, 0) + i87 = int_eq(i86, 45) + guard_false(i87, descr=...) + i88 = int_neg(i84) + i89 = int_add(24, i84) + i90 = uint_le(i89, 56) + guard_true(i90, descr=...) + p92 = newstr(32) + copystrcontent(p83, p92, 0, 0, i84) + i93 = uint_lt(i89, 56) + guard_true(i93, descr=...) + i94 = int_add(i89, 1) + strsetitem(p92, i84, 32) + i95 = int_add(i94, i84) + i96 = uint_le(i95, 56) + guard_true(i96, descr=...) + i97 = int_sub(i94, 24) + copystrcontent(p83, p92, 0, i97, i84) + i98 = int_sub(56, i95) + i99 = int_sub(32, i98) + i100 = int_ne(32, i99) + guard_true(i100, descr=...) + p101 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p92, i99, descr=) guard_no_exception(descr=...) - i37 = strlen(p35) - i38 = int_add_ovf(i5, i37) + i102 = strlen(p101) + i103 = int_add_ovf(i75, i102) guard_no_overflow(descr=...) - i40 = int_sub(i4, 1) + i104 = int_sub(i77, 1) --TICK-- jump(..., descr=...) """) From noreply at buildbot.pypy.org Thu Jun 12 14:10:12 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 12 Jun 2014 14:10:12 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Tweak: don't give a minimal size (if we know exactly how many chars Message-ID: <20140612121012.9441D1D2D54@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72033:b28423c21e95 Date: 2014-06-12 14:09 +0200 http://bitbucket.org/pypy/pypy/changeset/b28423c21e95/ Log: Tweak: don't give a minimal size (if we know exactly how many chars we need and it's less than 32). But resize more aggressively when we're still small. 
diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -54,12 +54,12 @@ @enforceargs(None, int) def stringbuilder_grow(ll_builder, needed): - needed += 7 try: needed = ovfcheck(needed + ll_builder.total_size) except OverflowError: raise MemoryError - needed &= ~7 + needed += 63 + needed &= ~63 # new_piece = lltype.malloc(STRINGPIECE) charsize = ll_builder.charsize @@ -68,7 +68,7 @@ except OverflowError: raise MemoryError new_piece.piece_lgt = needed_chars - raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed * charsize, flavor='raw') + raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed_chars, flavor='raw') new_piece.raw_ptr = raw_ptr new_piece.prev_piece = ll_builder.extra_pieces ll_builder.extra_pieces = new_piece @@ -201,7 +201,7 @@ @classmethod def ll_new(cls, init_size): - init_size = max(min(init_size, 1280), 32) + init_size = min(init_size, 1280) ll_builder = lltype.malloc(cls.lowleveltype.TO) ll_builder.current_buf = cls.mallocfn(init_size) ofs = ll_baseofs(ll_builder.current_buf) From noreply at buildbot.pypy.org Thu Jun 12 14:12:51 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 12 Jun 2014 14:12:51 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: More tweaks Message-ID: <20140612121251.32E531D2411@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72034:df22a674b249 Date: 2014-06-12 14:11 +0200 http://bitbucket.org/pypy/pypy/changeset/df22a674b249/ Log: More tweaks diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -54,19 +54,15 @@ @enforceargs(None, int) def stringbuilder_grow(ll_builder, needed): + charsize = ll_builder.charsize try: needed = ovfcheck(needed + ll_builder.total_size) + needed = ovfcheck(needed + 63) & ~63 + needed_chars 
= ovfcheck(needed * charsize) except OverflowError: raise MemoryError - needed += 63 - needed &= ~63 # new_piece = lltype.malloc(STRINGPIECE) - charsize = ll_builder.charsize - try: - needed_chars = needed * charsize - except OverflowError: - raise MemoryError new_piece.piece_lgt = needed_chars raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed_chars, flavor='raw') new_piece.raw_ptr = raw_ptr From noreply at buildbot.pypy.org Thu Jun 12 17:30:51 2014 From: noreply at buildbot.pypy.org (mattip) Date: Thu, 12 Jun 2014 17:30:51 +0200 (CEST) Subject: [pypy-commit] pypy default: whoops on parallel_runs of tests Message-ID: <20140612153051.88C611D2411@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72035:bf4a503315e1 Date: 2014-06-12 18:30 +0300 http://bitbucket.org/pypy/pypy/changeset/bf4a503315e1/ Log: whoops on parallel_runs of tests diff --git a/testrunner/runner.py b/testrunner/runner.py --- a/testrunner/runner.py +++ b/testrunner/runner.py @@ -234,7 +234,7 @@ N = run_param.parallel_runs if N > 1: - out.write("running %d parallel test workers") + out.write("running %d parallel test workers" % N) failure = False for testname in testdirs: @@ -355,7 +355,7 @@ help="configuration python file (optional)") parser.add_option("--root", dest="root", default=".", help="root directory for the run") - parser.add_option("--parallel-runs", dest="parallel_runs", default=1, + parser.add_option("--parallel-runs", dest="parallel_runs", default=0, type="int", help="number of parallel test runs") parser.add_option("--dry-run", dest="dry_run", default=False, From noreply at buildbot.pypy.org Thu Jun 12 20:14:56 2014 From: noreply at buildbot.pypy.org (Hubert Hesse) Date: Thu, 12 Jun 2014 20:14:56 +0200 (CEST) Subject: [pypy-commit] lang-smalltalk stmgc-c7: Float asString produced wrong results because image instances were read wrongly Message-ID: <20140612181456.5A9BC1D2D4A@cobra.cs.uni-duesseldorf.de> Author: Hubert Hesse Branch: stmgc-c7 Changeset: r845:33317024f2e6 Date: 
2014-06-12 20:14 +0200 http://bitbucket.org/pypy/lang-smalltalk/changeset/33317024f2e6/ Log: Float asString produced wrong results because image instances were read wrongly diff --git a/spyvm/squeakimage.py b/spyvm/squeakimage.py --- a/spyvm/squeakimage.py +++ b/spyvm/squeakimage.py @@ -561,8 +561,9 @@ return bytes[:stop] # omit odd bytes def get_ruints(self, required_len=-1): - from rpython.rlib.rarithmetic import r_uint - words = [r_uint(x) for x in self.chunk.data] + from rpython.rlib.rarithmetic import r_uint32 + # XXX: Fix for 64bit image support + words = [r_uint32(x) for x in self.chunk.data] if required_len != -1 and len(words) != required_len: raise CorruptImageError("Expected %d words, got %d" % (required_len, len(words))) return words From noreply at buildbot.pypy.org Thu Jun 12 23:58:14 2014 From: noreply at buildbot.pypy.org (mattip) Date: Thu, 12 Jun 2014 23:58:14 +0200 (CEST) Subject: [pypy-commit] pypy default: typo Message-ID: <20140612215814.AA3321D2D4D@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72036:56d1c343a38c Date: 2014-06-12 20:20 +0300 http://bitbucket.org/pypy/pypy/changeset/56d1c343a38c/ Log: typo diff --git a/testrunner/runner.py b/testrunner/runner.py --- a/testrunner/runner.py +++ b/testrunner/runner.py @@ -234,7 +234,7 @@ N = run_param.parallel_runs if N > 1: - out.write("running %d parallel test workers" % N) + out.write("running %d parallel test workers\n" % N) failure = False for testname in testdirs: From noreply at buildbot.pypy.org Thu Jun 12 23:58:15 2014 From: noreply at buildbot.pypy.org (mattip) Date: Thu, 12 Jun 2014 23:58:15 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: wip Message-ID: <20140612215815.F18161D2D4D@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72037:baa6d5815247 Date: 2014-06-12 23:45 +0300 http://bitbucket.org/pypy/pypy/changeset/baa6d5815247/ Log: wip diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- 
a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -85,6 +85,29 @@ greens = ['shapelen', 'dtype'], reds = 'auto') +call_many_to_one_driver = jit.JitDriver( + name='numpy_call_many_to_one', + greens=['shapelen', 'func', 'res_dtype'], + reds='auto') + +def call_many_to_one(space, shape, func, res_dtype, w_in, out): + # out must hav been built. func needs no calc_type, is usually an + # external ufunc + iters_and_states = [i.create_iter(shape) for i in w_in] + shapelen = len(shape) + while not out_iter.done(out_state): + call_many_to_one_driver.jit_merge_point(shapelen=shapelen, func=func, + res_dtype=res_dtype) + vals = [None] + [i_s[0].getitem(i_s[1]) for i_s in iters_and_states] + arglist = space.wrap(vals) + out_val = space.call_args(func, Arguments.frompacked(space, arglist)) + out_iter.setitem(out_state, out_val.convert_to(space, res_dtype)) + for i in range(len(iters_and_states)): + iters_and_states[i][1] = iters_and_states[i][0].next(iters_and_states[i][1]) + out_state = out_iter.next(out_state) + return out + + def setslice(space, shape, target, source): # note that unlike everything else, target and source here are # array implementations, not arrays diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -208,9 +208,9 @@ except NotImplementedError as e: assert 'object' in str(e) # Use pypy specific extension for out_dtype - myufunc = frompyfunc(adder, 2, 1, out_dtype='match') - int_func22 = frompyfunc(int, 2, 2, out_dtype='match') - int_func12 = frompyfunc(int, 1, 2, out_dtype='match') + myufunc = frompyfunc(adder, 2, 1, dtypes=['match']) + int_func22 = frompyfunc(int, 2, 2, dtypes=['match']) + int_func12 = frompyfunc(int, 1, 2, dtypes=['match']) retype = dtype(int) assert isinstance(myufunc, ufunc) res = myufunc(arange(10), arange(10)) diff --git a/pypy/module/micronumpy/ufuncs.py 
b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -527,8 +527,11 @@ index = self.type_resolver(space, inargs, outargs) self.alloc_outargs(space, index, inargs, outargs) # XXX handle inner-loop indexing - # XXX JIT_me - raise oefmt(space.w_NotImplementedError, 'not implemented yet') + if len(outargs) < 2: + return loop.call_many_to_one(space, new_shape, self.func, + res_dtype, inargs, outargs[0]) + return loop.call_many_to_many(space, new_shape, self.func, + res_dtype, inargs, out) def type_resolver(self, space, index, outargs): # Find a match for the inargs.dtype in self.dtypes, like @@ -951,8 +954,7 @@ if space.is_none(w_dtypes) and not signature: raise oefmt(space.w_NotImplementedError, 'object dtype requested but not implemented') - #dtypes=[descriptor.get_dtype_cache(space).w_objectdtype] - elif space.isinstance_w(w_dtypes, space.w_str): + if space.isinstance_w(w_dtypes, space.w_str): if not space.str_w(w_dtypes) == 'match': raise oefmt(space.w_ValueError, 'unknown out_dtype value "%s"', space.str_w(w_dtypes)) @@ -974,14 +976,17 @@ raise oefmt(space.w_ValueError, 'identity must be 0, 1, or None') if nin==1 and nout==1 and dtypes == 'match': - w_ret = W_Ufunc1(func[0], name) + w_ret = W_Ufunc1(wrap_ext_func(func[0], name) elif nin==2 and nout==1 and dtypes == 'match': - def _func(calc_dtype, w_left, w_right): - arglist = space.wrap([w_left, w_right]) - return space.call_args(func[0], Arguments.frompacked(space, arglist)) - w_ret = W_Ufunc2(_func, name) + w_ret = W_Ufunc2(wrap_ext_func(func[0]), name) else: w_ret = W_UfuncGeneric(space, func, name, identity, nin, nout, dtypes, signature) if doc: w_ret.w_doc = space.wrap(doc) return w_ret + +def wrap_ext_func(func): + def _func(calc_dtype, w_left, w_right): + arglist = space.wrap([w_left, w_right]) + return space.call_args(func, Arguments.frompacked(space, arglist)) + return _func From noreply at buildbot.pypy.org Thu Jun 12 23:58:17 2014 From: 
noreply at buildbot.pypy.org (mattip) Date: Thu, 12 Jun 2014 23:58:17 +0200 (CEST) Subject: [pypy-commit] pypy default: fix test Message-ID: <20140612215817.6B6851D2D4D@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72038:c2bd8af00b27 Date: 2014-06-13 00:57 +0300 http://bitbucket.org/pypy/pypy/changeset/c2bd8af00b27/ Log: fix test diff --git a/pypy/tool/release/test/test_package.py b/pypy/tool/release/test/test_package.py --- a/pypy/tool/release/test/test_package.py +++ b/pypy/tool/release/test/test_package.py @@ -115,9 +115,8 @@ basedir = dirname(dirname(dirname(dirname(dirname(abspath(__file__)))))) options.no_tk = False if sys.platform == 'win32': - # Following recommended build setup at - # http://doc.pypy.org/en/latest/windows.html#abridged-method-for-ojit-builds-using-visual-studio-2008 - options.license_base = dirname(basedir) + '/local' + # as on buildbot YMMV + options.license_base = os.path.join(basedir, r'..\..\..\local') else: options.license_base = '/usr/share/doc' license = package.generate_license(py.path.local(basedir), options) From noreply at buildbot.pypy.org Fri Jun 13 14:34:35 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 13 Jun 2014 14:34:35 +0200 (CEST) Subject: [pypy-commit] pypy default: Compress the numerous lists 'rd_locs' into 2 bytes instead of WORD bytes Message-ID: <20140613123435.530521D23C1@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72039:517570d93b88 Date: 2014-06-13 14:06 +0200 http://bitbucket.org/pypy/pypy/changeset/517570d93b88/ Log: Compress the numerous lists 'rd_locs' into 2 bytes instead of WORD bytes per entry. 
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -141,15 +141,16 @@ else: coeff = 2 for pos in descr.rd_locs: - if pos == -1: + pos = rffi.cast(lltype.Signed, pos) + if pos == 0xFFFF: continue - elif pos < GPR_REGS * WORD: - locs.append(self.cpu.gen_regs[pos // WORD]) - elif pos < (GPR_REGS + XMM_REGS * coeff) * WORD: - pos = (pos // WORD - GPR_REGS) // coeff + elif pos < GPR_REGS: + locs.append(self.cpu.gen_regs[pos]) + elif pos < GPR_REGS + XMM_REGS * coeff: + pos = (pos - GPR_REGS) // coeff locs.append(self.cpu.float_regs[pos]) else: - i = pos // WORD - self.cpu.JITFRAME_FIXED_SIZE + i = pos - self.cpu.JITFRAME_FIXED_SIZE assert i >= 0 tp = inputargs[input_i].type locs.append(self.new_stack_loc(i, pos, tp)) @@ -167,12 +168,15 @@ fail_descr = cast_instance_to_gcref(guardtok.faildescr) fail_descr = rffi.cast(lltype.Signed, fail_descr) base_ofs = self.cpu.get_baseofs_of_frame_field() - positions = [0] * len(guardtok.fail_locs) + positions = [rffi.cast(rffi.USHORT, 0)] * len(guardtok.fail_locs) for i, loc in enumerate(guardtok.fail_locs): if loc is None: - positions[i] = -1 + position = 0xFFFF elif loc.is_stack(): - positions[i] = loc.value - base_ofs + assert (loc.value & (WORD - 1)) == 0, \ + "store_info_on_descr: misaligned" + position = (loc.value - base_ofs) // WORD + assert 0 < position < 0xFFFF, "store_info_on_descr: overflow!" 
else: assert loc is not self.cpu.frame_reg # for now if self.cpu.IS_64_BIT: @@ -180,10 +184,10 @@ else: coeff = 2 if loc.is_float(): - v = len(self.cpu.gen_regs) + loc.value * coeff + position = len(self.cpu.gen_regs) + loc.value * coeff else: - v = self.cpu.all_reg_indexes[loc.value] - positions[i] = v * WORD + position = self.cpu.all_reg_indexes[loc.value] + positions[i] = rffi.cast(rffi.USHORT, position) # write down the positions of locs guardtok.faildescr.rd_locs = positions # we want the descr to keep alive diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -4,7 +4,7 @@ from rpython.rtyper.annlowlevel import llhelper, MixLevelHelperAnnotator from rpython.rtyper.llannotation import lltype_to_annotation from rpython.rlib.objectmodel import we_are_translated, specialize -from rpython.jit.metainterp import history +from rpython.jit.metainterp import history, compile from rpython.jit.codewriter import heaptracker, longlong from rpython.jit.backend.model import AbstractCPU from rpython.jit.backend.llsupport import symbolic, jitframe @@ -342,10 +342,7 @@ def _decode_pos(self, deadframe, index): descr = self.get_latest_descr(deadframe) - if descr.final_descr: - assert index == 0 - return 0 - return descr.rd_locs[index] + return rffi.cast(lltype.Signed, descr.rd_locs[index]) * WORD def get_int_value(self, deadframe, index): pos = self._decode_pos(deadframe, index) @@ -659,3 +656,8 @@ calldescr.verify_types(args_i, args_r, args_f, history.VOID) # the 'i' return value is ignored (and nonsense anyway) calldescr.call_stub_i(func, args_i, args_r, args_f) + + +final_descr_rd_locs = [rffi.cast(rffi.USHORT, 0)] +history.BasicFinalDescr.rd_locs = final_descr_rd_locs +compile._DoneWithThisFrameDescr.rd_locs = final_descr_rd_locs diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- 
a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -344,9 +344,12 @@ assert res == 2 + i def test_finish(self): + from rpython.jit.backend.llsupport.llmodel import final_descr_rd_locs + i0 = BoxInt() class UntouchableFailDescr(AbstractFailDescr): final_descr = True + rd_locs = final_descr_rd_locs def __setattr__(self, name, value): if (name == 'index' or name == '_carry_around_for_tests' From noreply at buildbot.pypy.org Fri Jun 13 15:40:59 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 13 Jun 2014 15:40:59 +0200 (CEST) Subject: [pypy-commit] pypy default: fix test for windows Message-ID: <20140613134059.7FFE61C1363@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72040:9cf39ce9af62 Date: 2014-06-13 12:12 +0300 http://bitbucket.org/pypy/pypy/changeset/9cf39ce9af62/ Log: fix test for windows diff --git a/pypy/tool/release/test/test_package.py b/pypy/tool/release/test/test_package.py --- a/pypy/tool/release/test/test_package.py +++ b/pypy/tool/release/test/test_package.py @@ -18,9 +18,16 @@ pypy_c = py.path.local(pypydir).join('goal', basename) if not pypy_c.check(): if sys.platform == 'win32': - assert False, "test on win32 requires exe" - pypy_c.write("#!/bin/sh") - pypy_c.chmod(0755) + import os, shutil + for d in os.environ['PATH'].split(';'): + if os.path.exists(os.path.join(d, 'cmd.exe')): + shutil.copy(os.path.join(d, 'cmd.exe'), str(pypy_c)) + break + else: + assert False, 'could not find cmd.exe' + else: + pypy_c.write("#!/bin/sh") + pypy_c.chmod(0755) fake_pypy_c = True else: fake_pypy_c = False @@ -108,7 +115,7 @@ check(pypy, 0755) def test_generate_license(): - from os.path import dirname, abspath + from os.path import dirname, abspath, join class Options(object): pass options = Options() @@ -116,7 +123,7 @@ options.no_tk = False if sys.platform == 'win32': # as on buildbot YMMV - options.license_base = os.path.join(basedir, r'..\..\..\local') + options.license_base = join(basedir, 
r'..\..\..\local') else: options.license_base = '/usr/share/doc' license = package.generate_license(py.path.local(basedir), options) From noreply at buildbot.pypy.org Fri Jun 13 15:41:00 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 13 Jun 2014 15:41:00 +0200 (CEST) Subject: [pypy-commit] pypy default: skip most of the test if no objdump available Message-ID: <20140613134100.C5D521C1363@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72041:319216b229b6 Date: 2014-06-13 12:28 +0300 http://bitbucket.org/pypy/pypy/changeset/319216b229b6/ Log: skip most of the test if no objdump available diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -3814,7 +3814,7 @@ from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU if not isinstance(self.cpu, AbstractLLCPU): py.test.skip("pointless test on non-asm") - from rpython.jit.backend.tool.viewcode import machine_code_dump + from rpython.jit.backend.tool.viewcode import machine_code_dump, ObjdumpNotFound import ctypes targettoken = TargetToken() ops = """ @@ -3852,13 +3852,17 @@ assert mc[i].split("\t")[2].startswith(ops[i]) data = ctypes.string_at(info.asmaddr, info.asmlen) - mc = list(machine_code_dump(data, info.asmaddr, cpuname)) - lines = [line for line in mc if line.count('\t') >= 2] - checkops(lines, self.add_loop_instructions) - data = ctypes.string_at(bridge_info.asmaddr, bridge_info.asmlen) - mc = list(machine_code_dump(data, bridge_info.asmaddr, cpuname)) - lines = [line for line in mc if line.count('\t') >= 2] - checkops(lines, self.bridge_loop_instructions) + try: + mc = list(machine_code_dump(data, info.asmaddr, cpuname)) + lines = [line for line in mc if line.count('\t') >= 2] + checkops(lines, self.add_loop_instructions) + data = ctypes.string_at(bridge_info.asmaddr, bridge_info.asmlen) + mc = list(machine_code_dump(data, 
bridge_info.asmaddr, cpuname)) + lines = [line for line in mc if line.count('\t') >= 2] + checkops(lines, self.bridge_loop_instructions) + except ObjdumpNotFound: + py.test.skip("requires (g)objdump") + def test_compile_bridge_with_target(self): From noreply at buildbot.pypy.org Fri Jun 13 15:41:01 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 13 Jun 2014 15:41:01 +0200 (CEST) Subject: [pypy-commit] pypy default: work around windows limitations with PYPYLOG=C:\some\path and copy complete env to subprocess Message-ID: <20140613134101.EABE01C1363@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72042:8fe4873384ce Date: 2014-06-13 15:52 +0300 http://bitbucket.org/pypy/pypy/changeset/8fe4873384ce/ Log: work around windows limitations with PYPYLOG=C:\some\path and copy complete env to subprocess diff --git a/rpython/jit/backend/llsupport/test/zrpy_gc_test.py b/rpython/jit/backend/llsupport/test/zrpy_gc_test.py --- a/rpython/jit/backend/llsupport/test/zrpy_gc_test.py +++ b/rpython/jit/backend/llsupport/test/zrpy_gc_test.py @@ -107,7 +107,9 @@ def run(cbuilder, args=''): # pypylog = udir.join('test_zrpy_gc.log') - data = cbuilder.cmdexec(args, env={'PYPYLOG': ':%s' % pypylog}) + env = os.environ.copy() + env['PYPYLOG'] = ':%s' % pypylog + data = cbuilder.cmdexec(args, env=env) return data.strip() # ______________________________________________________________________ @@ -179,8 +181,9 @@ def run(self, name, n=2000): pypylog = udir.join('TestCompileFramework.log') - env = {'PYPYLOG': ':%s' % pypylog, - 'PYPY_NO_INLINE_MALLOC': '1'} + env = os.environ.copy() + env['PYPYLOG'] = ':%s' % pypylog + env['PYPY_NO_INLINE_MALLOC'] = '1' self._run(name, n, env) env['PYPY_NO_INLINE_MALLOC'] = '' self._run(name, n, env) From noreply at buildbot.pypy.org Fri Jun 13 15:41:03 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 13 Jun 2014 15:41:03 +0200 (CEST) Subject: [pypy-commit] pypy default: no asmgcc on MSVC Message-ID: 
<20140613134103.1B66C1C1363@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72043:ff3718cdc0c1 Date: 2014-06-13 16:12 +0300 http://bitbucket.org/pypy/pypy/changeset/ff3718cdc0c1/ Log: no asmgcc on MSVC diff --git a/rpython/jit/backend/x86/test/test_zrpy_releasegil.py b/rpython/jit/backend/x86/test/test_zrpy_releasegil.py --- a/rpython/jit/backend/x86/test/test_zrpy_releasegil.py +++ b/rpython/jit/backend/x86/test/test_zrpy_releasegil.py @@ -1,9 +1,11 @@ from rpython.jit.backend.llsupport.test.zrpy_releasegil_test import ReleaseGILTests +from rpython.translator.platform import platform as compiler class TestShadowStack(ReleaseGILTests): gcrootfinder = "shadowstack" -class TestAsmGcc(ReleaseGILTests): - gcrootfinder = "asmgcc" +if compiler.name != 'msvc': + class TestAsmGcc(ReleaseGILTests): + gcrootfinder = "asmgcc" From noreply at buildbot.pypy.org Fri Jun 13 15:41:04 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 13 Jun 2014 15:41:04 +0200 (CEST) Subject: [pypy-commit] pypy default: do not use asmgcc on MSVC Message-ID: <20140613134104.45E991C1363@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72044:72420a9c1aba Date: 2014-06-13 16:39 +0300 http://bitbucket.org/pypy/pypy/changeset/72420a9c1aba/ Log: do not use asmgcc on MSVC diff --git a/rpython/jit/backend/x86/test/test_ztranslation_external_exception.py b/rpython/jit/backend/x86/test/test_ztranslation_external_exception.py --- a/rpython/jit/backend/x86/test/test_ztranslation_external_exception.py +++ b/rpython/jit/backend/x86/test/test_ztranslation_external_exception.py @@ -1,12 +1,19 @@ from rpython.jit.backend.llsupport.test.ztranslation_test import TranslationRemoveTypePtrTest from rpython.translator.translator import TranslationContext from rpython.config.translationoption import DEFL_GC +from rpython.translator.platform import platform as compiler + +if compiler.name == 'msvc': + _MSVC = True +else: + _MSVC = False class 
TestTranslationRemoveTypePtrX86(TranslationRemoveTypePtrTest): def _get_TranslationContext(self): t = TranslationContext() t.config.translation.gc = DEFL_GC # 'hybrid' or 'minimark' - t.config.translation.gcrootfinder = 'asmgcc' + if not _MSVC: + t.config.translation.gcrootfinder = 'asmgcc' t.config.translation.list_comprehension_operations = True t.config.translation.gcremovetypeptr = True return t From noreply at buildbot.pypy.org Fri Jun 13 15:45:57 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 13 Jun 2014 15:45:57 +0200 (CEST) Subject: [pypy-commit] pypy default: Support "PYPYLOG=+filename", equivalent to "PYPYLOG=filename" but works Message-ID: <20140613134557.582C81C1363@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72045:ded4fbaff95a Date: 2014-06-13 15:45 +0200 http://bitbucket.org/pypy/pypy/changeset/ded4fbaff95a/ Log: Support "PYPYLOG=+filename", equivalent to "PYPYLOG=filename" but works also if the file name happens to contain a colon. diff --git a/rpython/translator/c/src/debug_print.c b/rpython/translator/c/src/debug_print.c --- a/rpython/translator/c/src/debug_print.c +++ b/rpython/translator/c/src/debug_print.c @@ -32,9 +32,14 @@ if (filename && filename[0]) { char *colon = strchr(filename, ':'); + if (filename[0] == '+') + { + filename += 1; + colon = NULL; + } if (!colon) { - /* PYPYLOG=filename --- profiling version */ + /* PYPYLOG=+filename (or just 'filename') --- profiling version */ debug_profile = 1; pypy_setup_profiling(); } diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py --- a/rpython/translator/c/test/test_standalone.py +++ b/rpython/translator/c/test/test_standalone.py @@ -397,6 +397,22 @@ assert 'cat2}' in data assert 'baz' not in data assert 'bok' not in data + # check with PYPYLOG=+somefilename + path = udir.join('test_debug_xxx_prof_2.log') + out, err = cbuilder.cmdexec("", err=True, env={'PYPYLOG': '+%s' % path}) + size = 
os.stat(str(path)).st_size + assert out.strip() == 'got:a.' + str(size) + '.' + assert not err + assert path.check(file=1) + data = path.read() + assert 'toplevel' in data + assert '{mycat' in data + assert 'mycat}' in data + assert 'foo 2 bar 3' not in data + assert '{cat2' in data + assert 'cat2}' in data + assert 'baz' not in data + assert 'bok' not in data # check with PYPYLOG=myc:somefilename (includes mycat but not cat2) path = udir.join('test_debug_xxx_myc.log') out, err = cbuilder.cmdexec("", err=True, From noreply at buildbot.pypy.org Fri Jun 13 16:07:01 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 13 Jun 2014 16:07:01 +0200 (CEST) Subject: [pypy-commit] pypy default: document Message-ID: <20140613140701.C0F0F1D2D9F@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72046:737638800736 Date: 2014-06-13 17:01 +0300 http://bitbucket.org/pypy/pypy/changeset/737638800736/ Log: document diff --git a/pypy/doc/config/translation.log.txt b/pypy/doc/config/translation.log.txt --- a/pypy/doc/config/translation.log.txt +++ b/pypy/doc/config/translation.log.txt @@ -2,4 +2,4 @@ These must be enabled by setting the PYPYLOG environment variable. The exact set of features supported by PYPYLOG is described in -pypy/translation/c/src/debug_print.h. +rpython/translator/c/src/debug_print.h. diff --git a/pypy/doc/man/pypy.1.rst b/pypy/doc/man/pypy.1.rst --- a/pypy/doc/man/pypy.1.rst +++ b/pypy/doc/man/pypy.1.rst @@ -95,13 +95,12 @@ ``PYPYLOG`` If set to a non-empty value, enable logging, the format is: - *fname* + *fname* or *+fname* logging for profiling: includes all ``debug_start``/``debug_stop`` but not any nested ``debug_print``. *fname* can be ``-`` to log to *stderr*. - Note that using a : in fname is a bad idea, Windows - users, beware. + The *+fname* form can be used if there is a *:* in fname ``:``\ *fname* Full logging, including ``debug_print``. 
diff --git a/rpython/translator/c/src/debug_print.h b/rpython/translator/c/src/debug_print.h --- a/rpython/translator/c/src/debug_print.h +++ b/rpython/translator/c/src/debug_print.h @@ -6,12 +6,13 @@ /* values of the PYPYLOG environment variable: ("top-level" debug_prints means not between debug_start and debug_stop) - (empty) logging is turned off, apart from top-level debug_prints + (empty) logging is turned off, apart from top-level debug_prints that go to stderr - fname logging for profiling: includes all debug_start/debug_stop - but not any nested debug_print - :fname full logging - prefix:fname conditional logging + fname or +fname logging for profiling: includes all debug_start/debug_stop + but not any nested debug_print, use +fname if there is a + colon : in fname + :fname full logging + prefix:fname conditional logging prefix1,prefix2:fname conditional logging with multiple selections Conditional logging means that it only includes the debug_start/debug_stop From noreply at buildbot.pypy.org Fri Jun 13 17:15:11 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 13 Jun 2014 17:15:11 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: Add the PDF here Message-ID: <20140613151511.948FE1C1363@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r5344:09e5b02e3ed6 Date: 2014-06-13 17:14 +0200 http://bitbucket.org/pypy/extradoc/changeset/09e5b02e3ed6/ Log: Add the PDF here diff --git a/talk/dls2014/paper/paper.pdf b/talk/dls2014/paper/paper.pdf new file mode 100644 index 0000000000000000000000000000000000000000..952e31bae97ac98d48739e0a955a0662d3c64fe5 GIT binary patch [cut] From noreply at buildbot.pypy.org Sat Jun 14 03:21:48 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Sat, 14 Jun 2014 03:21:48 +0200 (CEST) Subject: [pypy-commit] pypy gc-two-end-nursery: record objects with weakref/light_finalizers/finalizers Message-ID: <20140614012148.831DB1C03AC@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: 
gc-two-end-nursery Changeset: r72047:0d353260bcad Date: 2014-06-13 21:08 -0400 http://bitbucket.org/pypy/pypy/changeset/0d353260bcad/ Log: record objects with weakref/light_finalizers/finalizers diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -621,23 +621,37 @@ size_gc_header = self.gcheaderbuilder.size_gc_header totalsize = size_gc_header + size rawtotalsize = raw_malloc_usage(totalsize) - min_size = raw_malloc_usage(self.minimal_size_in_nursery) - if rawtotalsize < min_size: - #round up the raw totalsize to min_size - totalsize = rawtotalsize = min_size - result = self.nursery_second_part_free + if needs_finalizer and not is_finalizer_light: + ll_assert(not contains_weakptr, + "'needs_finalizer' and 'contains_weakptr' both specified") + obj = self.external_malloc(typeid, 0, can_make_young=False) + self.objects_with_finalizers.append(obj) + else: + min_size = raw_malloc_usage(self.minimal_size_in_nursery) + if rawtotalsize < min_size: + #round up the raw totalsize to min_size + totalsize = rawtotalsize = min_size + result = self.nursery_second_part_free - #allocate the obj in the opposite direction as obj in malloc_fixedsize_clear() - self.nursery_second_part_free = result - totalsize - #make sure the new object won't overwrite existing objects - if self.nursery_second_part_free < self.nursery_free: - ##TODO:deal with different GC states - result = self.minor_collection() - #move the pointer - result -= totalsize - llarena.arena_reserve(result, totalsize) - #real object beginning address - obj = result + size_gc_header + #allocate the obj in the opposite direction as obj in malloc_fixedsize_clear() + self.nursery_second_part_free = result - totalsize + #make sure the new object won't overwrite existing objects + if self.nursery_second_part_free < self.nursery_free: + ##TODO:deal with different GC states + result = self.minor_collection() + #move the 
pointer + result -= totalsize + llarena.arena_reserve(result, totalsize) + #init gc flags to zero + self.init_gc_object(result, typeid, flags=0) + #real object beginning address + obj = result + size_gc_header + #record obj with light finalizer + if is_finalizer_light: + self.young_objects_with_light_finalizers.append(obj) + #record obj with weakptr + if contains_weakptr: + self.young_objects_with_weakrefs.append(obj) return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF) def malloc_varsize_clear(self, typeid, length, size, itemsize, From noreply at buildbot.pypy.org Sun Jun 15 12:17:22 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:17:22 +0200 (CEST) Subject: [pypy-commit] pypy default: Add a passing test Message-ID: <20140615101722.06A731D2DC2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72048:24bcf07158cb Date: 2014-06-13 16:32 +0200 http://bitbucket.org/pypy/pypy/changeset/24bcf07158cb/ Log: Add a passing test diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py @@ -603,6 +603,11 @@ increment = jitdriver_sd.warmstate.increment_trace_eagerness return jitcounter.tick(hash, increment) + def get_index_of_guard_value(self): + if (self.status & self.ST_TYPE_MASK) == 0: + return -1 + return intmask(self.status >> self.ST_SHIFT) + def start_compiling(self): # start tracing and compiling from this guard. self.status |= self.ST_BUSY_FLAG diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -1435,6 +1435,16 @@ res = self.meta_interp(f, [299], listops=True) assert res == f(299) self.check_resops(guard_class=0, guard_value=6) + # + # The original 'guard_class' is rewritten to be directly 'guard_value'. 
+ # Check that this rewrite does not interfere with the descr, which + # should be a full-fledged multivalued 'guard_value' descr. + if self.basic: + for loop in get_stats().get_all_loops(): + for op in loop.get_operations(): + if op.getopname() == "guard_value": + descr = op.getdescr() + assert descr.get_index_of_guard_value() >= 0 def test_merge_guardnonnull_guardclass(self): myjitdriver = JitDriver(greens = [], reds = ['x', 'l']) From noreply at buildbot.pypy.org Sun Jun 15 12:17:23 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:17:23 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder-perf: Add a comment Message-ID: <20140615101723.34F641D2DC2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder-perf Changeset: r72049:85bec5137c7b Date: 2014-06-15 11:22 +0200 http://bitbucket.org/pypy/pypy/changeset/85bec5137c7b/ Log: Add a comment diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -35,6 +35,11 @@ # need a way to express c_memcpy() below --- similar to copystrcontent, # but without the assumption that it's about a string (or unicode). # +# XXX alternatively, a simpler solution might be to allocate all pieces +# as GC-managed rstr.STR. To avoid filling the old generation with +# garbage we could add a weakref holding the most recently built chain +# of STRs, and reuse it the next time if it's still there. 
+# # ------------------------------------------------------------ From noreply at buildbot.pypy.org Sun Jun 15 12:17:24 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:17:24 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Trying for a different approach, simplifying the JITted version Message-ID: <20140615101724.653CB1D2DC2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72050:b977712b912f Date: 2014-06-15 11:23 +0200 http://bitbucket.org/pypy/pypy/changeset/b977712b912f/ Log: Trying for a different approach, simplifying the JITted version From noreply at buildbot.pypy.org Sun Jun 15 12:17:25 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:17:25 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: in-progress Message-ID: <20140615101725.887191D2DC2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72051:335eaac09c76 Date: 2014-06-15 12:15 +0200 http://bitbucket.org/pypy/pypy/changeset/335eaac09c76/ Log: in-progress diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -2,7 +2,6 @@ from rpython.rlib.objectmodel import enforceargs, specialize from rpython.rlib.rarithmetic import ovfcheck, r_uint from rpython.rlib.debug import ll_assert -from rpython.rlib.rgc import must_be_light_finalizer from rpython.rtyper.rptr import PtrRepr from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr @@ -47,68 +46,38 @@ func._always_inline_ = True return func -def uint_lt(a, b): - return r_uint(a) < r_uint(b) -def uint_le(a, b): - return r_uint(a) <= r_uint(b) -def uint_gt(a, b): - return r_uint(a) > r_uint(b) - def new_grow_funcs(name, mallocfn): @enforceargs(None, int) def stringbuilder_grow(ll_builder, needed): - 
charsize = ll_builder.charsize try: needed = ovfcheck(needed + ll_builder.total_size) needed = ovfcheck(needed + 63) & ~63 - needed_chars = ovfcheck(needed * charsize) + total_size = ll_builder.total_size + needed except OverflowError: raise MemoryError # - new_piece = lltype.malloc(STRINGPIECE) - new_piece.piece_lgt = needed_chars - raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed_chars, flavor='raw') - new_piece.raw_ptr = raw_ptr - new_piece.prev_piece = ll_builder.extra_pieces - ll_builder.extra_pieces = new_piece - ll_builder.current_ofs = rffi.cast(lltype.Signed, raw_ptr) - ll_builder.current_end = (rffi.cast(lltype.Signed, raw_ptr) + - needed_chars) - try: - ll_builder.total_size = ll_builder.total_size + needed - except OverflowError: - raise MemoryError - if ll_builder.current_buf: - STRTYPE = lltype.typeOf(ll_builder.current_buf).TO - ll_builder.initial_buf = ll_builder.current_buf - ll_builder.current_buf = lltype.nullptr(STRTYPE) - return ll_builder.current_ofs + new_string = mallocfn(needed) + # + old_piece = lltype.malloc(STRINGPIECE) + old_piece.buf = ll_builder.current_buf + old_piece.prev_piece = ll_builder.extra_pieces + ll_builder.extra_pieces = old_piece + ll_builder.current_buf = new_string + ll_builder.current_pos = 0 + ll_builder.current_end = needed + ll_builder.total_size = total_size - def stringbuilder_append_overflow(ll_builder, ll_str): + def stringbuilder_append_overflow(ll_builder, ll_str, size): # First, the part that still fits in the current piece - ofs = ll_builder.current_ofs - part1 = ll_builder.current_end - ofs # in bytes, not (uni)chars - # --- no GC! 
--- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), - ll_str2raw(ll_str, 0), - part1) - # --- end --- - # Next, the remaining part, in a new piece - part1 //= ll_builder.charsize - part2 = len(ll_str.chars) - part1 # in (uni)chars - ll_assert(part2 > 0, "append_overflow: no overflow") - ofs = stringbuilder_grow(ll_builder, part2) - ll_builder.current_ofs = ofs + part2 * ll_builder.charsize - # --- no GC! --- - ll_assert(not ll_builder.current_buf, "after grow(), current_buf!=NULL") - raw = lltype.nullptr(rffi.CCHARP.TO) - rffi.c_memcpy(rffi.ptradd(raw, ofs), - ll_str2raw(ll_str, part1), - part2 * ll_builder.charsize) - # --- end --- + part1 = ll_builder.current_end - ll_builder.current_pos + start = ll_builder.skip + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, ll_builder.current_pos, + part1) + ll_builder.skip += part1 + stringbuilder_grow(ll_builder, size - part1) return (func_with_new_name(stringbuilder_grow, '%s_grow' % name), func_with_new_name(stringbuilder_append_overflow, @@ -118,33 +87,22 @@ unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode) STRINGPIECE = lltype.GcStruct('stringpiece', - ('raw_ptr', rffi.CCHARP), - ('piece_lgt', lltype.Signed), # in bytes - ('prev_piece', lltype.Ptr(lltype.GcForwardReference())), - rtti=True) + ('buf', lltype.Ptr(STR)), + ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) STRINGPIECE.prev_piece.TO.become(STRINGPIECE) -@must_be_light_finalizer -def ll_destroy_string_piece(piece): - if piece.raw_ptr: - lltype.free(piece.raw_ptr, flavor='raw') - -_SbRtti = lltype.getRuntimeTypeInfo(STRINGPIECE) -def ll_string_piece_rtti(piece): - return _SbRtti - STRINGBUILDER = lltype.GcStruct('stringbuilder', ('current_buf', lltype.Ptr(STR)), - ('current_ofs', lltype.Signed), + ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), + ('skip', lltype.Signed), ('extra_pieces', 
lltype.Ptr(STRINGPIECE)), - ('initial_buf', lltype.Ptr(STR)), adtmeths={ 'grow': staticAdtMethod(stringbuilder_grows[0]), 'append_overflow': staticAdtMethod(stringbuilder_grows[1]), + 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), - 'charsize': 1, } ) @@ -159,17 +117,11 @@ 'grow': staticAdtMethod(unicodebuilder_grows[0]), 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), - 'charsize': rffi.sizeof(lltype.UniChar), } ) -def ll_baseofs(ll_str): - STRTYPE = lltype.typeOf(ll_str).TO - ofs = rffi.offsetof(STRTYPE, 'chars') + rffi.itemoffsetof(STRTYPE.chars, 0) - return llmemory.raw_malloc_usage(ofs) # for direct run -ll_baseofs._always_inline_ = True - +@always_inline def ll_str2raw(ll_str, charoffset): STRTYPE = lltype.typeOf(ll_str).TO ofs = (rffi.offsetof(STRTYPE, 'chars') + @@ -177,26 +129,9 @@ ofs = llmemory.raw_malloc_usage(ofs) # for direct run ofs += rffi.sizeof(STRTYPE.chars.OF) * charoffset return rffi.ptradd(rffi.cast(rffi.CCHARP, ll_str), ofs) -ll_str2raw._always_inline_ = True - -def ll_rawsetitem(raw, byteoffset, char): - raw = rffi.ptradd(raw, byteoffset) - if lltype.typeOf(char) == lltype.Char: - raw[0] = char - else: - rffi.cast(rffi.CWCHARP, raw)[0] = char -ll_rawsetitem._always_inline_ = True class BaseStringBuilderRepr(AbstractStringBuilderRepr): - - def rtyper_new(self, hop): - destrptr = hop.rtyper.annotate_helper_fn( - ll_destroy_string_piece, [SomePtr(lltype.Ptr(STRINGPIECE))]) - hop.rtyper.attachRuntimeTypeInfoFunc(STRINGPIECE, ll_string_piece_rtti, - STRINGPIECE, destrptr) - return AbstractStringBuilderRepr.rtyper_new(self, hop) - def empty(self): return nullptr(self.lowleveltype.TO) @@ -205,71 +140,25 @@ init_size = min(init_size, 1280) ll_builder = lltype.malloc(cls.lowleveltype.TO) ll_builder.current_buf = cls.mallocfn(init_size) - ofs = ll_baseofs(ll_builder.current_buf) - 
ll_builder.current_ofs = ofs - ll_builder.current_end = ofs + init_size * ll_builder.charsize + ll_builder.current_pos = 0 + ll_builder.current_end = init_size ll_builder.total_size = init_size return ll_builder @staticmethod @always_inline def ll_append(ll_builder, ll_str): - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_slice( - ll_builder, ll_str, 0, len(ll_str.chars)): - return - BaseStringBuilderRepr._ll_append(ll_builder, ll_str) - - @staticmethod - @always_inline - @jit.dont_look_inside - def _ll_append(ll_builder, ll_str): - lgt = len(ll_str.chars) * ll_builder.charsize # in bytes - ofs = ll_builder.current_ofs - newofs = ofs + lgt - if uint_gt(newofs, ll_builder.current_end): - ll_builder.append_overflow(ll_builder, ll_str) - else: - ll_builder.current_ofs = newofs - # --- no GC! --- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), - ll_str2raw(ll_str, 0), - lgt) - # --- end --- + BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str, + 0, len(ll_str.chars)) @staticmethod @always_inline def ll_append_char(ll_builder, char): - if jit.we_are_jitted(): - BaseStringBuilderRepr._ll_jit_append_char(ll_builder, char) - else: - BaseStringBuilderRepr._ll_append_char(ll_builder, char) - - @staticmethod - @always_inline - @jit.dont_look_inside - def _ll_append_char(ll_builder, char): - ofs = ll_builder.current_ofs - if ofs == ll_builder.current_end: - ofs = ll_builder.grow(ll_builder, 1) - # --- no GC! 
--- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - ll_rawsetitem(raw, ofs, char) - # --- end --- - ll_builder.current_ofs = ofs + ll_builder.charsize - - @staticmethod - def _ll_jit_append_char(ll_builder, char): - ofs = ll_builder.current_ofs - if bool(ll_builder.current_buf) and uint_lt(ofs, - ll_builder.current_end): - ll_builder.current_ofs = ofs + ll_builder.charsize - buf = ll_builder.current_buf - index = (ofs - ll_baseofs(buf)) // ll_builder.charsize - buf.chars[index] = char - return - BaseStringBuilderRepr._ll_append_char(ll_builder, char) + jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, + ll_builder.grow, ll_builder, 1) + pos = ll_builder.current_pos + ll_builder.current_pos = pos + 1 + ll_builder.current_buf.chars[pos] = char @staticmethod def ll_append_char_2(ll_builder, char0, char1): @@ -311,33 +200,24 @@ @staticmethod @always_inline def ll_append_slice(ll_builder, ll_str, start, end): + size = end - start if jit.we_are_jitted(): if BaseStringBuilderRepr._ll_jit_try_append_slice( - ll_builder, ll_str, start, end - start): + ll_builder, ll_str, size): return - BaseStringBuilderRepr._ll_append_slice(ll_builder, ll_str, - start, end) + ll_builder.skip = start + jit.conditional_call( + size > ll_builder.current_end - ll_builder.current_pos, + ll_builder.append_overflow, ll_builder, ll_str, size) + start = ll_builder.skip + size = end - start + pos = ll_builder.current_pos + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, pos, size) + ll_builder.current_pos = pos + size @staticmethod - @jit.dont_look_inside - def _ll_append_slice(ll_builder, ll_str, start, end): - lgt = (end - start) * ll_builder.charsize # in bytes - ofs = ll_builder.current_ofs - newofs = ofs + lgt - if uint_gt(newofs, ll_builder.current_end): - ll_str = rstr.LLHelpers.ll_stringslice_startstop(ll_str, start, end) - ll_builder.append_overflow(ll_builder, ll_str) - else: - ll_builder.current_ofs = newofs - # --- no GC! 
--- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), - ll_str2raw(ll_str, start), - lgt) - # --- end --- - - @staticmethod - def _ll_jit_try_append_slice(ll_builder, ll_str, start, size): + def _ll_jit_try_append_slice(ll_builder, ll_str, size): if jit.isconstant(size): if size == 0: return True @@ -376,20 +256,20 @@ @staticmethod @jit.dont_look_inside def _ll_append_multiple_char(ll_builder, char, times): - lgt = times * ll_builder.charsize # in bytes - ofs = ll_builder.current_ofs - newofs = ofs + lgt - if uint_gt(newofs, ll_builder.current_end): - ll_str = rstr.LLHelpers.ll_char_mul(char, times) - ll_builder.append_overflow(ll_builder, ll_str) - else: - ll_builder.current_ofs = newofs - # --- no GC! --- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - while uint_lt(ofs, newofs): - ll_rawsetitem(raw, ofs, char) - ofs += ll_builder.charsize - # --- end --- + part1 = ll_builder.current_end - ll_builder.current_pos + if times > part1: + times -= part1 + buf = ll_builder.current_buf + for i in xrange(ll_builder.current_pos, ll_builder.current_end): + buf.chars[i] = char + ll_builder.grow(ll_builder, times) + # + buf = ll_builder.current_buf + pos = ll_builder.current_pos + end = pos + times + ll_builder.current_pos = end + for i in xrange(pos, end): + buf.chars[i] = char @staticmethod def _ll_jit_try_append_multiple_char(ll_builder, char, size): @@ -437,26 +317,22 @@ @always_inline def ll_getlength(ll_builder): num_chars_missing_from_last_piece = ( - (ll_builder.current_end - ll_builder.current_ofs) - // ll_builder.charsize) + ll_builder.current_end - ll_builder.current_pos) return ll_builder.total_size - num_chars_missing_from_last_piece @classmethod def ll_build(cls, ll_builder): - buf = ll_builder.current_buf - if buf: + if not ll_builder.extra_pieces: # fast-path: the result fits in a single buf. 
- # it is already a GC string - final_size = cls.ll_getlength(ll_builder) - ll_assert(final_size >= 0, "negative final_size") + final_size = ll_builder.current_pos + buf = ll_builder.current_buf if ll_builder.total_size != final_size: ll_assert(final_size < ll_builder.total_size, "final_size > ll_builder.total_size?") buf = rgc.ll_shrink_array(buf, final_size) + ll_builder.current_buf = buf + ll_builder.current_end = final_size ll_builder.total_size = final_size - ll_builder.current_buf = buf - ll_builder.current_ofs = 0 - ll_builder.current_end = 0 return buf else: return BaseStringBuilderRepr._ll_build_extra(cls, ll_builder) @@ -467,37 +343,29 @@ final_size = cls.ll_getlength(ll_builder) ll_assert(final_size >= 0, "negative final_size") extra = ll_builder.extra_pieces - ll_assert(bool(extra), "build() twice on a StringBuilder") ll_builder.extra_pieces = lltype.nullptr(STRINGPIECE) + # result = cls.mallocfn(final_size) - piece_lgt = ll_builder.current_ofs - rffi.cast(lltype.Signed, # in bytes - extra.raw_ptr) - ll_assert(piece_lgt == extra.piece_lgt - (ll_builder.current_end - - ll_builder.current_ofs), + piece = ll_builder.current_buf + piece_lgt = ll_builder.current_pos + ll_assert(ll_builder.current_end == len(piece.chars), "bogus last piece_lgt") ll_builder.total_size = final_size ll_builder.current_buf = result - ll_builder.current_ofs = 0 - ll_builder.current_end = 0 + ll_builder.current_pos = final_size + ll_builder.current_end = final_size - # --- no GC! 
--- - dst = ll_str2raw(result, final_size) + dst = final_size while True: - dst = rffi.ptradd(dst, -piece_lgt) - rffi.c_memcpy(dst, extra.raw_ptr, piece_lgt) - lltype.free(extra.raw_ptr, flavor='raw') - extra.raw_ptr = lltype.nullptr(rffi.CCHARP.TO) - extra = extra.prev_piece + dst -= piece_lgt + ll_assert(dst >= 0, "rbuilder build: overflow") + ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) if not extra: break - piece_lgt = extra.piece_lgt - # --- end --- - - initial_len = len(ll_builder.initial_buf.chars) - ll_assert(dst == ll_str2raw(result, initial_len), - "bad first piece size") - cls.copy_string_contents_fn(ll_builder.initial_buf, result, - 0, 0, initial_len) + piece = extra.buf + piece_lgt = len(piece.chars) + extra = extra.prev_piece + ll_assert(dst == 0, "rbuilder build: underflow") return result @classmethod @@ -508,7 +376,6 @@ lowleveltype = lltype.Ptr(STRINGBUILDER) basetp = STR mallocfn = staticmethod(rstr.mallocstr) - copy_string_contents_fn = staticmethod(rstr.copy_string_contents) string_repr = string_repr char_repr = char_repr raw_ptr_repr = PtrRepr( @@ -519,7 +386,6 @@ lowleveltype = lltype.Ptr(UNICODEBUILDER) basetp = UNICODE mallocfn = staticmethod(rstr.mallocunicode) - copy_string_contents_fn = staticmethod(rstr.copy_unicode_contents) string_repr = unicode_repr char_repr = unichar_repr raw_ptr_repr = PtrRepr( diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py --- a/rpython/rtyper/test/test_rbuilder.py +++ b/rpython/rtyper/test/test_rbuilder.py @@ -10,20 +10,32 @@ class TestStringBuilderDirect(object): + def test_nooveralloc(self): + sb = StringBuilderRepr.ll_new(33) + StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) + assert StringBuilderRepr.ll_getlength(sb) == 33 + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + assert StringBuilderRepr.ll_getlength(sb) == 33 + + def test_shrinking(self): + sb = StringBuilderRepr.ll_new(100) + StringBuilderRepr.ll_append(sb, 
llstr("abc" * 11)) + assert StringBuilderRepr.ll_getlength(sb) == 33 + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + assert StringBuilderRepr.ll_getlength(sb) == 33 + def test_simple(self): sb = StringBuilderRepr.ll_new(3) StringBuilderRepr.ll_append_char(sb, 'x') StringBuilderRepr.ll_append(sb, llstr("abc")) StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5) StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3) + assert StringBuilderRepr.ll_getlength(sb) == 10 s = StringBuilderRepr.ll_build(sb) assert hlstr(s) == "xabcobayyy" - - def test_nooveralloc(self): - sb = StringBuilderRepr.ll_new(33) - StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) - s = StringBuilderRepr.ll_build(sb) - assert hlstr(s) == "abc" * 11 + assert StringBuilderRepr.ll_getlength(sb) == 10 def test_grow_when_append_char(self): sb = StringBuilderRepr.ll_new(33) From noreply at buildbot.pypy.org Sun Jun 15 12:17:27 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:17:27 +0200 (CEST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <20140615101727.0F45F1D2DC3@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72052:6203aeedc273 Date: 2014-06-15 12:16 +0200 http://bitbucket.org/pypy/pypy/changeset/6203aeedc273/ Log: merge heads diff --git a/pypy/doc/config/translation.log.txt b/pypy/doc/config/translation.log.txt --- a/pypy/doc/config/translation.log.txt +++ b/pypy/doc/config/translation.log.txt @@ -2,4 +2,4 @@ These must be enabled by setting the PYPYLOG environment variable. The exact set of features supported by PYPYLOG is described in -pypy/translation/c/src/debug_print.h. +rpython/translator/c/src/debug_print.h. 
diff --git a/pypy/doc/man/pypy.1.rst b/pypy/doc/man/pypy.1.rst --- a/pypy/doc/man/pypy.1.rst +++ b/pypy/doc/man/pypy.1.rst @@ -95,13 +95,12 @@ ``PYPYLOG`` If set to a non-empty value, enable logging, the format is: - *fname* + *fname* or *+fname* logging for profiling: includes all ``debug_start``/``debug_stop`` but not any nested ``debug_print``. *fname* can be ``-`` to log to *stderr*. - Note that using a : in fname is a bad idea, Windows - users, beware. + The *+fname* form can be used if there is a *:* in fname ``:``\ *fname* Full logging, including ``debug_print``. diff --git a/rpython/translator/c/src/debug_print.h b/rpython/translator/c/src/debug_print.h --- a/rpython/translator/c/src/debug_print.h +++ b/rpython/translator/c/src/debug_print.h @@ -6,12 +6,13 @@ /* values of the PYPYLOG environment variable: ("top-level" debug_prints means not between debug_start and debug_stop) - (empty) logging is turned off, apart from top-level debug_prints + (empty) logging is turned off, apart from top-level debug_prints that go to stderr - fname logging for profiling: includes all debug_start/debug_stop - but not any nested debug_print - :fname full logging - prefix:fname conditional logging + fname or +fname logging for profiling: includes all debug_start/debug_stop + but not any nested debug_print, use +fname if there is a + colon : in fname + :fname full logging + prefix:fname conditional logging prefix1,prefix2:fname conditional logging with multiple selections Conditional logging means that it only includes the debug_start/debug_stop From noreply at buildbot.pypy.org Sun Jun 15 12:37:14 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:37:14 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: append_charpsize() Message-ID: <20140615103714.8EE0F1D2E00@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72053:85c98f36d29b Date: 2014-06-15 12:22 +0200 
http://bitbucket.org/pypy/pypy/changeset/85c98f36d29b/ Log: append_charpsize() diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -295,23 +295,18 @@ @staticmethod @jit.dont_look_inside def ll_append_charpsize(ll_builder, charp, size): - lgt = size * ll_builder.charsize # in bytes - ofs = ll_builder.current_ofs - newofs = ofs + lgt - if uint_gt(newofs, ll_builder.current_end): - if ll_builder.charsize == 1: - ll_str = llstr(rffi.charpsize2str(charp, size)) - else: - ll_str = llunicode(rffi.wcharpsize2unicode(charp, size)) - ll_builder.append_overflow(ll_builder, ll_str) - else: - ll_builder.current_ofs = newofs - # --- no GC! --- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - rffi.c_memcpy(rffi.ptradd(raw, ofs), - rffi.cast(rffi.CCHARP, charp), - lgt) - # --- end --- + part1 = ll_builder.current_end - ll_builder.current_pos + if size > part1: + # First, the part that still fits + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, + ll_builder.current_pos, part1) + charp = rffi.ptradd(charp, part1) + size -= part1 + ll_builder.grow(ll_builder, size) + # + pos = ll_builder.current_pos + ll_builder.current_pos = pos + size + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) @staticmethod @always_inline From noreply at buildbot.pypy.org Sun Jun 15 12:37:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:37:15 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: unicode fixes Message-ID: <20140615103715.B4E231D2E00@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72054:31ad9da78807 Date: 2014-06-15 12:24 +0200 http://bitbucket.org/pypy/pypy/changeset/31ad9da78807/ Log: unicode fixes diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py 
+++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -60,7 +60,8 @@ # new_string = mallocfn(needed) # - old_piece = lltype.malloc(STRINGPIECE) + PIECE = lltype.typeOf(ll_builder.extra_pieces).TO + old_piece = lltype.malloc(PIECE) old_piece.buf = ll_builder.current_buf old_piece.prev_piece = ll_builder.extra_pieces ll_builder.extra_pieces = old_piece @@ -106,16 +107,22 @@ } ) +UNICODEPIECE = lltype.GcStruct('unicodepiece', + ('buf', lltype.Ptr(UNICODE)), + ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) +UNICODEPIECE.prev_piece.TO.become(UNICODEPIECE) + UNICODEBUILDER = lltype.GcStruct('unicodebuilder', ('current_buf', lltype.Ptr(UNICODE)), - ('current_ofs', lltype.Signed), # position measured in *bytes* - ('current_end', lltype.Signed), # position measured in *bytes* + ('current_pos', lltype.Signed), + ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('extra_pieces', lltype.Ptr(STRINGPIECE)), - ('initial_buf', lltype.Ptr(UNICODE)), + ('skip', lltype.Signed), + ('extra_pieces', lltype.Ptr(UNICODEPIECE)), adtmeths={ 'grow': staticAdtMethod(unicodebuilder_grows[0]), 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), + 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), } ) @@ -338,7 +345,7 @@ final_size = cls.ll_getlength(ll_builder) ll_assert(final_size >= 0, "negative final_size") extra = ll_builder.extra_pieces - ll_builder.extra_pieces = lltype.nullptr(STRINGPIECE) + ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) # result = cls.mallocfn(final_size) piece = ll_builder.current_buf From noreply at buildbot.pypy.org Sun Jun 15 12:37:16 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:37:16 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: ll_append_char_2. 
Message-ID: <20140615103716.E5C411D2E00@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72055:46acbdfc4837 Date: 2014-06-15 12:36 +0200 http://bitbucket.org/pypy/pypy/changeset/46acbdfc4837/ Log: ll_append_char_2. diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -1,6 +1,6 @@ from rpython.rlib import rgc, jit from rpython.rlib.objectmodel import enforceargs, specialize -from rpython.rlib.rarithmetic import ovfcheck, r_uint +from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.debug import ll_assert from rpython.rtyper.rptr import PtrRepr from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr @@ -80,9 +80,19 @@ ll_builder.skip += part1 stringbuilder_grow(ll_builder, size - part1) + def stringbuilder_append_overflow_2(ll_builder, char0): + # Overflow when writing two chars. There are two cases depending + # on whether one char still fits or not. 
+ if ll_builder.current_pos < ll_builder.current_end: + ll_builder.current_buf.chars[ll_builder.current_pos] = char0 + ll_builder.skip = 1 + stringbuilder_grow(ll_builder, 2) + return (func_with_new_name(stringbuilder_grow, '%s_grow' % name), func_with_new_name(stringbuilder_append_overflow, - '%s_append_overflow' % name)) + '%s_append_overflow' % name), + func_with_new_name(stringbuilder_append_overflow_2, + '%s_append_overflow_2' % name)) stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr) unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode) @@ -102,6 +112,7 @@ adtmeths={ 'grow': staticAdtMethod(stringbuilder_grows[0]), 'append_overflow': staticAdtMethod(stringbuilder_grows[1]), + 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), } @@ -122,6 +133,7 @@ adtmeths={ 'grow': staticAdtMethod(unicodebuilder_grows[0]), 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), + 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), } @@ -169,40 +181,20 @@ @staticmethod def ll_append_char_2(ll_builder, char0, char1): - if jit.we_are_jitted(): - BaseStringBuilderRepr._ll_jit_append_char_2(ll_builder, char0,char1) - else: - BaseStringBuilderRepr._ll_append_char_2(ll_builder, char0, char1) - - @staticmethod - @jit.dont_look_inside - def _ll_append_char_2(ll_builder, char0, char1): - ofs = ll_builder.current_ofs - end = ofs + 2 * ll_builder.charsize - if uint_gt(end, ll_builder.current_end): - BaseStringBuilderRepr._ll_append_char(ll_builder, char0) - BaseStringBuilderRepr._ll_append_char(ll_builder, char1) - return - ll_builder.current_ofs = end - # --- no GC! 
--- - raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf) - ll_rawsetitem(raw, ofs, char0) - ll_rawsetitem(raw, ofs + ll_builder.charsize, char1) - # --- end --- - - @staticmethod - def _ll_jit_append_char_2(ll_builder, char0, char1): - ofs = ll_builder.current_ofs - end = ofs + 2 * ll_builder.charsize - if bool(ll_builder.current_buf) and uint_le(end, - ll_builder.current_end): - ll_builder.current_ofs = end - buf = ll_builder.current_buf - index = (ofs - ll_baseofs(buf)) // ll_builder.charsize - buf.chars[index] = char0 - buf.chars[index + 1] = char1 - return - BaseStringBuilderRepr._ll_append_char_2(ll_builder, char0, char1) + ll_builder.skip = 2 + jit.conditional_call( + ll_builder.current_end - ll_builder.current_pos < 2, + ll_builder.append_overflow_2, ll_builder, char0) + pos = ll_builder.current_pos + buf = ll_builder.current_buf + buf.chars[pos] = char0 + pos += ll_builder.skip + ll_builder.current_pos = pos + buf.chars[pos - 1] = char1 + # NB. this usually writes into buf.chars[current_pos] and + # buf.chars[current_pos+1], except if we had an overflow right + # in the middle of the two chars. In that case, 'skip' is set to + # 1 and only one char is written: the 'char1' overrides the 'char0'. 
@staticmethod @always_inline @@ -210,7 +202,7 @@ size = end - start if jit.we_are_jitted(): if BaseStringBuilderRepr._ll_jit_try_append_slice( - ll_builder, ll_str, size): + ll_builder, ll_str, start, size): return ll_builder.skip = start jit.conditional_call( @@ -224,7 +216,7 @@ ll_builder.current_pos = pos + size @staticmethod - def _ll_jit_try_append_slice(ll_builder, ll_str, size): + def _ll_jit_try_append_slice(ll_builder, ll_str, start, size): if jit.isconstant(size): if size == 0: return True @@ -237,18 +229,6 @@ ll_str.chars[start], ll_str.chars[start + 1]) return True - if bool(ll_builder.current_buf): - ofs = ll_builder.current_ofs - end = ofs + size * ll_builder.charsize - if uint_le(end, ll_builder.current_end): - ll_builder.current_ofs = end - buf = ll_builder.current_buf - index = (ofs - ll_baseofs(buf)) // ll_builder.charsize - if lltype.typeOf(buf).TO.chars.OF == lltype.Char: - rstr.copy_string_contents(ll_str, buf, start, index, size) - else: - rstr.copy_unicode_contents(ll_str, buf, start, index, size) - return True return False # use the fall-back path @staticmethod From noreply at buildbot.pypy.org Sun Jun 15 12:45:43 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:45:43 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: clean-ups Message-ID: <20140615104543.21FA21C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72056:070ac1adb8f6 Date: 2014-06-15 12:44 +0200 http://bitbucket.org/pypy/pypy/changeset/070ac1adb8f6/ Log: clean-ups diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -1,16 +1,14 @@ from rpython.rlib import rgc, jit -from rpython.rlib.objectmodel import enforceargs, specialize +from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.debug import ll_assert from 
rpython.rtyper.rptr import PtrRepr -from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr +from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr from rpython.rtyper.lltypesystem.rstr import (STR, UNICODE, char_repr, string_repr, unichar_repr, unicode_repr) from rpython.rtyper.rbuilder import AbstractStringBuilderRepr from rpython.tool.sourcetools import func_with_new_name -from rpython.rtyper.llannotation import SomePtr -from rpython.rtyper.annlowlevel import llstr, llunicode # ------------------------------------------------------------ @@ -19,25 +17,19 @@ # - A StringBuilder has a rstr.STR of the specified initial size # (100 by default), which is filled gradually. # -# - When it is full, we allocate extra buffers as *raw* memory -# held by STRINGPIECE objects. The STRINGPIECE has a destructor -# that frees the memory, but usually the memory is freed explicitly -# at build() time. +# - When it is full, we allocate extra buffers as an extra rstr.STR, +# and the already-filled one is added to a chained list of STRINGPIECE +# objects. +# +# - At build() time, we consolidate all these pieces into a single +# rstr.STR, which is both returned and re-attached to the StringBuilder, +# replacing the STRINGPIECEs. # # - The data is copied at most twice, and only once in case it fits # into the initial size (and the GC supports shrinking the STR). # -# XXX too much a mess to handle the case where the JIT sees this code. -# Think about an easier alternative, like using raw_store(current_buf, ..) -# uniformly, where current_buf is a GC pointer that can be NULL. We'd -# need support in the JIT to map that to virtual string index. We'd also -# need a way to express c_memcpy() below --- similar to copystrcontent, -# but without the assumption that it's about a string (or unicode). -# -# XXX alternatively, a simpler solution might be to allocate all pieces -# as GC-managed rstr.STR. 
To avoid filling the old generation with -# garbage we could add a weakref holding the most recently built chain -# of STRs, and reuse it the next time if it's still there. +# XXX in build(), we could try keeping around a global weakref to the +# chain of STRINGPIECEs and reuse them the next time. # # ------------------------------------------------------------ @@ -140,16 +132,6 @@ ) - at always_inline -def ll_str2raw(ll_str, charoffset): - STRTYPE = lltype.typeOf(ll_str).TO - ofs = (rffi.offsetof(STRTYPE, 'chars') + - rffi.itemoffsetof(STRTYPE.chars, 0)) - ofs = llmemory.raw_malloc_usage(ofs) # for direct run - ofs += rffi.sizeof(STRTYPE.chars.OF) * charoffset - return rffi.ptradd(rffi.cast(rffi.CCHARP, ll_str), ofs) - - class BaseStringBuilderRepr(AbstractStringBuilderRepr): def empty(self): return nullptr(self.lowleveltype.TO) @@ -181,6 +163,9 @@ @staticmethod def ll_append_char_2(ll_builder, char0, char1): + # this is only used by the JIT, when appending a small, known-length + # string. Unlike two consecutive ll_append_char(), it can do that + # with only one conditional_call. 
ll_builder.skip = 2 jit.conditional_call( ll_builder.current_end - ll_builder.current_pos < 2, From noreply at buildbot.pypy.org Sun Jun 15 12:58:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 12:58:15 +0200 (CEST) Subject: [pypy-commit] pypy default: Remove GUARD_NO_EXCEPTION after a removed COND_CALL Message-ID: <20140615105815.B62571C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72057:7a6bc2abea20 Date: 2014-06-15 12:57 +0200 http://bitbucket.org/pypy/pypy/changeset/7a6bc2abea20/ Log: Remove GUARD_NO_EXCEPTION after a removed COND_CALL diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py --- a/rpython/jit/metainterp/optimizeopt/rewrite.py +++ b/rpython/jit/metainterp/optimizeopt/rewrite.py @@ -394,6 +394,7 @@ val = self.getvalue(arg) if val.is_constant(): if val.box.same_constant(CONST_0): + self.last_emitted_operation = REMOVED return op = op.copy_and_change(rop.CALL, args=op.getarglist()[1:]) self.emit_operation(op) diff --git a/rpython/jit/metainterp/test/test_call.py b/rpython/jit/metainterp/test/test_call.py --- a/rpython/jit/metainterp/test/test_call.py +++ b/rpython/jit/metainterp/test/test_call.py @@ -56,3 +56,18 @@ assert self.interp_operations(main, [10]) == 1 assert self.interp_operations(main, [5]) == 0 + def test_cond_call_disappears(self): + driver = jit.JitDriver(greens = [], reds = ['n']) + + def f(n): + raise ValueError + + def main(n): + while n > 0: + driver.jit_merge_point(n=n) + jit.conditional_call(False, f, 10) + n -= 1 + return 42 + + assert self.meta_interp(main, [10]) == 42 + self.check_resops(guard_no_exception=0) From noreply at buildbot.pypy.org Sun Jun 15 13:11:13 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 13:11:13 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Transplant 7a6bc2abea20 Message-ID: <20140615111113.968341C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: 
stringbuilder2-perf Changeset: r72058:99f12ac1639b Date: 2014-06-15 12:58 +0200 http://bitbucket.org/pypy/pypy/changeset/99f12ac1639b/ Log: Transplant 7a6bc2abea20 diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py --- a/rpython/jit/metainterp/optimizeopt/rewrite.py +++ b/rpython/jit/metainterp/optimizeopt/rewrite.py @@ -394,6 +394,7 @@ val = self.getvalue(arg) if val.is_constant(): if val.box.same_constant(CONST_0): + self.last_emitted_operation = REMOVED return op = op.copy_and_change(rop.CALL, args=op.getarglist()[1:]) self.emit_operation(op) diff --git a/rpython/jit/metainterp/test/test_call.py b/rpython/jit/metainterp/test/test_call.py --- a/rpython/jit/metainterp/test/test_call.py +++ b/rpython/jit/metainterp/test/test_call.py @@ -56,3 +56,18 @@ assert self.interp_operations(main, [10]) == 1 assert self.interp_operations(main, [5]) == 0 + def test_cond_call_disappears(self): + driver = jit.JitDriver(greens = [], reds = ['n']) + + def f(n): + raise ValueError + + def main(n): + while n > 0: + driver.jit_merge_point(n=n) + jit.conditional_call(False, f, 10) + n -= 1 + return 42 + + assert self.meta_interp(main, [10]) == 42 + self.check_resops(guard_no_exception=0) From noreply at buildbot.pypy.org Sun Jun 15 13:11:14 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 13:11:14 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Un-skip these tests again Message-ID: <20140615111114.E09581C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72059:60e786a3cd63 Date: 2014-06-15 13:02 +0200 http://bitbucket.org/pypy/pypy/changeset/60e786a3cd63/ Log: Un-skip these tests again diff --git a/rpython/memory/test/gc_test_base.py b/rpython/memory/test/gc_test_base.py --- a/rpython/memory/test/gc_test_base.py +++ b/rpython/memory/test/gc_test_base.py @@ -754,7 +754,6 @@ self.interpret(fn, []) def test_stringbuilder(self): - py.test.skip("cannot test 
here, because it's using ll2ctypes") def fn(): s = StringBuilder(4) s.append("abcd") diff --git a/rpython/memory/test/test_transformed_gc.py b/rpython/memory/test/test_transformed_gc.py --- a/rpython/memory/test/test_transformed_gc.py +++ b/rpython/memory/test/test_transformed_gc.py @@ -701,7 +701,6 @@ return fn def test_string_builder_over_allocation(self): - py.test.skip("cannot test here, because it's using ll2ctypes") fn = self.runner("string_builder_over_allocation") res = fn([]) assert res == ord('y') From noreply at buildbot.pypy.org Sun Jun 15 13:11:16 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 13:11:16 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Cancel this change Message-ID: <20140615111116.0FD7F1C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72060:8f86672be0bd Date: 2014-06-15 13:03 +0200 http://bitbucket.org/pypy/pypy/changeset/8f86672be0bd/ Log: Cancel this change diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -102,42 +102,38 @@ assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) assert loop.match(""" - i82 = int_gt(i77, 0) - guard_true(i82, descr=...) + i7 = int_gt(i4, 0) + guard_true(i7, descr=...) guard_not_invalidated(descr=...) - p83 = call(ConstClass(ll_int2dec__Signed), i77, descr=) + p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=) guard_no_exception(descr=...) - i84 = strlen(p83) - i85 = int_is_true(i84) - guard_true(i85, descr=...) - i86 = strgetitem(p83, 0) - i87 = int_eq(i86, 45) - guard_false(i87, descr=...) - i88 = int_neg(i84) - i89 = int_add(24, i84) - i90 = uint_le(i89, 56) - guard_true(i90, descr=...) - p92 = newstr(32) - copystrcontent(p83, p92, 0, 0, i84) - i93 = uint_lt(i89, 56) - guard_true(i93, descr=...) 
- i94 = int_add(i89, 1) - strsetitem(p92, i84, 32) - i95 = int_add(i94, i84) - i96 = uint_le(i95, 56) - guard_true(i96, descr=...) - i97 = int_sub(i94, 24) - copystrcontent(p83, p92, 0, i97, i84) - i98 = int_sub(56, i95) - i99 = int_sub(32, i98) - i100 = int_ne(32, i99) - guard_true(i100, descr=...) - p101 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p92, i99, descr=) + i10 = strlen(p9) + i11 = int_is_true(i10) + guard_true(i11, descr=...) + i13 = strgetitem(p9, 0) + i15 = int_eq(i13, 45) + guard_false(i15, descr=...) + i17 = int_neg(i10) + i19 = int_gt(i10, 23) + guard_false(i19, descr=...) + p21 = newstr(23) + copystrcontent(p9, p21, 0, 0, i10) + i25 = int_add(1, i10) + i26 = int_gt(i25, 23) + guard_false(i26, descr=...) + strsetitem(p21, i10, 32) + i30 = int_add(i10, i25) + i31 = int_gt(i30, 23) + guard_false(i31, descr=...) + copystrcontent(p9, p21, 0, i25, i10) + i33 = int_lt(i30, 23) + guard_true(i33, descr=...) + p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=) guard_no_exception(descr=...) - i102 = strlen(p101) - i103 = int_add_ovf(i75, i102) + i37 = strlen(p35) + i38 = int_add_ovf(i5, i37) guard_no_overflow(descr=...) - i104 = int_sub(i77, 1) + i40 = int_sub(i4, 1) --TICK-- jump(..., descr=...) 
""") From noreply at buildbot.pypy.org Sun Jun 15 16:17:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 16:17:09 +0200 (CEST) Subject: [pypy-commit] pypy default: Bug fix in ll2ctypes Message-ID: <20140615141709.E50E41C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72061:611f53f81fb1 Date: 2014-06-15 16:09 +0200 http://bitbucket.org/pypy/pypy/changeset/611f53f81fb1/ Log: Bug fix in ll2ctypes diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -948,6 +948,11 @@ REAL_T = lltype.Ptr(REAL_TYPE) cobj = ctypes.cast(cobj, get_ctypes_type(REAL_T)) container = lltype._struct(REAL_TYPE) + # obscuuuuuuuuure: 'cobj' is a ctypes pointer, which is + # mutable; and so if we save away the 'cobj' object + # itself, it might suddenly later be unexpectedly + # modified! Make a copy. + cobj = ctypes.cast(cobj, type(cobj)) struct_use_ctypes_storage(container, cobj) if REAL_TYPE != T.TO: p = container._as_ptr() From noreply at buildbot.pypy.org Sun Jun 15 16:17:11 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 16:17:11 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Add a few ll_asserts to check the range of arguments to copy_string_contents(). Message-ID: <20140615141711.1C7691C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72062:12f9cd24a1b1 Date: 2014-06-15 16:14 +0200 http://bitbucket.org/pypy/pypy/changeset/12f9cd24a1b1/ Log: Add a few ll_asserts to check the range of arguments to copy_string_contents(). diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -77,6 +77,10 @@ # are obscurely essential to make sure that the strings stay alive # longer than the raw_memcopy(). 
assert length >= 0 + ll_assert(srcstart >= 0, "copystrc: negative srcstart") + ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf") + ll_assert(dststart >= 0, "copystrc: negative dststart") + ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf") # from here, no GC operations can happen src = _get_raw_buf(SRC_TP, src, srcstart) dst = _get_raw_buf(DST_TP, dst, dststart) From noreply at buildbot.pypy.org Sun Jun 15 16:17:12 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 16:17:12 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Tweaks Message-ID: <20140615141712.58F9C1C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72063:621cc7ba7368 Date: 2014-06-15 16:14 +0200 http://bitbucket.org/pypy/pypy/changeset/621cc7ba7368/ Log: Tweaks diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -1,6 +1,6 @@ from rpython.rlib import rgc, jit from rpython.rlib.objectmodel import enforceargs -from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask from rpython.rlib.debug import ll_assert from rpython.rtyper.rptr import PtrRepr from rpython.rtyper.lltypesystem import lltype, rffi, rstr @@ -56,11 +56,12 @@ old_piece = lltype.malloc(PIECE) old_piece.buf = ll_builder.current_buf old_piece.prev_piece = ll_builder.extra_pieces - ll_builder.extra_pieces = old_piece + ll_assert(bool(old_piece.buf), "no buf??") ll_builder.current_buf = new_string ll_builder.current_pos = 0 ll_builder.current_end = needed ll_builder.total_size = total_size + ll_builder.extra_pieces = old_piece def stringbuilder_append_overflow(ll_builder, ll_str, size): # First, the part that still fits in the current piece @@ -138,7 +139,9 @@ @classmethod def ll_new(cls, init_size): - init_size = min(init_size, 1280) + # Clamp 
'init_size' to be a value between 0 and 1280. + # Negative values are mapped to 1280. + init_size = intmask(min(r_uint(init_size), r_uint(1280))) ll_builder = lltype.malloc(cls.lowleveltype.TO) ll_builder.current_buf = cls.mallocfn(init_size) ll_builder.current_pos = 0 From noreply at buildbot.pypy.org Sun Jun 15 16:17:14 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 16:17:14 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Remove these hacks, not necessary any more Message-ID: <20140615141714.08D331C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72064:64569e9d9e0b Date: 2014-06-15 16:16 +0200 http://bitbucket.org/pypy/pypy/changeset/64569e9d9e0b/ Log: Remove these hacks, not necessary any more diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1336,10 +1336,6 @@ """ T = lltype.typeOf(ptr) typecheck_ptradd(T) - if not ptr and T.TO.OF == lltype.Char: - # special-case: support 'NULL + real_address_as_int' - assert not (0 <= n < 4096) - return force_cast(T, n) ctypes_item_type = get_ctypes_type(T.TO.OF) ctypes_arrayptr_type = get_ctypes_type(T) cptr = lltype2ctypes(ptr) diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py --- a/rpython/rtyper/lltypesystem/opimpl.py +++ b/rpython/rtyper/lltypesystem/opimpl.py @@ -180,10 +180,11 @@ checkptr(obj) assert is_valid_int(index) if not obj: - assert isinstance(index, int) - assert not (0 <= index < 4096) - from rpython.rtyper.lltypesystem import rffi - return rffi.cast(lltype.typeOf(obj), index) + raise AssertionError("direct_ptradd on null pointer") + ## assert isinstance(index, int) + ## assert not (0 <= index < 4096) + ## from rpython.rtyper.lltypesystem import rffi + ## return rffi.cast(lltype.typeOf(obj), index) return lltype.direct_ptradd(obj, index) From 
noreply at buildbot.pypy.org Sun Jun 15 16:17:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 16:17:15 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Bug fix in ll2ctypes Message-ID: <20140615141715.2B5411C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72065:ce60e2f63c83 Date: 2014-06-15 16:09 +0200 http://bitbucket.org/pypy/pypy/changeset/ce60e2f63c83/ Log: Bug fix in ll2ctypes diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -948,6 +948,11 @@ REAL_T = lltype.Ptr(REAL_TYPE) cobj = ctypes.cast(cobj, get_ctypes_type(REAL_T)) container = lltype._struct(REAL_TYPE) + # obscuuuuuuuuure: 'cobj' is a ctypes pointer, which is + # mutable; and so if we save away the 'cobj' object + # itself, it might suddenly later be unexpectedly + # modified! Make a copy. + cobj = ctypes.cast(cobj, type(cobj)) struct_use_ctypes_storage(container, cobj) if REAL_TYPE != T.TO: p = container._as_ptr() From noreply at buildbot.pypy.org Sun Jun 15 16:51:46 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 16:51:46 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Use jit.conditional_call() also in ll_build() Message-ID: <20140615145146.D3AA41C021D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72066:b451f9d13045 Date: 2014-06-15 16:51 +0200 http://bitbucket.org/pypy/pypy/changeset/b451f9d13045/ Log: Use jit.conditional_call() also in ll_build() diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py --- a/rpython/jit/metainterp/blackhole.py +++ b/rpython/jit/metainterp/blackhole.py @@ -1090,6 +1090,11 @@ if condition: cpu.bh_call_v(func, args_i, None, None, calldescr) + @arguments("cpu", "i", "i", "R", "d") + def bhimpl_conditional_call_r_v(cpu, condition, func, 
args_r, calldescr): + if condition: + cpu.bh_call_v(func, None, args_r, None, calldescr) + @arguments("cpu", "i", "i", "I", "R", "d") def bhimpl_conditional_call_ir_v(cpu, condition, func, args_i, args_r, calldescr): diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -888,6 +888,8 @@ pc): self.do_conditional_call(condbox, funcbox, argboxes, calldescr, pc) + opimpl_conditional_call_r_v = opimpl_conditional_call_i_v + @arguments("box", "box", "boxes2", "descr", "orgpc") def opimpl_conditional_call_ir_v(self, condbox, funcbox, argboxes, calldescr, pc): diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -108,6 +108,7 @@ 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), + 'mallocfn': staticAdtMethod(rstr.mallocstr), } ) @@ -129,6 +130,7 @@ 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), + 'mallocfn': staticAdtMethod(rstr.mallocunicode), } ) @@ -290,32 +292,36 @@ ll_builder.current_end - ll_builder.current_pos) return ll_builder.total_size - num_chars_missing_from_last_piece - @classmethod - def ll_build(cls, ll_builder): - if not ll_builder.extra_pieces: - # fast-path: the result fits in a single buf. 
- final_size = ll_builder.current_pos - buf = ll_builder.current_buf - if ll_builder.total_size != final_size: - ll_assert(final_size < ll_builder.total_size, - "final_size > ll_builder.total_size?") - buf = rgc.ll_shrink_array(buf, final_size) - ll_builder.current_buf = buf - ll_builder.current_end = final_size - ll_builder.total_size = final_size - return buf - else: - return BaseStringBuilderRepr._ll_build_extra(cls, ll_builder) + @staticmethod + def ll_build(ll_builder): + jit.conditional_call(bool(ll_builder.extra_pieces), + BaseStringBuilderRepr._ll_fold_pieces, ll_builder) + # Here is the one remaining "unexpected" branch with the JIT. + # Too bad, but it seems it's the only reasonable way to support + # both virtual builders and avoid-shrink-if-size-doesn't-change + final_size = ll_builder.current_pos + if final_size != ll_builder.total_size: + BaseStringBuilderRepr._ll_shrink_final(ll_builder) + return ll_builder.current_buf @staticmethod - @jit.dont_look_inside - def _ll_build_extra(cls, ll_builder): - final_size = cls.ll_getlength(ll_builder) + def _ll_shrink_final(ll_builder): + final_size = ll_builder.current_pos + ll_assert(final_size <= ll_builder.total_size, + "final_size > ll_builder.total_size?") + buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) + ll_builder.current_buf = buf + ll_builder.current_end = final_size + ll_builder.total_size = final_size + + @staticmethod + def _ll_fold_pieces(ll_builder): + final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) ll_assert(final_size >= 0, "negative final_size") extra = ll_builder.extra_pieces ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) # - result = cls.mallocfn(final_size) + result = ll_builder.mallocfn(final_size) piece = ll_builder.current_buf piece_lgt = ll_builder.current_pos ll_assert(ll_builder.current_end == len(piece.chars), @@ -336,7 +342,6 @@ piece_lgt = len(piece.chars) extra = extra.prev_piece ll_assert(dst == 0, "rbuilder build: underflow") - return 
result @classmethod def ll_bool(cls, ll_builder): From noreply at buildbot.pypy.org Sun Jun 15 21:09:05 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 21:09:05 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Tweak: except in the simplest cases where the ll_builder remains virtual, Message-ID: <20140615190905.4B4BD1C134F@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72067:3e094220dc86 Date: 2014-06-15 21:08 +0200 http://bitbucket.org/pypy/pypy/changeset/3e094220dc86/ Log: Tweak: except in the simplest cases where the ll_builder remains virtual, write ll_build() as just a simple residual call diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -1422,6 +1422,8 @@ return self.execute_varargs(rop.CALL, allboxes, descr, exc, pure) def do_conditional_call(self, condbox, funcbox, argboxes, descr, pc): + if isinstance(condbox, ConstInt) and condbox.value == 0: + return # so that the heapcache can keep argboxes virtual allboxes = self._build_allboxes(funcbox, argboxes, descr) effectinfo = descr.get_extra_info() assert not effectinfo.check_forces_virtual_or_virtualizable() diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -293,14 +293,13 @@ return ll_builder.total_size - num_chars_missing_from_last_piece @staticmethod + @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) def ll_build(ll_builder): - jit.conditional_call(bool(ll_builder.extra_pieces), - BaseStringBuilderRepr._ll_fold_pieces, ll_builder) - # Here is the one remaining "unexpected" branch with the JIT. 
- # Too bad, but it seems it's the only reasonable way to support - # both virtual builders and avoid-shrink-if-size-doesn't-change - final_size = ll_builder.current_pos - if final_size != ll_builder.total_size: + # NB. usually the JIT doesn't look inside this function; it does + # so only in the simplest example where it could virtualize everything + if ll_builder.extra_pieces: + BaseStringBuilderRepr._ll_fold_pieces(ll_builder) + elif ll_builder.current_pos != ll_builder.total_size: BaseStringBuilderRepr._ll_shrink_final(ll_builder) return ll_builder.current_buf From noreply at buildbot.pypy.org Sun Jun 15 22:13:58 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 15 Jun 2014 22:13:58 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Fix this test. Explain why it's longer. Message-ID: <20140615201358.315EA1C134F@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72068:2667bb527c66 Date: 2014-06-15 22:13 +0200 http://bitbucket.org/pypy/pypy/changeset/2667bb527c66/ Log: Fix this test. Explain why it's longer. diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -101,39 +101,64 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) + # NB: since the stringbuilder2-perf branch we get more operations than + # before, but a lot less branches that might fail randomly. assert loop.match(""" - i7 = int_gt(i4, 0) - guard_true(i7, descr=...) + i100 = int_gt(i95, 0) + guard_true(i100, descr=...) guard_not_invalidated(descr=...) - p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=) + p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=) guard_no_exception(descr=...) - i10 = strlen(p9) - i11 = int_is_true(i10) - guard_true(i11, descr=...) 
- i13 = strgetitem(p9, 0) - i15 = int_eq(i13, 45) - guard_false(i15, descr=...) - i17 = int_neg(i10) - i19 = int_gt(i10, 23) - guard_false(i19, descr=...) - p21 = newstr(23) - copystrcontent(p9, p21, 0, 0, i10) - i25 = int_add(1, i10) - i26 = int_gt(i25, 23) - guard_false(i26, descr=...) - strsetitem(p21, i10, 32) - i30 = int_add(i10, i25) - i31 = int_gt(i30, 23) - guard_false(i31, descr=...) - copystrcontent(p9, p21, 0, i25, i10) - i33 = int_lt(i30, 23) - guard_true(i33, descr=...) - p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=) + i102 = strlen(p101) + i103 = int_is_true(i102) + guard_true(i103, descr=...) + i104 = strgetitem(p101, 0) + i105 = int_eq(i104, 45) + guard_false(i105, descr=...) + i106 = int_neg(i102) + i107 = int_gt(i102, 23) + p108 = new(descr=) + p110 = newstr(23) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) guard_no_exception(descr=...) - i37 = strlen(p35) - i38 = int_add_ovf(i5, i37) + i111 = getfield_gc(p108, descr=) + i112 = int_sub(i102, i111) + i113 = getfield_gc(p108, descr=) + p114 = getfield_gc(p108, descr=) + copystrcontent(p101, p114, i111, i113, i112) + i115 = int_add(i113, i112) + i116 = getfield_gc(p108, descr=) + setfield_gc(p108, i115, descr=) + i117 = int_eq(i115, i116) + cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=) + guard_no_exception(descr=...) + i118 = getfield_gc(p108, descr=) + i119 = int_add(i118, 1) + p120 = getfield_gc(p108, descr=) + strsetitem(p120, i118, 32) + i121 = getfield_gc(p108, descr=) + i122 = int_sub(i121, i119) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + i123 = int_gt(i102, i122) + cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + guard_no_exception(descr=...) 
+ i124 = getfield_gc(p108, descr=) + i125 = int_sub(i102, i124) + i126 = getfield_gc(p108, descr=) + p127 = getfield_gc(p108, descr=) + copystrcontent(p101, p127, i124, i126, i125) + i128 = int_add(i126, i125) + setfield_gc(p108, i128, descr=) + p135 = call(..., descr= Author: mattip Branch: Changeset: r72069:e01f896e6bfc Date: 2014-06-15 20:38 +0300 http://bitbucket.org/pypy/pypy/changeset/e01f896e6bfc/ Log: since pack() uses copysign, avoid testing copysign(1.0, float('nan')) which is platform dependent diff --git a/rpython/rlib/rstruct/test/test_ieee.py b/rpython/rlib/rstruct/test/test_ieee.py --- a/rpython/rlib/rstruct/test/test_ieee.py +++ b/rpython/rlib/rstruct/test/test_ieee.py @@ -198,8 +198,9 @@ def check_roundtrip(x, size): s = c_pack(x, size) - assert s == pack(x, size) if not isnan(x): + # pack uses copysign which is ambiguous for NAN + assert s == pack(x, size) assert unpack(s) == x assert c_unpack(s) == x else: From noreply at buildbot.pypy.org Sun Jun 15 22:29:48 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 15 Jun 2014 22:29:48 +0200 (CEST) Subject: [pypy-commit] pypy default: test if getaddrinfo is threadsafe, fails on windows Message-ID: <20140615202948.209BE1C134F@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72070:b84ebcce7ebe Date: 2014-06-15 23:29 +0300 http://bitbucket.org/pypy/pypy/changeset/b84ebcce7ebe/ Log: test if getaddrinfo is threadsafe, fails on windows diff --git a/rpython/rlib/test/test_rsocket.py b/rpython/rlib/test/test_rsocket.py --- a/rpython/rlib/test/test_rsocket.py +++ b/rpython/rlib/test/test_rsocket.py @@ -342,15 +342,32 @@ e = py.test.raises(GAIError, getaddrinfo, 'www.very-invalidaddress.com', None) assert isinstance(e.value.get_msg(), str) -def test_getaddrinfo_pydotorg(): +def getaddrinfo_pydotorg(i, result): lst = getaddrinfo('python.org', None) assert isinstance(lst, list) found = False for family, socktype, protocol, canonname, addr in lst: if addr.get_host() == '140.211.10.69': found 
= True - assert found, lst + result[i] += found +def test_getaddrinfo_pydotorg(): + result = [0,] + getaddrinfo_pydotorg(0, result) + assert result[0] == 1 + +def test_getaddrinfo_pydotorg_threadsafe(): + import threading + nthreads = 10 + result = [0] * nthreads + threads = [None] * nthreads + for i in range(nthreads): + threads[i] = threading.Thread(target = getaddrinfo_pydotorg, args=[i, result]) + threads[i].start() + for i in range(nthreads): + threads[i].join() + assert sum(result) == nthreads + def test_getaddrinfo_no_reverse_lookup(): # It seems that getaddrinfo never runs a reverse lookup on Linux. # Python2.3 on Windows returns the hostname. From noreply at buildbot.pypy.org Mon Jun 16 09:36:23 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 16 Jun 2014 09:36:23 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder2-perf: Ready for merge Message-ID: <20140616073623.4141E1D2C4A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder2-perf Changeset: r72071:ecfe7e4c7001 Date: 2014-06-16 09:31 +0200 http://bitbucket.org/pypy/pypy/changeset/ecfe7e4c7001/ Log: Ready for merge From noreply at buildbot.pypy.org Mon Jun 16 09:36:25 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 16 Jun 2014 09:36:25 +0200 (CEST) Subject: [pypy-commit] pypy default: hg merge stringbuilder2-perf Message-ID: <20140616073625.212921D2C4A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72072:cba6e9bc3afb Date: 2014-06-16 09:35 +0200 http://bitbucket.org/pypy/pypy/changeset/cba6e9bc3afb/ Log: hg merge stringbuilder2-perf Give the StringBuilder a more flexible internal structure, with a chained list of strings instead of just one string. This make it more efficient when building large strings, e.g. with cStringIO(). Also, use systematically jit.conditional_call() instead of regular branches. This lets the JIT make more linear code, at the cost of forcing a bit more data (to be passed as arguments to conditional_calls). 
I would expect the net result to be a slight slow-down on some simple benchmarks and a speed-up on bigger programs. diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -101,39 +101,64 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) + # NB: since the stringbuilder2-perf branch we get more operations than + # before, but a lot less branches that might fail randomly. assert loop.match(""" - i7 = int_gt(i4, 0) - guard_true(i7, descr=...) + i100 = int_gt(i95, 0) + guard_true(i100, descr=...) guard_not_invalidated(descr=...) - p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=) + p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=) guard_no_exception(descr=...) - i10 = strlen(p9) - i11 = int_is_true(i10) - guard_true(i11, descr=...) - i13 = strgetitem(p9, 0) - i15 = int_eq(i13, 45) - guard_false(i15, descr=...) - i17 = int_neg(i10) - i19 = int_gt(i10, 23) - guard_false(i19, descr=...) - p21 = newstr(23) - copystrcontent(p9, p21, 0, 0, i10) - i25 = int_add(1, i10) - i26 = int_gt(i25, 23) - guard_false(i26, descr=...) - strsetitem(p21, i10, 32) - i30 = int_add(i10, i25) - i31 = int_gt(i30, 23) - guard_false(i31, descr=...) - copystrcontent(p9, p21, 0, i25, i10) - i33 = int_lt(i30, 23) - guard_true(i33, descr=...) - p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=) + i102 = strlen(p101) + i103 = int_is_true(i102) + guard_true(i103, descr=...) + i104 = strgetitem(p101, 0) + i105 = int_eq(i104, 45) + guard_false(i105, descr=...) 
+ i106 = int_neg(i102) + i107 = int_gt(i102, 23) + p108 = new(descr=) + p110 = newstr(23) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) guard_no_exception(descr=...) - i37 = strlen(p35) - i38 = int_add_ovf(i5, i37) + i111 = getfield_gc(p108, descr=) + i112 = int_sub(i102, i111) + i113 = getfield_gc(p108, descr=) + p114 = getfield_gc(p108, descr=) + copystrcontent(p101, p114, i111, i113, i112) + i115 = int_add(i113, i112) + i116 = getfield_gc(p108, descr=) + setfield_gc(p108, i115, descr=) + i117 = int_eq(i115, i116) + cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=) + guard_no_exception(descr=...) + i118 = getfield_gc(p108, descr=) + i119 = int_add(i118, 1) + p120 = getfield_gc(p108, descr=) + strsetitem(p120, i118, 32) + i121 = getfield_gc(p108, descr=) + i122 = int_sub(i121, i119) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + i123 = int_gt(i102, i122) + cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + guard_no_exception(descr=...) 
+ i124 = getfield_gc(p108, descr=) + i125 = int_sub(i102, i124) + i126 = getfield_gc(p108, descr=) + p127 = getfield_gc(p108, descr=) + copystrcontent(p101, p127, i124, i126, i125) + i128 = int_add(i126, i125) + setfield_gc(p108, i128, descr=) + p135 = call(..., descr= 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + if sb.build() != u"": + raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_char(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append(u"a") + sb.append(unichr(n)) + s = sb.build() + if len(s) != 2: raise ValueError + if s[0] != u"a": raise ValueError + if s[1] != unichr(n): raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_1(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append(u"ab") + s = sb.build() + if len(s) != 2: raise ValueError + if s[0] != u"a": raise ValueError + if s[1] != u"b": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_2(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append(u"abc") + s = sb.build() + if len(s) != 3: raise ValueError + if s[0] != u"a": raise ValueError + if s[1] != u"b": raise ValueError + if s[2] != u"c": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + 
self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_empty(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append(u"") + s = sb.build() + if len(s) != 0: raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_len2_1(self): + jitdriver = JitDriver(reds=['n', 'str1'], greens=[]) + def f(n): + str1 = unicode(str(n)) + while n > 0: + jitdriver.jit_merge_point(n=n, str1=str1) + sb = UnicodeBuilder() + sb.append(str1) + sb.append(u"ab") + s = sb.build() + if len(s) != 4: raise ValueError + if s[0] != u"1": raise ValueError + if s[1] != u"0": raise ValueError + if s[2] != u"a": raise ValueError + if s[3] != u"b": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + + def test_stringbuilder_append_len2_2(self): + jitdriver = JitDriver(reds=['n', 'str1'], greens=[]) + def f(n): + str1 = str(n) + while n > 0: + jitdriver.jit_merge_point(n=n, str1=str1) + sb = StringBuilder(4) + sb.append("a") + sb.append(str1) + s = sb.build() + if len(s) != 3: raise ValueError + if s[0] != "a": raise ValueError + if s[1] != "1": raise ValueError + if s[2] != "0": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + + def test_stringbuilder_append_slice_1(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append_slice(u"abcdefghij", 1, n) + sb.append_slice(u"abcdefghij", 0, n) + s = sb.build() + if len(s) != 2 * n - 1: raise ValueError + n -= 1 + return n + res = 
self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops(call=2, # (ll_shrink_array) * 2 unroll + copyunicodecontent=4) + + def test_stringbuilder_append_slice_2(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append_slice(u"fOo!", 1, 3) + s = sb.build() + if len(s) != 2: raise ValueError + if s[0] != u"O": raise ValueError + if s[1] != u"o": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_multiple_char_1(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append_multiple_char(u"x", 3) + s = sb.build() + if len(s) != 3: raise ValueError + if s[0] != u"x": raise ValueError + if s[1] != u"x": raise ValueError + if s[2] != u"x": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2, + 'jump': 1}) + + def test_stringbuilder_append_multiple_char_2(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = UnicodeBuilder() + sb.append_multiple_char(u"x", 5) + s = sb.build() + if len(s) != 5: raise ValueError + if s[0] != u"x": raise ValueError + if s[1] != u"x": raise ValueError + if s[2] != u"x": raise ValueError + if s[3] != u"x": raise ValueError + if s[4] != u"x": raise ValueError + n -= 1 + return n + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + self.check_resops(call=4) # (append, build) * 2 unroll + + def test_stringbuilder_bug1(self): + jitdriver = JitDriver(reds=['n', 's1'], greens=[]) + @dont_look_inside + def escape(x): + pass + def f(n): + s1 = unicode(str(n) * 16) + while n > 0: + 
jitdriver.jit_merge_point(n=n, s1=s1) + sb = UnicodeBuilder(32) + sb.append(s1) + sb.append(u"\n\n") + s = sb.build() + if len(s) != 34: raise ValueError + n -= 1 + return n + f(10) + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + + def test_stringbuilder_bug3(self): + jitdriver = JitDriver(reds=['n'], greens=[]) + IN = ['a' * 37, 'b' * 38, '22', '1', '333'] + JOINED = ''.join(IN) + def f(n): + while n > 0: + jitdriver.jit_merge_point(n=n) + sb = StringBuilder(36) + for s in IN: + sb.append(s) + s = sb.build() + if s != JOINED: + raise ValueError + n -= 1 + return n + f(10) + res = self.meta_interp(f, [10], backendopt=True) + assert res == 0 + def test_shrink_array(self): jitdriver = JitDriver(reds=['result', 'n'], greens=[]) _str, _StringBuilder = self._str, self._StringBuilder @@ -596,7 +837,7 @@ n -= 1 return result - res = self.meta_interp(f, [9]) + res = self.meta_interp(f, [9], backendopt=True) assert res == f(9) self.check_resops({ 'jump': 1, 'guard_true': 2, 'int_ge': 2, 'int_add': 2, 'int_sub': 2 diff --git a/rpython/rlib/rdynload.py b/rpython/rlib/rdynload.py --- a/rpython/rlib/rdynload.py +++ b/rpython/rlib/rdynload.py @@ -3,6 +3,7 @@ from rpython.rtyper.tool import rffi_platform from rpython.rtyper.lltypesystem import rffi +from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.rarithmetic import r_uint from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.translator.platform import platform @@ -83,6 +84,8 @@ # XXX this would never work on top of ll2ctypes, because # ctypes are calling dlerror itself, unsure if I can do much in this # area (nor I would like to) + if not we_are_translated(): + return "error info not available, not translated" res = c_dlerror() if not res: return "" diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -231,6 +231,7 @@ @jit.oopspec('rgc.ll_shrink_array(p, smallerlength)') + at enforceargs(None, int) 
@specialize.ll() def ll_shrink_array(p, smallerlength): from rpython.rtyper.lltypesystem.lloperation import llop diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -356,31 +356,30 @@ class AbstractStringBuilder(object): + # This is not the real implementation! + def __init__(self, init_size=INIT_SIZE): - self.l = [] - self.size = 0 + self._l = [] + self._size = 0 def _grow(self, size): - try: - self.size = ovfcheck(self.size + size) - except OverflowError: - raise MemoryError + self._size += size def append(self, s): - assert isinstance(s, self.tp) - self.l.append(s) + assert isinstance(s, self._tp) + self._l.append(s) self._grow(len(s)) def append_slice(self, s, start, end): - assert isinstance(s, self.tp) + assert isinstance(s, self._tp) assert 0 <= start <= end <= len(s) s = s[start:end] - self.l.append(s) + self._l.append(s) self._grow(len(s)) def append_multiple_char(self, c, times): - assert isinstance(c, self.tp) - self.l.append(c * times) + assert isinstance(c, self._tp) + self._l.append(c * times) self._grow(times) def append_charpsize(self, s, size): @@ -388,22 +387,25 @@ l = [] for i in xrange(size): l.append(s[i]) - self.l.append(self.tp("").join(l)) + self._l.append(self._tp("").join(l)) self._grow(size) def build(self): - return self.tp("").join(self.l) + result = self._tp("").join(self._l) + assert len(result) == self._size + self._l = [result] + return result def getlength(self): - return len(self.build()) + return self._size class StringBuilder(AbstractStringBuilder): - tp = str + _tp = str class UnicodeBuilder(AbstractStringBuilder): - tp = unicode + _tp = unicode # ------------------------------------------------------------ diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -160,7 +160,11 @@ s.append("a") s.append_slice("abc", 1, 2) 
s.append_multiple_char('d', 4) - assert s.build() == "aabcabdddd" + result = s.build() + assert result == "aabcabdddd" + assert result == s.build() + s.append("x") + assert s.build() == result + "x" def test_unicode_builder(): s = UnicodeBuilder() @@ -169,8 +173,9 @@ s.append_slice(u'abcdef', 1, 2) assert s.getlength() == len('aabcb') s.append_multiple_char(u'd', 4) - assert s.build() == 'aabcbdddd' - assert isinstance(s.build(), unicode) + result = s.build() + assert result == 'aabcbdddd' + assert isinstance(result, unicode) class TestTranslates(BaseRtypingTest): diff --git a/rpython/rtyper/annlowlevel.py b/rpython/rtyper/annlowlevel.py --- a/rpython/rtyper/annlowlevel.py +++ b/rpython/rtyper/annlowlevel.py @@ -79,13 +79,6 @@ return LowLevelAnnotatorPolicy.lowlevelspecialize(funcdesc, args_s, {}) default_specialize = staticmethod(default_specialize) - def specialize__semierased(funcdesc, args_s): - a2l = annotation_to_lltype - l2a = lltype_to_annotation - args_s[:] = [l2a(a2l(s)) for s in args_s] - return LowLevelAnnotatorPolicy.default_specialize(funcdesc, args_s) - specialize__semierased = staticmethod(specialize__semierased) - specialize__ll = default_specialize def specialize__ll_and_arg(funcdesc, args_s, *argindices): diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py --- a/rpython/rtyper/lltypesystem/opimpl.py +++ b/rpython/rtyper/lltypesystem/opimpl.py @@ -179,6 +179,12 @@ def op_direct_ptradd(obj, index): checkptr(obj) assert is_valid_int(index) + if not obj: + raise AssertionError("direct_ptradd on null pointer") + ## assert isinstance(index, int) + ## assert not (0 <= index < 4096) + ## from rpython.rtyper.lltypesystem import rffi + ## return rffi.cast(lltype.typeOf(obj), index) return lltype.direct_ptradd(obj, index) diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -1,64 
+1,139 @@ from rpython.rlib import rgc, jit from rpython.rlib.objectmodel import enforceargs -from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask +from rpython.rlib.debug import ll_assert from rpython.rtyper.rptr import PtrRepr -from rpython.rtyper.lltypesystem import lltype, rstr +from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr from rpython.rtyper.lltypesystem.rstr import (STR, UNICODE, char_repr, string_repr, unichar_repr, unicode_repr) from rpython.rtyper.rbuilder import AbstractStringBuilderRepr from rpython.tool.sourcetools import func_with_new_name -# Think about heuristics below, maybe we can come up with something -# better or at least compare it with list heuristics -GROW_FAST_UNTIL = 100 * 1024 * 1024 # 100 MB +# ------------------------------------------------------------ +# Basic idea: +# +# - A StringBuilder has a rstr.STR of the specified initial size +# (100 by default), which is filled gradually. +# +# - When it is full, we allocate extra buffers as an extra rstr.STR, +# and the already-filled one is added to a chained list of STRINGPIECE +# objects. +# +# - At build() time, we consolidate all these pieces into a single +# rstr.STR, which is both returned and re-attached to the StringBuilder, +# replacing the STRINGPIECEs. +# +# - The data is copied at most twice, and only once in case it fits +# into the initial size (and the GC supports shrinking the STR). +# +# XXX in build(), we could try keeping around a global weakref to the +# chain of STRINGPIECEs and reuse them the next time. 
+# +# ------------------------------------------------------------ -def new_grow_func(name, mallocfn, copycontentsfn): + +def always_inline(func): + func._always_inline_ = True + return func + + +def new_grow_funcs(name, mallocfn): + @enforceargs(None, int) def stringbuilder_grow(ll_builder, needed): - allocated = ll_builder.allocated - #if allocated < GROW_FAST_UNTIL: - # new_allocated = allocated << 1 - #else: - extra_size = allocated >> 2 try: - new_allocated = ovfcheck(allocated + extra_size) - new_allocated = ovfcheck(new_allocated + needed) + needed = ovfcheck(needed + ll_builder.total_size) + needed = ovfcheck(needed + 63) & ~63 + total_size = ll_builder.total_size + needed except OverflowError: raise MemoryError - newbuf = mallocfn(new_allocated) - copycontentsfn(ll_builder.buf, newbuf, 0, 0, ll_builder.used) - ll_builder.buf = newbuf - ll_builder.allocated = new_allocated - return func_with_new_name(stringbuilder_grow, name) + # + new_string = mallocfn(needed) + # + PIECE = lltype.typeOf(ll_builder.extra_pieces).TO + old_piece = lltype.malloc(PIECE) + old_piece.buf = ll_builder.current_buf + old_piece.prev_piece = ll_builder.extra_pieces + ll_assert(bool(old_piece.buf), "no buf??") + ll_builder.current_buf = new_string + ll_builder.current_pos = 0 + ll_builder.current_end = needed + ll_builder.total_size = total_size + ll_builder.extra_pieces = old_piece -stringbuilder_grow = new_grow_func('stringbuilder_grow', rstr.mallocstr, - rstr.copy_string_contents) -unicodebuilder_grow = new_grow_func('unicodebuilder_grow', rstr.mallocunicode, - rstr.copy_unicode_contents) + def stringbuilder_append_overflow(ll_builder, ll_str, size): + # First, the part that still fits in the current piece + part1 = ll_builder.current_end - ll_builder.current_pos + start = ll_builder.skip + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, ll_builder.current_pos, + part1) + ll_builder.skip += part1 + stringbuilder_grow(ll_builder, size - part1) + + def 
stringbuilder_append_overflow_2(ll_builder, char0): + # Overflow when writing two chars. There are two cases depending + # on whether one char still fits or not. + if ll_builder.current_pos < ll_builder.current_end: + ll_builder.current_buf.chars[ll_builder.current_pos] = char0 + ll_builder.skip = 1 + stringbuilder_grow(ll_builder, 2) + + return (func_with_new_name(stringbuilder_grow, '%s_grow' % name), + func_with_new_name(stringbuilder_append_overflow, + '%s_append_overflow' % name), + func_with_new_name(stringbuilder_append_overflow_2, + '%s_append_overflow_2' % name)) + +stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr) +unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode) + +STRINGPIECE = lltype.GcStruct('stringpiece', + ('buf', lltype.Ptr(STR)), + ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) +STRINGPIECE.prev_piece.TO.become(STRINGPIECE) STRINGBUILDER = lltype.GcStruct('stringbuilder', - ('allocated', lltype.Signed), - ('used', lltype.Signed), - ('buf', lltype.Ptr(STR)), + ('current_buf', lltype.Ptr(STR)), + ('current_pos', lltype.Signed), + ('current_end', lltype.Signed), + ('total_size', lltype.Signed), + ('skip', lltype.Signed), + ('extra_pieces', lltype.Ptr(STRINGPIECE)), adtmeths={ - 'grow': staticAdtMethod(stringbuilder_grow), + 'grow': staticAdtMethod(stringbuilder_grows[0]), + 'append_overflow': staticAdtMethod(stringbuilder_grows[1]), + 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]), + 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), + 'mallocfn': staticAdtMethod(rstr.mallocstr), } ) +UNICODEPIECE = lltype.GcStruct('unicodepiece', + ('buf', lltype.Ptr(UNICODE)), + ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) +UNICODEPIECE.prev_piece.TO.become(UNICODEPIECE) + UNICODEBUILDER = lltype.GcStruct('unicodebuilder', - ('allocated', lltype.Signed), - ('used', lltype.Signed), - ('buf', 
lltype.Ptr(UNICODE)), + ('current_buf', lltype.Ptr(UNICODE)), + ('current_pos', lltype.Signed), + ('current_end', lltype.Signed), + ('total_size', lltype.Signed), + ('skip', lltype.Signed), + ('extra_pieces', lltype.Ptr(UNICODEPIECE)), adtmeths={ - 'grow': staticAdtMethod(unicodebuilder_grow), + 'grow': staticAdtMethod(unicodebuilder_grows[0]), + 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), + 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]), + 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), + 'mallocfn': staticAdtMethod(rstr.mallocunicode), } ) -MAX = 16*1024*1024 class BaseStringBuilderRepr(AbstractStringBuilderRepr): def empty(self): @@ -66,72 +141,206 @@ @classmethod def ll_new(cls, init_size): - if init_size < 0: - init_size = MAX + # Clamp 'init_size' to be a value between 0 and 1280. + # Negative values are mapped to 1280. + init_size = intmask(min(r_uint(init_size), r_uint(1280))) ll_builder = lltype.malloc(cls.lowleveltype.TO) - ll_builder.allocated = init_size - ll_builder.used = 0 - ll_builder.buf = cls.mallocfn(init_size) + ll_builder.current_buf = cls.mallocfn(init_size) + ll_builder.current_pos = 0 + ll_builder.current_end = init_size + ll_builder.total_size = init_size return ll_builder @staticmethod + @always_inline def ll_append(ll_builder, ll_str): - used = ll_builder.used - lgt = len(ll_str.chars) - needed = lgt + used - if needed > ll_builder.allocated: - ll_builder.grow(ll_builder, lgt) - ll_str.copy_contents(ll_str, ll_builder.buf, 0, used, lgt) - ll_builder.used = needed + BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str, + 0, len(ll_str.chars)) @staticmethod + @always_inline def ll_append_char(ll_builder, char): - if ll_builder.used == ll_builder.allocated: - ll_builder.grow(ll_builder, 1) - ll_builder.buf.chars[ll_builder.used] = char - ll_builder.used += 1 + jit.conditional_call(ll_builder.current_pos == 
ll_builder.current_end, + ll_builder.grow, ll_builder, 1) + pos = ll_builder.current_pos + ll_builder.current_pos = pos + 1 + ll_builder.current_buf.chars[pos] = char @staticmethod - def ll_append_slice(ll_builder, ll_str, start, end): - needed = end - start - used = ll_builder.used - if needed + used > ll_builder.allocated: - ll_builder.grow(ll_builder, needed) - assert needed >= 0 - ll_str.copy_contents(ll_str, ll_builder.buf, start, used, needed) - ll_builder.used = needed + used + def ll_append_char_2(ll_builder, char0, char1): + # this is only used by the JIT, when appending a small, known-length + # string. Unlike two consecutive ll_append_char(), it can do that + # with only one conditional_call. + ll_builder.skip = 2 + jit.conditional_call( + ll_builder.current_end - ll_builder.current_pos < 2, + ll_builder.append_overflow_2, ll_builder, char0) + pos = ll_builder.current_pos + buf = ll_builder.current_buf + buf.chars[pos] = char0 + pos += ll_builder.skip + ll_builder.current_pos = pos + buf.chars[pos - 1] = char1 + # NB. this usually writes into buf.chars[current_pos] and + # buf.chars[current_pos+1], except if we had an overflow right + # in the middle of the two chars. In that case, 'skip' is set to + # 1 and only one char is written: the 'char1' overrides the 'char0'. 
@staticmethod - @jit.look_inside_iff(lambda ll_builder, char, times: jit.isconstant(times) and times <= 4) - def ll_append_multiple_char(ll_builder, char, times): - used = ll_builder.used - if times + used > ll_builder.allocated: - ll_builder.grow(ll_builder, times) - for i in range(times): - ll_builder.buf.chars[used] = char - used += 1 - ll_builder.used = used + @always_inline + def ll_append_slice(ll_builder, ll_str, start, end): + size = end - start + if jit.we_are_jitted(): + if BaseStringBuilderRepr._ll_jit_try_append_slice( + ll_builder, ll_str, start, size): + return + ll_builder.skip = start + jit.conditional_call( + size > ll_builder.current_end - ll_builder.current_pos, + ll_builder.append_overflow, ll_builder, ll_str, size) + start = ll_builder.skip + size = end - start + pos = ll_builder.current_pos + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, pos, size) + ll_builder.current_pos = pos + size @staticmethod - def ll_append_charpsize(ll_builder, charp, size): - used = ll_builder.used - if used + size > ll_builder.allocated: - ll_builder.grow(ll_builder, size) - ll_builder.copy_raw_to_string(charp, ll_builder.buf, used, size) - ll_builder.used += size + def _ll_jit_try_append_slice(ll_builder, ll_str, start, size): + if jit.isconstant(size): + if size == 0: + return True + if size == 1: + BaseStringBuilderRepr.ll_append_char(ll_builder, + ll_str.chars[start]) + return True + if size == 2: + BaseStringBuilderRepr.ll_append_char_2(ll_builder, + ll_str.chars[start], + ll_str.chars[start + 1]) + return True + return False # use the fall-back path @staticmethod - def ll_getlength(ll_builder): - return ll_builder.used + @always_inline + def ll_append_multiple_char(ll_builder, char, times): + if jit.we_are_jitted(): + if BaseStringBuilderRepr._ll_jit_try_append_multiple_char( + ll_builder, char, times): + return + BaseStringBuilderRepr._ll_append_multiple_char(ll_builder, char, times) @staticmethod + @jit.dont_look_inside + def 
_ll_append_multiple_char(ll_builder, char, times): + part1 = ll_builder.current_end - ll_builder.current_pos + if times > part1: + times -= part1 + buf = ll_builder.current_buf + for i in xrange(ll_builder.current_pos, ll_builder.current_end): + buf.chars[i] = char + ll_builder.grow(ll_builder, times) + # + buf = ll_builder.current_buf + pos = ll_builder.current_pos + end = pos + times + ll_builder.current_pos = end + for i in xrange(pos, end): + buf.chars[i] = char + + @staticmethod + def _ll_jit_try_append_multiple_char(ll_builder, char, size): + if jit.isconstant(size): + if size == 0: + return True + if size == 1: + BaseStringBuilderRepr.ll_append_char(ll_builder, char) + return True + if size == 2: + BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) + return True + if size == 3: + BaseStringBuilderRepr.ll_append_char(ll_builder, char) + BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) + return True + if size == 4: + BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) + BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) + return True + return False # use the fall-back path + + @staticmethod + @jit.dont_look_inside + def ll_append_charpsize(ll_builder, charp, size): + part1 = ll_builder.current_end - ll_builder.current_pos + if size > part1: + # First, the part that still fits + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, + ll_builder.current_pos, part1) + charp = rffi.ptradd(charp, part1) + size -= part1 + ll_builder.grow(ll_builder, size) + # + pos = ll_builder.current_pos + ll_builder.current_pos = pos + size + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) + + @staticmethod + @always_inline + def ll_getlength(ll_builder): + num_chars_missing_from_last_piece = ( + ll_builder.current_end - ll_builder.current_pos) + return ll_builder.total_size - num_chars_missing_from_last_piece + + @staticmethod + @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) def 
ll_build(ll_builder): - final_size = ll_builder.used - assert final_size >= 0 - if final_size < ll_builder.allocated: - ll_builder.allocated = final_size - ll_builder.buf = rgc.ll_shrink_array(ll_builder.buf, final_size) - return ll_builder.buf + # NB. usually the JIT doesn't look inside this function; it does + # so only in the simplest example where it could virtualize everything + if ll_builder.extra_pieces: + BaseStringBuilderRepr._ll_fold_pieces(ll_builder) + elif ll_builder.current_pos != ll_builder.total_size: + BaseStringBuilderRepr._ll_shrink_final(ll_builder) + return ll_builder.current_buf + + @staticmethod + def _ll_shrink_final(ll_builder): + final_size = ll_builder.current_pos + ll_assert(final_size <= ll_builder.total_size, + "final_size > ll_builder.total_size?") + buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) + ll_builder.current_buf = buf + ll_builder.current_end = final_size + ll_builder.total_size = final_size + + @staticmethod + def _ll_fold_pieces(ll_builder): + final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) + ll_assert(final_size >= 0, "negative final_size") + extra = ll_builder.extra_pieces + ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) + # + result = ll_builder.mallocfn(final_size) + piece = ll_builder.current_buf + piece_lgt = ll_builder.current_pos + ll_assert(ll_builder.current_end == len(piece.chars), + "bogus last piece_lgt") + ll_builder.total_size = final_size + ll_builder.current_buf = result + ll_builder.current_pos = final_size + ll_builder.current_end = final_size + + dst = final_size + while True: + dst -= piece_lgt + ll_assert(dst >= 0, "rbuilder build: overflow") + ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) + if not extra: + break + piece = extra.buf + piece_lgt = len(piece.chars) + extra = extra.prev_piece + ll_assert(dst == 0, "rbuilder build: underflow") @classmethod def ll_bool(cls, ll_builder): diff --git a/rpython/rtyper/lltypesystem/rffi.py 
b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -681,25 +681,25 @@ from rpython.rtyper.lltypesystem.rstr import (STR as STRTYPE, copy_string_to_raw, copy_raw_to_string, - copy_string_contents) + copy_string_contents, + mallocstr as mallocfn) from rpython.rtyper.annlowlevel import llstr as llstrtype from rpython.rtyper.annlowlevel import hlstr as hlstrtype TYPEP = CCHARP ll_char_type = lltype.Char lastchar = '\x00' - builder_class = StringBuilder else: from rpython.rtyper.lltypesystem.rstr import ( UNICODE as STRTYPE, copy_unicode_to_raw as copy_string_to_raw, copy_raw_to_unicode as copy_raw_to_string, - copy_unicode_contents as copy_string_contents) + copy_unicode_contents as copy_string_contents, + mallocunicode as mallocfn) from rpython.rtyper.annlowlevel import llunicode as llstrtype from rpython.rtyper.annlowlevel import hlunicode as hlstrtype TYPEP = CWCHARP ll_char_type = lltype.UniChar lastchar = u'\x00' - builder_class = UnicodeBuilder # str -> char* def str2charp(s, track_allocation=True): @@ -728,12 +728,7 @@ size = 0 while cp[size] != lastchar: size += 1 - b = builder_class(size) - i = 0 - while cp[i] != lastchar: - b.append(cp[i]) - i += 1 - return assert_str0(b.build()) + return assert_str0(charpsize2str(cp, size)) # str -> char* # Can't inline this because of the raw address manipulation. 
@@ -829,18 +824,18 @@ # char* -> str, with an upper bound on the length in case there is no \x00 @enforceargs(None, int) def charp2strn(cp, maxlen): - b = builder_class(maxlen) - i = 0 - while i < maxlen and cp[i] != lastchar: - b.append(cp[i]) - i += 1 - return assert_str0(b.build()) + size = 0 + while size < maxlen and cp[size] != lastchar: + size += 1 + return assert_str0(charpsize2str(cp, size)) # char* and size -> str (which can contain null bytes) def charpsize2str(cp, size): - b = builder_class(size) - b.append_charpsize(cp, size) - return b.build() + ll_str = mallocfn(size) + copy_raw_to_string(cp, ll_str, 0, size) + result = hlstrtype(ll_str) + assert result is not None + return result charpsize2str._annenforceargs_ = [None, int] return (str2charp, free_charp, charp2str, diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -4,7 +4,7 @@ from rpython.rlib import jit, types from rpython.rlib.debug import ll_assert from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated, - _hash_string, keepalive_until_here, specialize) + _hash_string, keepalive_until_here, specialize, enforceargs) from rpython.rlib.signature import signature from rpython.rlib.rarithmetic import ovfcheck from rpython.rtyper.error import TyperError @@ -32,13 +32,13 @@ UNICODE = GcForwardReference() def new_malloc(TP, name): + @enforceargs(int) def mallocstr(length): ll_assert(length >= 0, "negative string length") r = malloc(TP, length) if not we_are_translated() or not malloc_zero_filled: r.hash = 0 return r - mallocstr._annspecialcase_ = 'specialize:semierased' return func_with_new_name(mallocstr, name) mallocstr = new_malloc(STR, 'mallocstr') @@ -77,6 +77,10 @@ # are obscurely essential to make sure that the strings stay alive # longer than the raw_memcopy(). 
assert length >= 0 + ll_assert(srcstart >= 0, "copystrc: negative srcstart") + ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf") + ll_assert(dststart >= 0, "copystrc: negative dststart") + ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf") # from here, no GC operations can happen src = _get_raw_buf(SRC_TP, src, srcstart) dst = _get_raw_buf(DST_TP, dst, dststart) diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -81,6 +81,21 @@ xf = self.compile(f, [], backendopt=False) assert xf() == 4 + def test_charp2str_exact_result(self): + from rpython.annotator.annrpython import RPythonAnnotator + from rpython.rtyper.llannotation import SomePtr + a = RPythonAnnotator() + s = a.build_types(charpsize2str, [SomePtr(CCHARP), int]) + assert s.knowntype == str + assert s.can_be_None is False + assert s.no_nul is False + # + a = RPythonAnnotator() + s = a.build_types(charp2str, [SomePtr(CCHARP)]) + assert s.knowntype == str + assert s.can_be_None is False + assert s.no_nul is True + def test_string_reverse(self): c_source = py.code.Source(""" #include diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py --- a/rpython/rtyper/test/test_rbuilder.py +++ b/rpython/rtyper/test/test_rbuilder.py @@ -3,26 +3,96 @@ import py from rpython.rlib.rstring import StringBuilder, UnicodeBuilder -from rpython.rtyper.annlowlevel import llstr, hlstr +from rpython.rtyper.annlowlevel import llstr, hlstr, llunicode, hlunicode from rpython.rtyper.lltypesystem import rffi -from rpython.rtyper.lltypesystem.rbuilder import StringBuilderRepr +from rpython.rtyper.lltypesystem.rbuilder import StringBuilderRepr, UnicodeBuilderRepr from rpython.rtyper.test.tool import BaseRtypingTest class TestStringBuilderDirect(object): + def test_nooveralloc(self): + sb = 
StringBuilderRepr.ll_new(33) + StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) + assert StringBuilderRepr.ll_getlength(sb) == 33 + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + assert StringBuilderRepr.ll_getlength(sb) == 33 + + def test_shrinking(self): + sb = StringBuilderRepr.ll_new(100) + StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) + assert StringBuilderRepr.ll_getlength(sb) == 33 + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + assert StringBuilderRepr.ll_getlength(sb) == 33 + def test_simple(self): sb = StringBuilderRepr.ll_new(3) StringBuilderRepr.ll_append_char(sb, 'x') StringBuilderRepr.ll_append(sb, llstr("abc")) StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5) StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3) + assert StringBuilderRepr.ll_getlength(sb) == 10 s = StringBuilderRepr.ll_build(sb) assert hlstr(s) == "xabcobayyy" + assert StringBuilderRepr.ll_getlength(sb) == 10 - def test_nooveralloc(self): - sb = StringBuilderRepr.ll_new(3) - StringBuilderRepr.ll_append(sb, llstr("abc")) - assert StringBuilderRepr.ll_build(sb) == sb.buf + def test_grow_when_append_char(self): + sb = StringBuilderRepr.ll_new(33) + StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) + StringBuilderRepr.ll_append_char(sb, "d") + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + "d" + + def test_grow_two_halves(self): + sb = StringBuilderRepr.ll_new(32) + StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + + def test_grow_when_exactly_full(self): + sb = StringBuilderRepr.ll_new(33) + StringBuilderRepr.ll_append(sb, llstr("abc" * 11)) + StringBuilderRepr.ll_append(sb, llstr("def")) + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abc" * 11 + "def" + + def test_charp(self): + sb = StringBuilderRepr.ll_new(32) + with rffi.scoped_str2charp("hello world") as p: + StringBuilderRepr.ll_append_charpsize(sb, p, 12) + with 
rffi.scoped_str2charp("0123456789abcdefghijklmn") as p: + StringBuilderRepr.ll_append_charpsize(sb, p, 24) + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "hello world\x000123456789abcdefghijklmn" + + def test_unicode(self): + sb = UnicodeBuilderRepr.ll_new(32) + UnicodeBuilderRepr.ll_append_char(sb, u'x') + UnicodeBuilderRepr.ll_append(sb, llunicode(u"abc")) + UnicodeBuilderRepr.ll_append_slice(sb, llunicode(u"foobar"), 2, 5) + UnicodeBuilderRepr.ll_append_multiple_char(sb, u'y', 30) + u = UnicodeBuilderRepr.ll_build(sb) + assert hlunicode(u) == u"xabcoba" + u"y" * 30 + + def test_several_builds(self): + sb = StringBuilderRepr.ll_new(32) + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "" + assert s == StringBuilderRepr.ll_build(sb) + assert s == StringBuilderRepr.ll_build(sb) + # + sb = StringBuilderRepr.ll_new(32) + StringBuilderRepr.ll_append(sb, llstr("abcdefgh" * 3)) # not full + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abcdefgh" * 3 + assert s == StringBuilderRepr.ll_build(sb) + assert s == StringBuilderRepr.ll_build(sb) + StringBuilderRepr.ll_append(sb, llstr("extra")) # overflow + s = StringBuilderRepr.ll_build(sb) + assert hlstr(s) == "abcdefgh" * 3 + "extra" + assert s == StringBuilderRepr.ll_build(sb) + assert s == StringBuilderRepr.ll_build(sb) class TestStringBuilder(BaseRtypingTest): @@ -39,25 +109,25 @@ def test_overallocation(self): def func(): - s = StringBuilder(4) - s.append("abcd") - s.append("defg") + s = StringBuilder(34) + s.append("abcd" * 5) + s.append("defg" * 5) s.append("rty") return s.build() res = self.ll_to_string(self.interpret(func, [])) - assert res == "abcddefgrty" + assert res == "abcd" * 5 + "defg" * 5 + "rty" def test_unicode(self): def func(): - s = UnicodeBuilder() + s = UnicodeBuilder(32) s.append(u'a') s.append(u'abc') s.append(u'abcdef') s.append_slice(u'abc', 1, 2) - s.append_multiple_char(u'u', 4) + s.append_multiple_char(u'u', 40) return s.build() res = 
self.ll_to_unicode(self.interpret(func, [])) - assert res == 'aabcabcdefbuuuu' + assert res == u'aabcabcdefb' + u'u' * 40 assert isinstance(res, unicode) def test_string_getlength(self): diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -117,6 +117,7 @@ #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */ #define OP_GC__DISABLE_FINALIZERS(r) /* nothing */ #define OP_GC__ENABLE_FINALIZERS(r) /* nothing */ +#define GC_REGISTER_FINALIZER(a,b,c,d,e) /* nothing */ #endif /************************************************************/ diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -1363,6 +1363,23 @@ assert res == ' '.join([''.join(map(chr, range(33, 33+length))) for length in range(1, 51)]) + def definestr_string_builder_multiple_builds_2(cls): + def fn(_): + got = [] + for j in range(3, 76, 5): + s = StringBuilder() + for i in range(j): + s.append(chr(33+i)) + gc.collect() + got.append(s.build()) + return ' '.join(got) + return fn + + def test_string_builder_multiple_builds_2(self): + res = self.run('string_builder_multiple_builds_2') + assert res == ' '.join([''.join(map(chr, range(33, 33+length))) + for length in range(3, 76, 5)]) + def define_nursery_hash_base(cls): class A: pass diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py --- a/rpython/translator/c/test/test_standalone.py +++ b/rpython/translator/c/test/test_standalone.py @@ -960,6 +960,50 @@ self.compile(entry_point) # assert did not explode + def test_unicode_builder(self): + import random + from rpython.rlib.rstring import UnicodeBuilder + + to_do = [] + for i in range(15000): + to_do.append(random.randrange(0, 100000)) + to_do.append(0) + + expected = [] + s = '' + for x in to_do: + if x < 1500: + 
expected.append("``%s''" % (s,)) + if x < 1000: + s = '' + elif x < 20000: + s += chr(32 + (x & 63)) + elif x < 30000: + s += chr(32 + (x & 63)) * (x % 93) + else: + s += str(x) + expected = '\n'.join(expected) + + def entry_point(argv): + b = UnicodeBuilder(32) + for x in to_do: + if x < 1500: + print "``%s''" % str(b.build()) + if x < 1000: + b = UnicodeBuilder(32) + elif x < 20000: + b.append(unichr(32 + (x & 63))) + elif x < 30000: + b.append_multiple_char(unichr(32 + (x & 63)), x % 93) + else: + b.append(unicode(str(x))) + return 0 + + t, cbuilder = self.compile(entry_point) + out = cbuilder.cmdexec('') + assert out.strip() == expected + + class TestMaemo(TestStandalone): def setup_class(cls): py.test.skip("TestMaemo: tests skipped for now") From noreply at buildbot.pypy.org Mon Jun 16 11:02:47 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Mon, 16 Jun 2014 11:02:47 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: fix some references Message-ID: <20140616090247.907721D2E65@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5345:0b30cb1cdf7e Date: 2014-06-16 11:02 +0200 http://bitbucket.org/pypy/extradoc/changeset/0b30cb1cdf7e/ Log: fix some references diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex --- a/talk/dls2014/paper/paper.tex +++ b/talk/dls2014/paper/paper.tex @@ -140,13 +140,13 @@ % Virtual Memory Assisted Transactional Memory for Dynamic Languages} \authorinfo{Remigius Meier} - {Department of Computer Science\\ ETH Zürich} + {Department of Computer Science\\ ETH Zürich, Switzerland} {remi.meier at inf.ethz.ch} \authorinfo{Armin Rigo} {www.pypy.org} {arigo at tunes.org} \authorinfo{Thomas Gross} - {Department of Computer Science\\ ETH Zürich} + {Department of Computer Science\\ ETH Zürich, Switzerland} {thomas.gross at inf.ethz.ch} \maketitle diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ 
b/talk/icooolps2014/position-paper.tex @@ -49,10 +49,10 @@ %% \preprintfooter{short description of paper} % 'preprint' option specified. \title{A Way Forward in Parallelising Dynamic Languages} -\subtitle{Position Paper, ICOOOLPS'14} +%\subtitle{Position Paper, ICOOOLPS'14} \authorinfo{Remigius Meier} - {Department of Computer Science\\ ETH Zürich} + {Department of Computer Science\\ ETH Zürich, Switzerland} {remi.meier at inf.ethz.ch} \authorinfo{Armin Rigo} {www.pypy.org} @@ -496,18 +496,20 @@ Principles and practice of parallel programming.} ACM, 2014. \bibitem{warmhoff13} - Wamhoff, Jons-Tobias, et al. "FastLane: improving performance of - software transactional memory for low thread counts." - \emph{Proceedings of the 18th ACM SIGPLAN symposium on Principles - and practice of parallel programming.} ACM, 2013. + Jons-Tobias Wamhoff, Christof Fetzer, Pascal Felber, Etienne Rivière, + and Gilles Muller. 2013. FastLane: improving performance of software + transactional memory for low thread counts. \emph{SIGPLAN Not.} 48, 8 + (February 2013), 113-122. -\bibitem{drago11} - Dragojević, Aleksandar, et al. "Why STM can be more than a research - toy." \emph{Communications of the ACM} 54.4 (2011): 70-77. +\bibitem{drago11} Aleksandar Dragojević, Pascal Felber, Vincent + Gramoli, and Rachid Guerraoui. 2011. Why STM can be more than a + research toy. \emph{Commun. ACM} 54, 4 (April 2011), 70-77. \bibitem{cascaval08} - Cascaval, Calin, et al. "Software transactional memory: Why is it - only a research toy?." \emph{Queue} 6.5 (2008): 40. + Calin Cascaval, Colin Blundell, Maged Michael, Harold W. Cain, Peng + Wu, Stefanie Chiras, and Siddhartha Chatterjee. 2008. Software + transactional memory: why is it only a research + toy?. \emph{Commun. ACM} 51, 11 (November 2008), 40-46. \bibitem{nicholas06} Nicholas Riley and Craig Zilles. 2006. Hardware transactional memory @@ -522,8 +524,10 @@ Comput. Archit. News 38}, 5 (April 2010) \bibitem{felber07} - Felber, Pascal, et al. 
"Transactifying applications using an open - compiler framework." \emph{TRANSACT}, August (2007): 4-6. + Pascal Felber and Torvald Riegel and Christof Fetzer and Martin + Süßkraut and Ulrich Müller and Heiko Sturzrehm. 2007. Transactifying + applications using an open compiler framework. \emph{TRANSACT}, August + (2007): 4-6. \bibitem{bill06} Bill McCloskey, Feng Zhou, David Gay, and Eric @@ -533,8 +537,11 @@ New York, NY, USA \bibitem{spear09} - Spear, Michael F., et al. "Transactional mutex locks." \emph{SIGPLAN - Workshop on Transactional Computing.} 2009. + Luke Dalessandro, Dave Dice, Michael Scott, Nir Shavit, and Michael + Spear. 2010. Transactional mutex locks. In \emph{Proceedings of the + 16th international Euro-Par conference on Parallel processing: Part + II} (Euro-Par'10), Pasqua D'Ambra, Mario Guarracino, and Domenico + Talia (Eds.). Springer-Verlag, Berlin, Heidelberg, 2-13. \bibitem{lamport79} Lamport, Leslie. "How to make a multiprocessor computer that From noreply at buildbot.pypy.org Mon Jun 16 11:18:25 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Mon, 16 Jun 2014 11:18:25 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: add copyright data Message-ID: <20140616091825.15A2E1D2EF2@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5346:7574b43d330e Date: 2014-06-16 11:18 +0200 http://bitbucket.org/pypy/extradoc/changeset/7574b43d330e/ Log: add copyright data diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -30,10 +30,10 @@ \setlength{\pdfpageheight}{\paperheight} \setlength{\pdfpagewidth}{\paperwidth} -\conferenceinfo{ICOOOLPS workshop 2014}{July 28th, 2014, Uppsala, Sweden} +\conferenceinfo{ICOOOLPS'14}{July 28 2014, Uppsala, Sweden} \copyrightyear{2014} -%\copyrightdata{978-1-nnnn-nnnn-n/yy/mm} -\doi{nnnnnnn.nnnnnnn} +\copyrightdata{978-1-4503-2914-9/14/07} +\doi{2633301.2633305} 
% Uncomment one of the following two, if you are not going for the % traditional copyright transfer agreement. From noreply at buildbot.pypy.org Mon Jun 16 15:41:22 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 16 Jun 2014 15:41:22 +0200 (CEST) Subject: [pypy-commit] cffi default: Force the "strides" to be non-NULL. With a NULL strides, CPython >= 3.3 Message-ID: <20140616134122.7E5221C1068@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1513:9461998086e5 Date: 2014-06-16 15:41 +0200 http://bitbucket.org/cffi/cffi/changeset/9461998086e5/ Log: Force the "strides" to be non-NULL. With a NULL strides, CPython >= 3.3 seems to segfault when doing "mymemoryview[:5] = ffi.buffer(..)". I have no clue how we're supposed to use this messy interface. diff --git a/c/minibuffer.h b/c/minibuffer.h --- a/c/minibuffer.h +++ b/c/minibuffer.h @@ -105,8 +105,12 @@ static int mb_getbuf(MiniBufferObj *self, Py_buffer *view, int flags) { - return PyBuffer_FillInfo(view, NULL, self->mb_data, self->mb_size, - /*readonly=*/0, PyBUF_CONTIG | PyBUF_FORMAT); + static Py_ssize_t dummy_stride = 1; + int res = PyBuffer_FillInfo(view, (PyObject *)self, + self->mb_data, self->mb_size, + /*readonly=*/0, PyBUF_CONTIG | PyBUF_FORMAT); + view->strides = &dummy_stride; + return res; } static PySequenceMethods mb_as_sequence = { From noreply at buildbot.pypy.org Mon Jun 16 16:21:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 16 Jun 2014 16:21:03 +0200 (CEST) Subject: [pypy-commit] pypy default: Revert a small part of 5c08e05e5ee8, with a theory written down in the comments. Message-ID: <20140616142103.4BEFE1C06BC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72073:612b61347513 Date: 2014-06-16 16:20 +0200 http://bitbucket.org/pypy/pypy/changeset/612b61347513/ Log: Revert a small part of 5c08e05e5ee8, with a theory written down in the comments. 
diff --git a/rpython/jit/metainterp/heapcache.py b/rpython/jit/metainterp/heapcache.py --- a/rpython/jit/metainterp/heapcache.py +++ b/rpython/jit/metainterp/heapcache.py @@ -190,17 +190,24 @@ if not self.is_unescaped(frombox): del cache[frombox] return + else: + # Only invalidate things that are either escaped or arguments + for descr, boxes in self.heap_cache.iteritems(): + for box in boxes.keys(): + if not self.is_unescaped(box) or box in argboxes: + del boxes[box] + for descr, indices in self.heap_array_cache.iteritems(): + for boxes in indices.itervalues(): + for box in boxes.keys(): + if not self.is_unescaped(box) or box in argboxes: + del boxes[box] + return - # Only invalidate things that are either escaped or arguments - for descr, boxes in self.heap_cache.iteritems(): - for box in boxes.keys(): - if not self.is_unescaped(box) or box in argboxes: - del boxes[box] - for descr, indices in self.heap_array_cache.iteritems(): - for boxes in indices.itervalues(): - for box in boxes.keys(): - if not self.is_unescaped(box) or box in argboxes: - del boxes[box] + # XXX not completely sure, but I *think* it is needed to reset() the + # state at least in the 'CALL_*' operations that release the GIL. We + # tried to do only the kind of resetting done by the two loops just + # above, but hit an assertion in "pypy test_multiprocessing.py". + self.reset(reset_virtuals=False) def is_class_known(self, box): return box in self.known_class_boxes From noreply at buildbot.pypy.org Mon Jun 16 16:43:39 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 16 Jun 2014 16:43:39 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Document that slicing with negative indices isn't support when overriding __getslice__. 
Message-ID: <20140616144339.A42601C0DCA@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r72074:16631b4ddbab Date: 2014-06-16 09:42 -0500 http://bitbucket.org/pypy/pypy/changeset/16631b4ddbab/ Log: Document that slicing with negative indices isn't support when overriding __getslice__. diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -352,7 +352,8 @@ ``__init__``, ``__del__``, ``__len__``, ``__getitem__``, ``__setitem__``, ``__getslice__``, ``__setslice__``, and ``__iter__``. To handle slicing, ``__getslice__`` and ``__setslice__`` must be used; using ``__getitem__`` and - ``__setitem__`` for slicing isn't supported. + ``__setitem__`` for slicing isn't supported. Additionally, using negative + indices for slicing is still not support, even when using ``__getslice__``. This layout makes the number of types to take care about quite limited. From noreply at buildbot.pypy.org Mon Jun 16 16:56:36 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 16 Jun 2014 16:56:36 +0200 (CEST) Subject: [pypy-commit] pypy fix-bytearray-complexity: Close branch Message-ID: <20140616145636.EDA1F1C1068@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: fix-bytearray-complexity Changeset: r72075:102091cd4a70 Date: 2014-06-16 09:53 -0500 http://bitbucket.org/pypy/pypy/changeset/102091cd4a70/ Log: Close branch From noreply at buildbot.pypy.org Mon Jun 16 16:56:38 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 16 Jun 2014 16:56:38 +0200 (CEST) Subject: [pypy-commit] pypy default: Merge fix-bytearray-complexity Message-ID: <20140616145638.BAB9C1C1068@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: Changeset: r72076:81fa7d159b4d Date: 2014-06-16 09:55 -0500 http://bitbucket.org/pypy/pypy/changeset/81fa7d159b4d/ Log: Merge fix-bytearray-complexity diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- 
a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -348,8 +348,12 @@ **objects** - Normal rules apply. Special methods are not honoured, except ``__init__``, - ``__del__`` and ``__iter__``. + Normal rules apply. The only special methods that are honoured are + ``__init__``, ``__del__``, ``__len__``, ``__getitem__``, ``__setitem__``, + ``__getslice__``, ``__setslice__``, and ``__iter__``. To handle slicing, + ``__getslice__`` and ``__setslice__`` must be used; using ``__getitem__`` and + ``__setitem__`` for slicing isn't supported. Additionally, using negative + indices for slicing is still not support, even when using ``__getslice__``. This layout makes the number of types to take care about quite limited. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,4 +5,8 @@ .. this is a revision shortly after release-2.3.x .. startrev: ca9b7cf02cf4 +.. branch: fix-bytearray-complexity +Bytearray operations no longer copy the bytearray unnecessarily +Added support for ``__getitem__``, ``__setitem__``, ``__getslice__``, +``__setslice__``, and ``__len__`` to RPython diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -1,9 +1,9 @@ """The builtin bytearray implementation""" from rpython.rlib.objectmodel import ( - import_from_mixin, newlist_hint, resizelist_hint) + import_from_mixin, newlist_hint, resizelist_hint, specialize) from rpython.rlib.buffer import Buffer -from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rstring import StringBuilder, ByteListBuilder from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt @@ -11,7 +11,8 @@ from pypy.interpreter.signature import Signature from pypy.objspace.std.sliceobject import W_SliceObject from pypy.objspace.std.stdtypedef import StdTypeDef 
-from pypy.objspace.std.stringmethods import StringMethods +from pypy.objspace.std.stringmethods import StringMethods, _get_buffer +from pypy.objspace.std.bytesobject import W_BytesObject from pypy.objspace.std.util import get_positive_index NON_HEX_MSG = "non-hexadecimal number found in fromhex() arg at position %d" @@ -20,12 +21,12 @@ class W_BytearrayObject(W_Root): import_from_mixin(StringMethods) - def __init__(w_self, data): - w_self.data = data + def __init__(self, data): + self.data = data - def __repr__(w_self): + def __repr__(self): """representation for debugging purposes""" - return "%s(%s)" % (w_self.__class__.__name__, ''.join(w_self.data)) + return "%s(%s)" % (self.__class__.__name__, ''.join(self.data)) def buffer_w(self, space, flags): return BytearrayBuffer(self.data, False) @@ -40,7 +41,10 @@ return ''.join(self.data) def _new(self, value): - return W_BytearrayObject(_make_data(value)) + return W_BytearrayObject(value) + + def _new_from_buffer(self, buffer): + return W_BytearrayObject([buffer[i] for i in range(len(buffer))]) def _new_from_list(self, value): return W_BytearrayObject(value) @@ -58,7 +62,12 @@ raise oefmt(space.w_IndexError, "bytearray index out of range") return space.wrap(ord(character)) - _val = charbuf_w + def _val(self, space): + return self.data + + @staticmethod + def _use_rstr_ops(space, w_other): + return False @staticmethod def _op_val(space, w_other): @@ -68,10 +77,15 @@ assert len(char) == 1 return str(char)[0] - _builder = StringBuilder + def _multi_chr(self, char): + return [char] + + @staticmethod + def _builder(size=100): + return ByteListBuilder(size) def _newlist_unwrapped(self, space, res): - return space.newlist([W_BytearrayObject(_make_data(i)) for i in res]) + return space.newlist([W_BytearrayObject(i) for i in res]) def _isupper(self, ch): return ch.isupper() @@ -260,58 +274,91 @@ return space.wrap(''.join(self.data)) def descr_eq(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return 
space.newbool(self.data == w_other.data) + try: - res = self._val(space) == self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + if len(value) != buffer_len: + return space.newbool(False) + + min_length = min(len(value), buffer_len) + return space.newbool(_memcmp(value, buffer, min_length) == 0) def descr_ne(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return space.newbool(self.data != w_other.data) + try: - res = self._val(space) != self._op_val(space, w_other) + buffer = _get_buffer(space, w_other) except OperationError as e: if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return space.newbool(res) + + value = self._val(space) + buffer_len = buffer.getlength() + + if len(value) != buffer_len: + return space.newbool(True) + + min_length = min(len(value), buffer_len) + return space.newbool(_memcmp(value, buffer, min_length) != 0) + + def _comparison_helper(self, space, w_other): + value = self._val(space) + + if isinstance(w_other, W_BytearrayObject): + other = w_other.data + other_len = len(other) + cmp = _memcmp(value, other, min(len(value), len(other))) + elif isinstance(w_other, W_BytesObject): + other = self._op_val(space, w_other) + other_len = len(other) + cmp = _memcmp(value, other, min(len(value), len(other))) + else: + try: + buffer = _get_buffer(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return False, 0, 0 + raise + other_len = len(buffer) + cmp = _memcmp(value, buffer, min(len(value), len(buffer))) + + return True, cmp, other_len def descr_lt(self, space, w_other): - try: - res = self._val(space) < self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return 
space.newbool(res) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp < 0 or (cmp == 0 and self._len() < other_len)) def descr_le(self, space, w_other): - try: - res = self._val(space) <= self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return space.newbool(res) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp < 0 or (cmp == 0 and self._len() <= other_len)) def descr_gt(self, space, w_other): - try: - res = self._val(space) > self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return space.newbool(res) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp > 0 or (cmp == 0 and self._len() > other_len)) def descr_ge(self, space, w_other): - try: - res = self._val(space) >= self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return space.newbool(res) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp > 0 or (cmp == 0 and self._len() >= other_len)) def descr_iter(self, space): return space.newseqiter(self) @@ -319,10 +366,19 @@ def descr_inplace_add(self, space, w_other): if isinstance(w_other, W_BytearrayObject): self.data += w_other.data + return self + + if isinstance(w_other, W_BytesObject): + self._inplace_add(self._op_val(space, w_other)) else: - self.data += self._op_val(space, w_other) + self._inplace_add(_get_buffer(space, w_other)) return self + @specialize.argtype(1) + def _inplace_add(self, other): + for i in 
range(len(other)): + self.data.append(other[i]) + def descr_inplace_mul(self, space, w_times): try: times = space.getindex_w(w_times, space.w_OverflowError) @@ -403,12 +459,33 @@ if space.isinstance_w(w_sub, space.w_int): char = space.int_w(w_sub) return _descr_contains_bytearray(self.data, space, char) + return self._StringMethods_descr_contains(space, w_sub) + def descr_add(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return self._new(self.data + w_other.data) + + if isinstance(w_other, W_BytesObject): + return self._add(self._op_val(space, w_other)) + + try: + buffer = _get_buffer(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return space.w_NotImplemented + raise + return self._add(buffer) + + @specialize.argtype(1) + def _add(self, other): + return self._new(self.data + [other[i] for i in range(len(other))]) + def descr_reverse(self, space): self.data.reverse() + # ____________________________________________________________ # helpers for slow paths, moved out because they contain loops @@ -1152,3 +1229,13 @@ def setitem(self, index, char): self.data[index] = char + + + at specialize.argtype(1) +def _memcmp(selfvalue, buffer, length): + for i in range(length): + if selfvalue[i] < buffer[i]: + return -1 + if selfvalue[i] > buffer[i]: + return 1 + return 0 diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -430,6 +430,7 @@ _immutable_fields_ = ['_value'] def __init__(self, str): + assert str is not None self._value = str def __repr__(self): @@ -480,6 +481,12 @@ _val = str_w @staticmethod + def _use_rstr_ops(space, w_other): + from pypy.objspace.std.unicodeobject import W_UnicodeObject + return (isinstance(w_other, W_BytesObject) or + isinstance(w_other, W_UnicodeObject)) + + @staticmethod def _op_val(space, w_other): try: return space.str_w(w_other) diff --git 
a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -1,9 +1,11 @@ """Functionality shared between bytes/bytearray/unicode""" from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, newlist_hint from rpython.rlib.rarithmetic import ovfcheck -from rpython.rlib.rstring import endswith, replace, rsplit, split, startswith +from rpython.rlib.rstring import ( + find, rfind, count, endswith, replace, rsplit, split, startswith) +from rpython.rlib.buffer import Buffer from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, unwrap_spec @@ -28,6 +30,9 @@ space, lenself, w_start, w_end, upper_bound=upper_bound) return (value, start, end) + def _multi_chr(self, c): + return c + def descr_len(self, space): return space.wrap(self._len()) @@ -36,17 +41,33 @@ def descr_contains(self, space, w_sub): value = self._val(space) - other = self._op_val(space, w_sub) - return space.newbool(value.find(other) >= 0) + if self._use_rstr_ops(space, w_sub): + other = self._op_val(space, w_sub) + return space.newbool(value.find(other) >= 0) + + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytesObject): + other = self._op_val(space, w_sub) + res = find(value, other, 0, len(value)) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, 0, len(value)) + + return space.newbool(res >= 0) def descr_add(self, space, w_other): - try: - other = self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return self._new(self._val(space) + other) + if self._use_rstr_ops(space, w_other): + try: + other = self._op_val(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return space.w_NotImplemented + raise + 
return self._new(self._val(space) + other) + + # Bytearray overrides this method, CPython doesn't support concatenating + # buffers and strs, and unicodes are always handled above + return space.w_NotImplemented def descr_mul(self, space, w_times): try: @@ -58,7 +79,7 @@ if times <= 0: return self._empty() if self._len() == 1: - return self._new(self._val(space)[0] * times) + return self._new(self._multi_chr(self._val(space)[0]) * times) return self._new(self._val(space) * times) descr_rmul = descr_mul @@ -119,7 +140,7 @@ d = width - len(value) if d > 0: offset = d//2 + (d & width & 1) - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar[0]) centered = offset * fillchar + value + (d - offset) * fillchar else: centered = value @@ -128,15 +149,32 @@ def descr_count(self, space, w_sub, w_start=None, w_end=None): value, start, end = self._convert_idx_params(space, w_start, w_end) - return space.newint(value.count(self._op_val(space, w_sub), start, - end)) + + if self._use_rstr_ops(space, w_sub): + return space.newint(value.count(self._op_val(space, w_sub), start, + end)) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = count(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = count(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = count(value, buffer, start, end) + + return space.wrap(max(res, 0)) def descr_decode(self, space, w_encoding=None, w_errors=None): from pypy.objspace.std.unicodeobject import ( _get_encoding_and_errors, decode_object, unicode_from_string) encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - if encoding is None and errors is None: + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + if (encoding is None and errors is None and + not isinstance(self, W_BytearrayObject)): 
return unicode_from_string(space, self) return decode_object(space, self, encoding, errors) @@ -153,7 +191,11 @@ if not value: return self._empty() - splitted = value.split(self._chr('\t')) + if self._use_rstr_ops(space, self): + splitted = value.split(self._chr('\t')) + else: + splitted = split(value, self._chr('\t')) + try: ovfcheck(len(splitted) * tabsize) except OverflowError: @@ -161,7 +203,7 @@ expanded = oldtoken = splitted.pop(0) for token in splitted: - expanded += self._chr(' ') * self._tabindent(oldtoken, + expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken, tabsize) + token oldtoken = token @@ -192,30 +234,80 @@ def descr_find(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.find(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.find(self._op_val(space, w_sub), start, end) + return space.wrap(res) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = find(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = find(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, start, end) + return space.wrap(res) def descr_rfind(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.rfind(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.rfind(self._op_val(space, w_sub), start, end) + return space.wrap(res) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = rfind(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = rfind(value, w_sub._value, start, end) 
+ else: + buffer = _get_buffer(space, w_sub) + res = rfind(value, buffer, start, end) + return space.wrap(res) def descr_index(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.find(self._op_val(space, w_sub), start, end) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if self._use_rstr_ops(space, w_sub): + res = value.find(self._op_val(space, w_sub), start, end) + elif isinstance(w_sub, W_BytearrayObject): + res = find(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = find(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, start, end) + if res < 0: raise oefmt(space.w_ValueError, "substring not found in string.index") - return space.wrap(res) def descr_rindex(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.rfind(self._op_val(space, w_sub), start, end) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if self._use_rstr_ops(space, w_sub): + res = value.rfind(self._op_val(space, w_sub), start, end) + elif isinstance(w_sub, W_BytearrayObject): + res = rfind(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = rfind(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = rfind(value, buffer, start, end) + if res < 0: raise oefmt(space.w_ValueError, "substring not found in string.rindex") - return space.wrap(res) @specialize.arg(2) @@ -328,6 +420,7 @@ value = self._val(space) prealloc_size = len(value) * (size - 1) + unwrapped = newlist_hint(size) for i in range(size): w_s = list_w[i] check_item = self._join_check_item(space, w_s) @@ -337,13 +430,16 @@ i, w_s) elif check_item == 2: return 
self._join_autoconvert(space, list_w) - prealloc_size += len(self._op_val(space, w_s)) + # XXX Maybe the extra copy here is okay? It was basically going to + # happen anyway, what with being placed into the builder + unwrapped.append(self._op_val(space, w_s)) + prealloc_size += len(unwrapped[i]) sb = self._builder(prealloc_size) for i in range(size): if value and i != 0: sb.append(value) - sb.append(self._op_val(space, list_w[i])) + sb.append(unwrapped[i]) return self._new(sb.build()) def _join_autoconvert(self, space, list_w): @@ -358,7 +454,7 @@ "ljust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar[0]) value += d * fillchar return self._new(value) @@ -372,7 +468,7 @@ "rjust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar[0]) value = d * fillchar + value return self._new(value) @@ -385,52 +481,76 @@ return self._new(builder.build()) def descr_partition(self, space, w_sub): + from pypy.objspace.std.bytearrayobject import W_BytearrayObject value = self._val(space) - sub = self._op_val(space, w_sub) - if not sub: - raise oefmt(space.w_ValueError, "empty separator") - pos = value.find(sub) + + if self._use_rstr_ops(space, w_sub): + sub = self._op_val(space, w_sub) + sublen = len(sub) + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = value.find(sub) + else: + sub = _get_buffer(space, w_sub) + sublen = sub.getlength() + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = find(value, sub, 0, len(value)) + if pos != -1 and isinstance(self, W_BytearrayObject): + w_sub = self._new_from_buffer(sub) + if pos == -1: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): self = self._new(value) return 
space.newtuple([self, self._empty(), self._empty()]) else: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject - if isinstance(self, W_BytearrayObject): - w_sub = self._new(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, - self._sliced(space, value, pos+len(sub), len(value), self)]) + self._sliced(space, value, pos + sublen, len(value), self)]) def descr_rpartition(self, space, w_sub): + from pypy.objspace.std.bytearrayobject import W_BytearrayObject value = self._val(space) - sub = self._op_val(space, w_sub) - if not sub: - raise oefmt(space.w_ValueError, "empty separator") - pos = value.rfind(sub) + + if self._use_rstr_ops(space, w_sub): + sub = self._op_val(space, w_sub) + sublen = len(sub) + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = value.rfind(sub) + else: + sub = _get_buffer(space, w_sub) + sublen = sub.getlength() + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = rfind(value, sub, 0, len(value)) + if pos != -1 and isinstance(self, W_BytearrayObject): + w_sub = self._new_from_buffer(sub) + if pos == -1: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): self = self._new(value) return space.newtuple([self._empty(), self._empty(), self]) else: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject - if isinstance(self, W_BytearrayObject): - w_sub = self._new(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, - self._sliced(space, value, pos+len(sub), len(value), self)]) + self._sliced(space, value, pos + sublen, len(value), self)]) @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): input = self._val(space) + sub = self._op_val(space, w_old) by = self._op_val(space, w_new) try: res = replace(input, sub, by, count) except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") + return self._new(res) 
@unwrap_spec(maxsplit=int) @@ -442,10 +562,10 @@ return self._newlist_unwrapped(space, res) by = self._op_val(space, w_sep) - bylen = len(by) - if bylen == 0: + if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = split(value, by, maxsplit) + return self._newlist_unwrapped(space, res) @unwrap_spec(maxsplit=int) @@ -457,10 +577,10 @@ return self._newlist_unwrapped(space, res) by = self._op_val(space, w_sep) - bylen = len(by) - if bylen == 0: + if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = rsplit(value, by, maxsplit) + return self._newlist_unwrapped(space, res) @unwrap_spec(keepends=bool) @@ -616,10 +736,11 @@ for char in string: buf.append(table[ord(char)]) else: + # XXX Why not preallocate here too? buf = self._builder() deletion_table = [False] * 256 - for c in deletechars: - deletion_table[ord(c)] = True + for i in range(len(deletechars)): + deletion_table[ord(deletechars[i])] = True for char in string: if not deletion_table[ord(char)]: buf.append(table[ord(char)]) @@ -636,7 +757,7 @@ def descr_zfill(self, space, width): selfval = self._val(space) if len(selfval) == 0: - return self._new(self._chr('0') * width) + return self._new(self._multi_chr(self._chr('0')) * width) num_zeros = width - len(selfval) if num_zeros <= 0: # cannot return self, in case it is a subclass of str @@ -662,3 +783,8 @@ @specialize.argtype(0) def _descr_getslice_slowpath(selfvalue, start, step, sl): return [selfvalue[start + i*step] for i in range(sl)] + +def _get_buffer(space, w_obj): + return space.buffer_w(w_obj, space.BUF_SIMPLE) + + diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -178,8 +178,10 @@ assert bytearray('hello').rindex('l') == 3 assert bytearray('hello').index(bytearray('e')) == 1 assert bytearray('hello').find('l') == 2 + assert bytearray('hello').find('l', 
-2) == 3 assert bytearray('hello').rfind('l') == 3 + # these checks used to not raise in pypy but they should raises(TypeError, bytearray('hello').index, ord('e')) raises(TypeError, bytearray('hello').rindex, ord('e')) @@ -440,6 +442,7 @@ u = b.decode('utf-8') assert isinstance(u, unicode) assert u == u'abcdefghi' + assert b.decode().encode() == b def test_int(self): assert int(bytearray('-1234')) == -1234 diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -103,6 +103,12 @@ _val = unicode_w @staticmethod + def _use_rstr_ops(space, w_other): + # Always return true because we always need to copy the other + # operand(s) before we can do comparisons + return True + + @staticmethod def _op_val(space, w_other): if isinstance(w_other, W_UnicodeObject): return w_other._value diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -719,6 +719,14 @@ return super(thistype, pair(ins1, ins2)).improve() +class __extend__(pairtype(SomeInstance, SomeObject)): + def getitem((s_ins, s_idx)): + return s_ins._emulate_call("__getitem__", s_idx) + + def setitem((s_ins, s_idx), s_value): + return s_ins._emulate_call("__setitem__", s_idx, s_value) + + class __extend__(pairtype(SomeIterator, SomeIterator)): def union((iter1, iter2)): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -3937,6 +3937,78 @@ s = a.build_types(fn, [int]) assert isinstance(s, annmodel.SomeInteger) + def test_instance_getitem(self): + class A(object): + def __getitem__(self, i): + return i * i + + def fn(i): + a = A() + return a[i] + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int]) + assert len(a.translator.graphs) == 2 # fn, __getitem__ + assert 
isinstance(s, annmodel.SomeInteger) + + def test_instance_setitem(self): + class A(object): + def __setitem__(self, i, v): + self.value = i * v + + def fn(i, v): + a = A() + a[i] = v + return a.value + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int, int]) + assert len(a.translator.graphs) == 2 # fn, __setitem__ + assert isinstance(s, annmodel.SomeInteger) + + def test_instance_getslice(self): + class A(object): + def __getslice__(self, stop, start): + return "Test"[stop:start] + + def fn(): + a = A() + return a[0:2] + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __getslice__ + assert isinstance(s, annmodel.SomeString) + + def test_instance_setslice(self): + class A(object): + def __setslice__(self, stop, start, value): + self.value = value + + def fn(): + a = A() + a[0:2] = '00' + return a.value + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __setslice__ + assert isinstance(s, annmodel.SomeString) + + def test_instance_len(self): + class A(object): + def __len__(self): + return 0 + + def fn(): + a = A() + return len(a) + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __len__ + assert isinstance(s, annmodel.SomeInteger) + def test_reversed(self): def fn(n): for elem in reversed([1, 2, 3, 4, 5]): diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -683,19 +683,27 @@ if not self.can_be_None: s.const = True + def _emulate_call(self, meth_name, *args_s): + bk = getbookkeeper() + s_attr = self._true_getattr(meth_name) + # record for calltables + bk.emulate_pbc_call(bk.position_key, s_attr, args_s) + return s_attr.call(simple_args(args_s)) + def iter(self): - s_iterable = self._true_getattr('__iter__') - bk = getbookkeeper() - # record for calltables - bk.emulate_pbc_call(bk.position_key, 
s_iterable, []) - return s_iterable.call(simple_args([])) + return self._emulate_call('__iter__') def next(self): - s_next = self._true_getattr('next') - bk = getbookkeeper() - # record for calltables - bk.emulate_pbc_call(bk.position_key, s_next, []) - return s_next.call(simple_args([])) + return self._emulate_call('next') + + def len(self): + return self._emulate_call('__len__') + + def getslice(self, s_start, s_stop): + return self._emulate_call('__getslice__', s_start, s_stop) + + def setslice(self, s_start, s_stop, s_iterable): + return self._emulate_call('__setslice__', s_start, s_stop, s_iterable) class __extend__(SomeBuiltin): def simple_call(self, *args): diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py --- a/rpython/rlib/buffer.py +++ b/rpython/rlib/buffer.py @@ -12,6 +12,11 @@ def getlength(self): raise NotImplementedError + def __len__(self): + res = self.getlength() + assert res >= 0 + return res + def as_str(self): "Returns an interp-level string with the whole content of the buffer." # May be overridden. @@ -21,14 +26,23 @@ "Returns the index'th character in the buffer." raise NotImplementedError # Must be overridden. No bounds checks. + def __getitem__(self, i): + return self.getitem(i) + def getslice(self, start, stop, step, size): # May be overridden. No bounds checks. return ''.join([self.getitem(i) for i in range(start, stop, step)]) + def __getslice__(self, start, stop): + return self.getslice(start, stop, 1, stop - start) + def setitem(self, index, char): "Write a character into the buffer." raise NotImplementedError # Must be overridden. No bounds checks. + def __setitem__(self, i, char): + return self.setitem(i, char) + def setslice(self, start, string): # May be overridden. No bounds checks. 
for i in range(len(string)): diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -6,8 +6,9 @@ SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePBC) from rpython.rtyper.llannotation import SomePtr from rpython.rlib import jit -from rpython.rlib.objectmodel import newlist_hint, specialize -from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.objectmodel import newlist_hint, resizelist_hint, specialize +from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT as BLOOM_WIDTH +from rpython.rlib.buffer import Buffer from rpython.rlib.unicodedata import unicodedb_5_2_0 as unicodedb from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.tool.pairtype import pairtype @@ -24,7 +25,7 @@ return unicodedb.isspace(ord(char)) -@specialize.argtype(0) +@specialize.argtype(0, 1) def split(value, by=None, maxsplit=-1): if by is None: length = len(value) @@ -55,10 +56,13 @@ i = j + 1 return res + if isinstance(value, unicode): + assert isinstance(by, unicode) if isinstance(value, str): assert isinstance(by, str) - else: - assert isinstance(by, unicode) + if isinstance(value, list): + assert isinstance(by, str) + bylen = len(by) if bylen == 0: raise ValueError("empty separator") @@ -67,16 +71,16 @@ if bylen == 1: # fast path: uses str.rfind(character) and str.count(character) by = by[0] # annotator hack: string -> char - count = value.count(by) - if 0 <= maxsplit < count: - count = maxsplit - res = newlist_hint(count + 1) - while count > 0: - next = value.find(by, start) + cnt = count(value, by, 0, len(value)) + if 0 <= maxsplit < cnt: + cnt = maxsplit + res = newlist_hint(cnt + 1) + while cnt > 0: + next = find(value, by, start, len(value)) assert next >= 0 # cannot fail due to the value.count above res.append(value[start:next]) start = next + bylen - count -= 1 + cnt -= 1 res.append(value[start:len(value)]) return res @@ -86,9 +90,10 @@ res = [] while maxsplit != 0: - 
next = value.find(by, start) + next = find(value, by, start, len(value)) if next < 0: break + assert start >= 0 res.append(value[start:next]) start = next + bylen maxsplit -= 1 # NB. if it's already < 0, it stays < 0 @@ -97,7 +102,7 @@ return res - at specialize.argtype(0) + at specialize.argtype(0, 1) def rsplit(value, by=None, maxsplit=-1): if by is None: res = [] @@ -133,10 +138,13 @@ res.reverse() return res + if isinstance(value, unicode): + assert isinstance(by, unicode) if isinstance(value, str): assert isinstance(by, str) - else: - assert isinstance(by, unicode) + if isinstance(value, list): + assert isinstance(by, str) + if maxsplit > 0: res = newlist_hint(min(maxsplit + 1, len(value))) else: @@ -147,7 +155,7 @@ raise ValueError("empty separator") while maxsplit != 0: - next = value.rfind(by, 0, end) + next = rfind(value, by, 0, end) if next < 0: break res.append(value[next + bylen:end]) @@ -159,20 +167,20 @@ return res - at specialize.argtype(0) + at specialize.argtype(0, 1) @jit.elidable def replace(input, sub, by, maxsplit=-1): if isinstance(input, str): - assert isinstance(sub, str) - assert isinstance(by, str) Builder = StringBuilder + elif isinstance(input, unicode): + Builder = UnicodeBuilder else: - assert isinstance(sub, unicode) - assert isinstance(by, unicode) - Builder = UnicodeBuilder + assert isinstance(input, list) + Builder = ByteListBuilder if maxsplit == 0: return input + if not sub: upper = len(input) if maxsplit > 0 and maxsplit < upper + 2: @@ -195,12 +203,12 @@ builder.append_slice(input, upper, len(input)) else: # First compute the exact result size - count = input.count(sub) - if count > maxsplit and maxsplit > 0: - count = maxsplit + cnt = count(input, sub, 0, len(input)) + if cnt > maxsplit and maxsplit > 0: + cnt = maxsplit diff_len = len(by) - len(sub) try: - result_size = ovfcheck(diff_len * count) + result_size = ovfcheck(diff_len * cnt) result_size = ovfcheck(result_size + len(input)) except OverflowError: raise @@ -210,7 
+218,7 @@ sublen = len(sub) while maxsplit != 0: - next = input.find(sub, start) + next = find(input, sub, start, len(input)) if next < 0: break builder.append_slice(input, start, next) @@ -235,7 +243,7 @@ end = length return start, end - at specialize.argtype(0) + at specialize.argtype(0, 1) @jit.elidable def startswith(u_self, prefix, start=0, end=sys.maxint): length = len(u_self) @@ -248,7 +256,7 @@ return False return True - at specialize.argtype(0) + at specialize.argtype(0, 1) @jit.elidable def endswith(u_self, suffix, start=0, end=sys.maxint): length = len(u_self) @@ -261,6 +269,133 @@ return False return True + at specialize.argtype(0, 1) +def find(value, other, start, end): + if ((isinstance(value, str) and isinstance(other, str)) or + (isinstance(value, unicode) and isinstance(other, unicode))): + return value.find(other, start, end) + return _search(value, other, start, end, SEARCH_FIND) + + at specialize.argtype(0, 1) +def rfind(value, other, start, end): + if ((isinstance(value, str) and isinstance(other, str)) or + (isinstance(value, unicode) and isinstance(other, unicode))): + return value.rfind(other, start, end) + return _search(value, other, start, end, SEARCH_RFIND) + + at specialize.argtype(0, 1) +def count(value, other, start, end): + if ((isinstance(value, str) and isinstance(other, str)) or + (isinstance(value, unicode) and isinstance(other, unicode))): + return value.count(other, start, end) + return _search(value, other, start, end, SEARCH_COUNT) + +# -------------- substring searching helper ---------------- + +SEARCH_COUNT = 0 +SEARCH_FIND = 1 +SEARCH_RFIND = 2 + +def bloom_add(mask, c): + return mask | (1 << (ord(c) & (BLOOM_WIDTH - 1))) + +def bloom(mask, c): + return mask & (1 << (ord(c) & (BLOOM_WIDTH - 1))) + + at specialize.argtype(0, 1) +def _search(value, other, start, end, mode): + if start < 0: + start = 0 + if end > len(value): + end = len(value) + if start > end: + return -1 + + count = 0 + n = end - start + m = len(other) + + 
if m == 0: + if mode == SEARCH_COUNT: + return end - start + 1 + elif mode == SEARCH_RFIND: + return end + else: + return start + + w = n - m + + if w < 0: + return -1 + + mlast = m - 1 + skip = mlast - 1 + mask = 0 + + if mode != SEARCH_RFIND: + for i in range(mlast): + mask = bloom_add(mask, other[i]) + if other[i] == other[mlast]: + skip = mlast - i - 1 + mask = bloom_add(mask, other[mlast]) + + i = start - 1 + while i + 1 <= start + w: + i += 1 + if value[i + m - 1] == other[m - 1]: + for j in range(mlast): + if value[i + j] != other[j]: + break + else: + if mode != SEARCH_COUNT: + return i + count += 1 + i += mlast + continue + + if i + m < len(value): + c = value[i + m] + else: + c = '\0' + if not bloom(mask, c): + i += m + else: + i += skip + else: + if i + m < len(value): + c = value[i + m] + else: + c = '\0' + if not bloom(mask, c): + i += m + else: + mask = bloom_add(mask, other[0]) + for i in range(mlast, 0, -1): + mask = bloom_add(mask, other[i]) + if other[i] == other[0]: + skip = i - 1 + + i = start + w + 1 + while i - 1 >= start: + i -= 1 + if value[i] == other[0]: + for j in xrange(mlast, 0, -1): + if value[i + j] != other[j]: + break + else: + return i + if i - 1 >= 0 and not bloom(mask, value[i - 1]): + i -= m + else: + i -= skip + else: + if i - 1 >= 0 and not bloom(mask, value[i - 1]): + i -= m + + if mode != SEARCH_COUNT: + return -1 + return count + # -------------- numeric parsing support -------------------- def strip_spaces(s): @@ -407,6 +542,35 @@ class UnicodeBuilder(AbstractStringBuilder): _tp = unicode +class ByteListBuilder(object): + def __init__(self, init_size=INIT_SIZE): + self.l = newlist_hint(init_size) + + @specialize.argtype(1) + def append(self, s): + for c in s: + self.l.append(c) + + @specialize.argtype(1) + def append_slice(self, s, start, end): + assert 0 <= start <= end <= len(s) + for c in s[start:end]: + self.l.append(c) + + def append_multiple_char(self, c, times): + assert isinstance(c, str) + self.l.extend([c[0]] * 
times) + + def append_charpsize(self, s, size): + assert size >= 0 + for i in xrange(size): + self.l.append(s[i]) + + def build(self): + return self.l + + def getlength(self): + return len(self.l) # ------------------------------------------------------------ # ----------------- implementation details ------------------- diff --git a/rpython/rlib/test/test_buffer.py b/rpython/rlib/test/test_buffer.py --- a/rpython/rlib/test/test_buffer.py +++ b/rpython/rlib/test/test_buffer.py @@ -1,10 +1,34 @@ from rpython.rlib.buffer import * +from rpython.annotator.annrpython import RPythonAnnotator +from rpython.annotator.model import SomeInteger def test_string_buffer(): buf = StringBuffer('hello world') assert buf.getitem(4) == 'o' + assert buf.getitem(4) == buf[4] assert buf.getlength() == 11 + assert buf.getlength() == len(buf) assert buf.getslice(1, 6, 1, 5) == 'ello ' + assert buf.getslice(1, 6, 1, 5) == buf[1:6] assert buf.getslice(1, 6, 2, 3) == 'el ' assert buf.as_str() == 'hello world' + + + +def test_len_nonneg(): + # This test needs a buffer subclass whose getlength() isn't guaranteed to + # return a non-neg integer. 
+ class DummyBuffer(Buffer): + def __init__(self, s): + self.size = s + + def getlength(self): + return self.size + def func(n): + buf = DummyBuffer(n) + return len(buf) + + a = RPythonAnnotator() + s = a.build_types(func, [int]) + assert s == SomeInteger(nonneg=True) diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -2,18 +2,29 @@ from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit from rpython.rlib.rstring import replace, startswith, endswith +from rpython.rlib.rstring import find, rfind, count +from rpython.rlib.buffer import StringBuffer from rpython.rtyper.test.tool import BaseRtypingTest def test_split(): - assert split("", 'x') == [''] - assert split("a", "a", 1) == ['', ''] - assert split(" ", " ", 1) == ['', ''] - assert split("aa", "a", 2) == ['', '', ''] - assert split('a|b|c|d', '|') == ['a', 'b', 'c', 'd'] - assert split('a|b|c|d', '|', 2) == ['a', 'b', 'c|d'] - assert split('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] - assert split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d'] - assert split('endcase test', 'test') == ['endcase ', ''] + def check_split(value, sub, *args, **kwargs): + result = kwargs['res'] + assert split(value, sub, *args) == result + + list_result = [list(i) for i in result] + assert split(list(value), sub, *args) == list_result + + assert split(buffer(value), sub, *args) == result + + check_split("", 'x', res=['']) + check_split("a", "a", 1, res=['', '']) + check_split(" ", " ", 1, res=['', '']) + check_split("aa", "a", 2, res=['', '', '']) + check_split('a|b|c|d', '|', res=['a', 'b', 'c', 'd']) + check_split('a|b|c|d', '|', 2, res=['a', 'b', 'c|d']) + check_split('a//b//c//d', '//', res=['a', 'b', 'c', 'd']) + check_split('a//b//c//d', '//', 2, res=['a', 'b', 'c//d']) + check_split('endcase test', 'test', res=['endcase ', '']) py.test.raises(ValueError, split, 'abc', '') def test_split_None(): 
@@ -33,13 +44,22 @@ py.test.raises(ValueError, split, u'abc', u'') def test_rsplit(): - assert rsplit("a", "a", 1) == ['', ''] - assert rsplit(" ", " ", 1) == ['', ''] - assert rsplit("aa", "a", 2) == ['', '', ''] - assert rsplit('a|b|c|d', '|') == ['a', 'b', 'c', 'd'] - assert rsplit('a|b|c|d', '|', 2) == ['a|b', 'c', 'd'] - assert rsplit('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] - assert rsplit('endcase test', 'test') == ['endcase ', ''] + def check_rsplit(value, sub, *args, **kwargs): + result = kwargs['res'] + assert rsplit(value, sub, *args) == result + + list_result = [list(i) for i in result] + assert rsplit(list(value), sub, *args) == list_result + + assert rsplit(buffer(value), sub, *args) == result + + check_rsplit("a", "a", 1, res=['', '']) + check_rsplit(" ", " ", 1, res=['', '']) + check_rsplit("aa", "a", 2, res=['', '', '']) + check_rsplit('a|b|c|d', '|', res=['a', 'b', 'c', 'd']) + check_rsplit('a|b|c|d', '|', 2, res=['a|b', 'c', 'd']) + check_rsplit('a//b//c//d', '//', res=['a', 'b', 'c', 'd']) + check_rsplit('endcase test', 'test', res=['endcase ', '']) py.test.raises(ValueError, rsplit, "abc", '') def test_rsplit_None(): @@ -58,25 +78,31 @@ py.test.raises(ValueError, rsplit, u"abc", u'') def test_string_replace(): - assert replace('one!two!three!', '!', '@', 1) == 'one@two!three!' - assert replace('one!two!three!', '!', '') == 'onetwothree' - assert replace('one!two!three!', '!', '@', 2) == 'one@two@three!' - assert replace('one!two!three!', '!', '@', 3) == 'one@two@three@' - assert replace('one!two!three!', '!', '@', 4) == 'one@two@three@' - assert replace('one!two!three!', '!', '@', 0) == 'one!two!three!' - assert replace('one!two!three!', '!', '@') == 'one@two@three@' - assert replace('one!two!three!', 'x', '@') == 'one!two!three!' - assert replace('one!two!three!', 'x', '@', 2) == 'one!two!three!' 
- assert replace('abc', '', '-') == '-a-b-c-' - assert replace('abc', '', '-', 3) == '-a-b-c' - assert replace('abc', '', '-', 0) == 'abc' - assert replace('', '', '') == '' - assert replace('', '', 'a') == 'a' - assert replace('abc', 'ab', '--', 0) == 'abc' - assert replace('abc', 'xy', '--') == 'abc' - assert replace('123', '123', '') == '' - assert replace('123123', '123', '') == '' - assert replace('123x123', '123', '') == 'x' + def check_replace(value, sub, *args, **kwargs): + result = kwargs['res'] + assert replace(value, sub, *args) == result + + assert replace(list(value), sub, *args) == list(result) + + check_replace('one!two!three!', '!', '@', 1, res='one@two!three!') + check_replace('one!two!three!', '!', '', res='onetwothree') + check_replace('one!two!three!', '!', '@', 2, res='one@two@three!') + check_replace('one!two!three!', '!', '@', 3, res='one@two@three@') + check_replace('one!two!three!', '!', '@', 4, res='one@two@three@') + check_replace('one!two!three!', '!', '@', 0, res='one!two!three!') + check_replace('one!two!three!', '!', '@', res='one@two@three@') + check_replace('one!two!three!', 'x', '@', res='one!two!three!') + check_replace('one!two!three!', 'x', '@', 2, res='one!two!three!') + check_replace('abc', '', '-', res='-a-b-c-') + check_replace('abc', '', '-', 3, res='-a-b-c') + check_replace('abc', '', '-', 0, res='abc') + check_replace('', '', '', res='') + check_replace('', '', 'a', res='a') + check_replace('abc', 'ab', '--', 0, res='abc') + check_replace('abc', 'xy', '--', res='abc') + check_replace('123', '123', '', res='') + check_replace('123123', '123', '', res='') + check_replace('123x123', '123', '', res='x') def test_string_replace_overflow(): if sys.maxint > 2**31-1: @@ -122,35 +148,45 @@ replace(s, u"a", s, len(s) - 10) def test_startswith(): - assert startswith('ab', 'ab') is True - assert startswith('ab', 'a') is True - assert startswith('ab', '') is True - assert startswith('x', 'a') is False - assert 
startswith('x', 'x') is True - assert startswith('', '') is True - assert startswith('', 'a') is False - assert startswith('x', 'xx') is False - assert startswith('y', 'xx') is False - assert startswith('ab', 'a', 0) is True - assert startswith('ab', 'a', 1) is False - assert startswith('ab', 'b', 1) is True - assert startswith('abc', 'bc', 1, 2) is False - assert startswith('abc', 'c', -1, 4) is True + def check_startswith(value, sub, *args, **kwargs): + result = kwargs['res'] + assert startswith(value, sub, *args) is result + assert startswith(list(value), sub, *args) is result + + check_startswith('ab', 'ab', res=True) + check_startswith('ab', 'a', res=True) + check_startswith('ab', '', res=True) + check_startswith('x', 'a', res=False) + check_startswith('x', 'x', res=True) + check_startswith('', '', res=True) + check_startswith('', 'a', res=False) + check_startswith('x', 'xx', res=False) + check_startswith('y', 'xx', res=False) + check_startswith('ab', 'a', 0, res=True) + check_startswith('ab', 'a', 1, res=False) + check_startswith('ab', 'b', 1, res=True) + check_startswith('abc', 'bc', 1, 2, res=False) + check_startswith('abc', 'c', -1, 4, res=True) def test_endswith(): - assert endswith('ab', 'ab') is True - assert endswith('ab', 'b') is True - assert endswith('ab', '') is True - assert endswith('x', 'a') is False - assert endswith('x', 'x') is True - assert endswith('', '') is True - assert endswith('', 'a') is False - assert endswith('x', 'xx') is False - assert endswith('y', 'xx') is False - assert endswith('abc', 'ab', 0, 2) is True - assert endswith('abc', 'bc', 1) is True - assert endswith('abc', 'bc', 2) is False - assert endswith('abc', 'b', -3, -1) is True + def check_endswith(value, sub, *args, **kwargs): + result = kwargs['res'] + assert endswith(value, sub, *args) is result + assert endswith(list(value), sub, *args) is result + + check_endswith('ab', 'ab', res=True) + check_endswith('ab', 'b', res=True) + check_endswith('ab', '', res=True) + 
check_endswith('x', 'a', res=False) + check_endswith('x', 'x', res=True) + check_endswith('', '', res=True) + check_endswith('', 'a', res=False) + check_endswith('x', 'xx', res=False) + check_endswith('y', 'xx', res=False) + check_endswith('abc', 'ab', 0, 2, res=True) + check_endswith('abc', 'bc', 1, res=True) + check_endswith('abc', 'bc', 2, res=False) + check_endswith('abc', 'b', -3, -1, res=True) def test_string_builder(): s = StringBuilder() @@ -177,6 +213,24 @@ assert result == 'aabcbdddd' assert isinstance(result, unicode) +def test_search(): + def check_search(func, value, sub, *args, **kwargs): + result = kwargs['res'] + assert func(value, sub, *args) == result + assert func(list(value), sub, *args) == result + + check_search(find, 'one two three', 'ne', 0, 13, res=1) + check_search(find, 'one two three', 'ne', 5, 13, res=-1) + check_search(find, 'one two three', '', 0, 13, res=0) + + check_search(rfind, 'one two three', 'e', 0, 13, res=12) + check_search(rfind, 'one two three', 'e', 0, 1, res=-1) + check_search(rfind, 'one two three', '', 0, 13, res=13) + + check_search(count, 'one two three', 'e', 0, 13, res=3) + check_search(count, 'one two three', 'e', 0, 1, res=0) + check_search(count, 'one two three', '', 0, 13, res=14) + class TestTranslates(BaseRtypingTest): def test_split_rsplit(self): @@ -196,6 +250,17 @@ res = self.interpret(fn, []) assert res + def test_buffer_parameter(self): + def fn(): + res = True + res = res and find('a//b//c//d', StringBuffer('//'), 0, 10) != -1 + res = res and rfind('a//b//c//d', StringBuffer('//'), 0, 10) != -1 + res = res and count('a//b//c//d', StringBuffer('//'), 0, 10) != 0 + return res + res = self.interpret(fn, []) + assert res + + def test_replace(self): def fn(): res = True diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -7,6 +7,7 @@ from rpython.rtyper.lltypesystem.lltype import Void from rpython.rtyper.rmodel import Repr, 
getgcflavor, inputconst from rpython.rlib.objectmodel import UnboxedValue +from rpython.tool.pairtype import pairtype class FieldListAccessor(object): @@ -390,7 +391,7 @@ raise NotImplementedError def _emulate_call(self, hop, meth_name): - vinst, = hop.inputargs(self) + vinst = hop.args_v[0] clsdef = hop.args_s[0].classdef s_unbound_attr = clsdef.find_attribute(meth_name).getvalue() s_attr = clsdef.lookup_filter(s_unbound_attr, meth_name, @@ -402,10 +403,10 @@ r_method = self.rtyper.getrepr(s_attr) r_method.get_method_from_instance(self, vinst, hop.llops) hop2 = hop.copy() - hop2.spaceop = op.simple_call(hop.spaceop.args[0]) + hop2.spaceop = op.simple_call(*hop.spaceop.args) hop2.spaceop.result = hop.spaceop.result - hop2.args_r = [r_method] - hop2.args_s = [s_attr] + hop2.args_r[0] = r_method + hop2.args_s[0] = s_attr return hop2.dispatch() def rtype_iter(self, hop): @@ -414,6 +415,15 @@ def rtype_next(self, hop): return self._emulate_call(hop, 'next') + def rtype_getslice(self, hop): + return self._emulate_call(hop, "__getslice__") + + def rtype_setslice(self, hop): + return self._emulate_call(hop, "__setslice__") + + def rtype_len(self, hop): + return self._emulate_call(hop, "__len__") + def ll_str(self, i): raise NotImplementedError @@ -460,6 +470,16 @@ if len(seen) == oldlength: break + +class __extend__(pairtype(AbstractInstanceRepr, Repr)): + def rtype_getitem((r_ins, r_obj), hop): + return r_ins._emulate_call(hop, "__getitem__") + + def rtype_setitem((r_ins, r_obj), hop): + return r_ins._emulate_call(hop, "__setitem__") + + + # ____________________________________________________________ def rtype_new_instance(rtyper, classdef, llops, classcallhop=None): diff --git a/rpython/rtyper/rlist.py b/rpython/rtyper/rlist.py --- a/rpython/rtyper/rlist.py +++ b/rpython/rtyper/rlist.py @@ -293,6 +293,11 @@ v_lst, v_factor = hop.inputargs(r_lst, Signed) return hop.gendirectcall(ll_mul, cRESLIST, v_lst, v_factor) +class __extend__(pairtype(IntegerRepr, 
AbstractBaseListRepr)): + def rtype_mul((r_int, r_lst), hop): + cRESLIST = hop.inputconst(Void, hop.r_result.LIST) + v_factor, v_lst = hop.inputargs(Signed, r_lst) + return hop.gendirectcall(ll_mul, cRESLIST, v_lst, v_factor) class __extend__(pairtype(AbstractListRepr, IntegerRepr)): diff --git a/rpython/rtyper/test/test_rclass.py b/rpython/rtyper/test/test_rclass.py --- a/rpython/rtyper/test/test_rclass.py +++ b/rpython/rtyper/test/test_rclass.py @@ -1193,6 +1193,69 @@ assert self.interpret(f, [True]) == f(True) assert self.interpret(f, [False]) == f(False) + def test_indexing(self): + class A(object): + def __init__(self, data): + self.data = data + + def __getitem__(self, i): + return self.data[i] + + def __setitem__(self, i, v): + self.data[i] = v + + def __getslice__(self, start, stop): + assert start >= 0 + assert stop >= 0 + return self.data[start:stop] + + def __setslice__(self, start, stop, v): + assert start >= 0 + assert stop >= 0 + i = 0 + for n in range(start, stop): + self.data[n] = v[i] + i += 1 + + def getitem(i): + a = A("abcdefg") + return a[i] + + def setitem(i, v): + a = A([0] * 5) + a[i] = v + return a[i] + + def getslice(start, stop): + a = A([1, 2, 3, 4, 5, 6]) + sum = 0 + for i in a[start:stop]: + sum += i + return sum + + def setslice(start, stop, i): + a = A([0] * stop) + a[start:stop] = range(start, stop) + return a[i] + + assert self.interpret(getitem, [0]) == getitem(0) + assert self.interpret(getitem, [1]) == getitem(1) + assert self.interpret(setitem, [0, 5]) == setitem(0, 5) + assert self.interpret(getslice, [0, 4]) == getslice(0, 4) + assert self.interpret(getslice, [1, 4]) == getslice(1, 4) + assert self.interpret(setslice, [4, 6, 5]) == setslice(4, 6, 5) + + def test_len(self): + class A(object): + def __len__(self): + return 5 + + def fn(): + a = A() + return len(a) + + assert self.interpret(fn, []) == fn() + def test_init_with_star_args(self): class Base(object): def __init__(self, a, b): diff --git 
a/rpython/rtyper/test/test_rlist.py b/rpython/rtyper/test/test_rlist.py --- a/rpython/rtyper/test/test_rlist.py +++ b/rpython/rtyper/test/test_rlist.py @@ -946,6 +946,15 @@ for arg in (1, 9, 0, -1, -27): res = self.interpret(fn, [arg]) assert res == fn(arg) + def fn(i): + lst = i * [i, i + 1] + ret = len(lst) + if ret: + ret *= lst[-1] + return ret + for arg in (1, 9, 0, -1, -27): + res = self.interpret(fn, [arg]) + assert res == fn(arg) def test_list_inplace_multiply(self): def fn(i): From noreply at buildbot.pypy.org Mon Jun 16 17:14:34 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 16 Jun 2014 17:14:34 +0200 (CEST) Subject: [pypy-commit] pypy utf8-unicode2: WIP Message-ID: <20140616151434.D1D2E1D248A@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: utf8-unicode2 Changeset: r72077:69df5d97a930 Date: 2014-06-16 10:13 -0500 http://bitbucket.org/pypy/pypy/changeset/69df5d97a930/ Log: WIP diff --git a/pypy/interpreter/test/test_utf8.py b/pypy/interpreter/test/test_utf8.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/test/test_utf8.py @@ -0,0 +1,65 @@ +from pypy.interpreter.utf8 import ( + Utf8Str, Utf8Builder, utf8chr, utf8ord) + +def build_utf8str(): + builder = Utf8Builder() + builder.append('A') #0x41 + builder.append(0x10F) #0xC4 0x8F + builder.append(0x20AC) #0xE2 0x82 0xAC + builder.append(0x1F63D) #0xF0 0x9F 0x98 0xBD + return builder.build() + +def test_builder(): + s = build_utf8str() + assert not s._is_ascii + + assert list(s.bytes) == [chr(i) for i in [ + 0x41, + 0xC4, 0x8F, + 0xE2, 0x82, 0xAC, + 0xF0, 0x9F, 0x98, 0xBD, + ]] + +def test_unicode_literal_comparison(): + builder = Utf8Builder() + builder.append(0x10F) + s = builder.build() + assert s == u'\u010F' + assert s[0] == u'\u010F' + assert s[0] == utf8chr(0x10F) + +def test_utf8chr(): + assert utf8chr(65) == u'A' + assert utf8chr(0x7FF) == u'\u07FF' + assert utf8chr(0x17FF) == u'\u17FF' + assert utf8chr(0x10001) == u'\U00010001' + +def test_utf8ord(): + s = 
build_utf8str() + assert utf8ord(s) == 65 + assert utf8ord(s, 1) == 0x10F + assert utf8ord(s, 2) == 0x20AC + assert utf8ord(s, 3) == 0x1F63D + +def test_len(): + s = build_utf8str() + assert len(s) == 4 + +def test_getitem(): + s = build_utf8str() + + assert s[0] == utf8chr(65) + assert s[1] == utf8chr(0x10F) + assert s[2] == utf8chr(0x20AC) + assert s[3] == utf8chr(0x1F63D) + assert s[-1] == utf8chr(0x1F63D) + assert s[-2] == utf8chr(0x20AC) + +def test_getslice(): + s = build_utf8str() + + assert s[0:1] == u'A' + assert s[0:2] == u'A\u010F' + assert s[1:2] == u'\u010F' + assert s[-4:-3] == u'A' + assert s[-4:-2] == u'A\u010F' diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,6 +1,5 @@ from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode from pypy.module._codecs import interp_codecs @specialize.memo() @@ -35,29 +34,30 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.interpreter.utf8 import decode_unicode_escape state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - result, consumed = runicode.str_decode_unicode_escape( + result, consumed = decode_unicode_escape( string, len(string), "strict", final=True, errorhandler=decode_error_handler(space), unicodedata_handler=unicodedata_handler) return result def decode_raw_unicode_escape(space, string): - result, consumed = runicode.str_decode_raw_unicode_escape( + from pypy.interpreter.utf8 import decode_raw_unicode_escape + result, consumed = decode_raw_unicode_escape( string, len(string), "strict", final=True, errorhandler=decode_error_handler(space)) return result def decode_utf8(space, string): - result, consumed = runicode.str_decode_utf_8( + from pypy.interpreter.utf8 import 
decode_utf_8 + result, consumed = decode_utf_8( string, len(string), "strict", final=True, errorhandler=decode_error_handler(space), allow_surrogates=True) return result def encode_utf8(space, uni): - return runicode.unicode_encode_utf_8( - uni, len(uni), "strict", - errorhandler=encode_error_handler(space), - allow_surrogates=True) + # unicode to string... + return s.bytes diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/utf8.py @@ -0,0 +1,569 @@ +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.objectmodel import specialize +from rpython.rlib.runicode import utf8_code_length + +MAXUNICODE = 0x10ffff + +def utf8chr(value): + # Like unichr, but returns a Utf8Str object + b = Utf8Builder() + b.append(value) + return b.build() + +def utf8ord(ustr, start=0): + bytes = ustr.bytes + start = ustr.index_of_char(start) + codepoint_length = utf8_code_length[ord(bytes[start])] + + if codepoint_length == 1: + return ord(bytes[start]) + + elif codepoint_length == 2: + return ((ord(bytes[start]) & 0x1F) << 6 | + (ord(bytes[start + 1]) & 0x3F)) + elif codepoint_length == 3: + return ((ord(bytes[start]) & 0xF) << 12 | + (ord(bytes[start + 1]) & 0x3F) << 6 | + (ord(bytes[start + 2]) & 0x3F)) + else: + assert codepoint_length == 4 + return ((ord(bytes[start]) & 0xF) << 18 | + (ord(bytes[start + 1]) & 0x3F) << 12 | + (ord(bytes[start + 2]) & 0x3F) << 6 | + (ord(bytes[start + 3]) & 0x3F)) + + +class Utf8Str(object): + _immutable_fields_ = ['bytes', '_is_ascii', '_len'] + + def __init__(self, data, is_ascii=False, length=-1): + # TODO: Maybe I can determine is_ascii rather than have it passed in? + # It really depends on what my model ends up looking like? + # It is worth noting that this check can be really fast. 
We just + # have to iterate the bytes while checking for (& 0b01000000) + + self.bytes = data + self._is_ascii = is_ascii + + if length != -1: + self._len = length + else: + if not is_ascii: + #self._len = -1 + self._calc_length() + else: + self._len = len(data) + + def _calc_length(self): + pos = 0 + length = 0 + + while pos < len(self.bytes): + length += 1 + pos += utf8_code_length[ord(self.bytes[pos])] + + self._len = length + + def index_of_char(self, char): + byte = 0 + pos = 0 + while pos < char: + pos += 1 + byte += utf8_code_length[ord(self.bytes[byte])] + + return byte + + def __getitem__(self, char_pos): + # This if statement is needed for [-1:0] to slice correctly + if char_pos < 0: + char_pos += self._len + return self[char_pos:char_pos+1] + + def __getslice__(self, start, stop): + assert start < stop + # TODO: If start > _len or stop >= _len, then raise exception + + if self._is_ascii: + return Utf8Str(self.bytes[start:stop], True) + + start_byte = self.index_of_char(start) + stop_byte = start_byte + stop_pos = start + # TODO: Is detecting ascii-ness here actually useful? If it will + # happen in __init__ anyway, maybe its not worth the extra + # complexity. 
+ is_ascii = True + while stop_pos < stop: + stop_pos += 1 + increment = utf8_code_length[ord(self.bytes[stop_byte])] + if increment != 1: + is_ascii = False + stop_byte += increment + + return Utf8Str(self.bytes[start_byte:stop_byte], is_ascii, + stop - start) + + def __len__(self): + return self._len + + def __eq__(self, other): + """NOT_RPYTHON""" + if isinstance(other, Utf8Str): + return self.bytes == other.bytes + if isinstance(other, unicode): + return unicode(self.bytes, 'utf8') == other + + return False + +class Utf8Builder(object): + @specialize.argtype(1) + def __init__(self, init_size=None): + if init_size is None: + self._builder = StringBuilder() + else: + self._builder = StringBuilder(init_size) + self._is_ascii = True + + + @specialize.argtype(1) + def append(self, c): + if isinstance(c, int): + if c < 0x80: + self._builder.append(chr(c)) + elif c < 0x800: + self._builder.append(chr(0xC0 | (c >> 6))) + self._builder.append(chr(0x80 | (c & 0x3F))) + self._is_ascii = False + elif c < 0x10000: + self._builder.append(chr(0xE0 | (c >> 12))) + self._builder.append(chr(0x80 | (c >> 6 & 0x3F))) + self._builder.append(chr(0x80 | (c & 0x3F))) + self._is_ascii = False + elif c <= 0x10FFFF: + self._builder.append(chr(0xF0 | (c >> 18))) + self._builder.append(chr(0x80 | (c >> 12 & 0x3F))) + self._builder.append(chr(0x80 | (c >> 6 & 0x3F))) + self._builder.append(chr(0x80 | (c & 0x3F))) + self._is_ascii = False + else: + raise ValueError("Invalid unicode codepoint > 0x10FFFF.") + else: + # TODO: Only allow ord(c) in [0, 127] + self._builder.append(c) + + def append_slice(self, s, start, end, is_ascii=False): + self._builder.append_slice(s, start, end) + if not is_ascii: + self._is_ascii = False + + def build(self): + return Utf8Str(self._builder.build(), self._is_ascii) + + +# ____________________________________________________________ +# Escape-parsing functions + +def decode_raw_unicode_escape(s, size, errors, final=False, + errorhandler=None): + if 
errorhandler is None: + errorhandler = default_unicode_error_decode + if size == 0: + # TODO:? + return Utf8Str('', True), 0 + + result = Utf8Builder(size) + pos = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + result.append(ch) + pos += 1 + continue + + # \u-escapes are only interpreted iff the number of leading + # backslashes is odd + bs = pos + while pos < size: + pos += 1 + if pos == size or s[pos] != '\\': + break + result.append('\\') + + # we have a backslash at the end of the string, stop here + if pos >= size: + result.append('\\') + break + + if ((pos - bs) & 1 == 0 or + pos >= size or + (s[pos] != 'u' and s[pos] != 'U')): + result.append('\\') + result.append(s[pos]) + pos += 1 + continue + + digits = 4 if s[pos] == 'u' else 8 + message = "truncated \\uXXXX" + pos += 1 + pos = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) + + return result.build(), pos + +# Specialize on the errorhandler when it's a constant +@specialize.arg_or_var(4) +def decode_unicode_escape(s, size, errors, final=False, + errorhandler=None, + unicodedata_handler=None): + if errorhandler is None: + errorhandler = default_unicode_error_decode + + if size == 0: + return Utf8Str('', True), 0 + + builder = Utf8Builder(size) + pos = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + builder.append(ch) + pos += 1 + continue + + # - Escapes + pos += 1 + if pos >= size: + message = "\\ at end of string" + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, size) + builder.append(res) + continue + + ch = s[pos] + pos += 1 + # \x escapes + if ch == '\n': pass + elif ch == '\\': builder.append('\\') + elif ch == '\'': builder.append('\'') + elif ch == '\"': builder.append('\"') + elif ch == 'b' : builder.append('\b') + elif ch == 'f' : builder.append('\f') + elif ch == 't' : builder.append('\t') 
+ elif ch == 'n' : builder.append('\n') + elif ch == 'r' : builder.append('\r') + elif ch == 'v' : builder.append('\v') + elif ch == 'a' : builder.append('\a') + elif '0' <= ch <= '7': + x = ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + builder.append(x) + # hex escapes + # \xXX + elif ch == 'x': + digits = 2 + message = "truncated \\xXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + + # \uXXXX + elif ch == 'u': + digits = 4 + message = "truncated \\uXXXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + + # \UXXXXXXXX + elif ch == 'U': + digits = 8 + message = "truncated \\UXXXXXXXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + + # \N{name} + elif ch == 'N': + message = "malformed \\N character escape" + look = pos + if unicodedata_handler is None: + message = ("\\N escapes not supported " + "(can't load unicodedata module)") + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, size) + builder.append(res) + continue + + if look < size and s[look] == '{': + # look for the closing brace + while look < size and s[look] != '}': + look += 1 + if look < size and s[look] == '}': + # found a name. 
look it up in the unicode database + message = "unknown Unicode character name" + name = s[pos+1:look] + code = unicodedata_handler.call(name) + if code < 0: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + continue + pos = look + 1 + builder.append(code) + else: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + builder.append('\\') + builder.append(ch) + + return builder.build(), pos + +hexdigits = "0123456789ABCDEFabcdef" + +def hexescape(builder, s, pos, digits, + encoding, errorhandler, message, errors): + chr = 0 + if pos + digits > len(s): + endinpos = pos + while endinpos < len(s) and s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + try: + chr = r_uint(int(s[pos:pos+digits], 16)) + except ValueError: + endinpos = pos + while s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + # when we get here, chr is a 32-bit unicode character + if chr <= MAXUNICODE: + builder.append(chr) + pos += digits + + else: + message = "illegal Unicode character" + res, pos = errorhandler(errors, encoding, + message, s, pos-2, pos+digits) + builder.append(res) + return pos + +# ____________________________________________________________ + +# Converting bytes (utf8) to unicode? +# I guess we just make sure we're looking at valid utf-8 and then make the +# object? 
+ +def decode_utf_8(s, size, errors, final=False, + errorhandler=None, allow_surrogates=False): + if errorhandler is None: + errorhandler = default_unicode_error_decode + result = Utf8Builder(size) + pos = decode_utf_8_impl(s, size, errors, final, errorhandler, result, + allow_surrogates=allow_surrogates) + return result.build(), pos + +def decode_utf_8_impl(s, size, errors, final, errorhandler, result, + allow_surrogates): + if size == 0: + return 0 + + # TODO: Instead of assembling and then re-disassembling the codepoints, + # just use builder.append_slice + pos = 0 + while pos < size: + ordch1 = ord(s[pos]) + # fast path for ASCII + # XXX maybe use a while loop here + if ordch1 < 0x80: + result.append(ordch1) + pos += 1 + continue + + n = utf8_code_length[ordch1] + if pos + n > size: + if not final: + break + charsleft = size - pos - 1 # either 0, 1, 2 + # note: when we get the 'unexpected end of data' we don't care + # about the pos anymore and we just ignore the value + if not charsleft: + # there's only the start byte and nothing else + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+1) + result.append(r) + break + ordch2 = ord(s[pos+1]) + if n == 3: + # 3-bytes seq with only a continuation byte + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xe0 and ordch2 < 0xa0)): + # or (ordch1 == 0xed and ordch2 > 0x9f) + # second byte invalid, take the first and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + else: + # second byte valid, but third byte missing + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+2) + result.append(r) + break + elif n == 4: + # 4-bytes seq with 1 or 2 continuation bytes + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xf0 and ordch2 < 0x90) or + (ordch1 == 0xf4 and ordch2 > 0x8f)): + # second byte invalid, take the first and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation 
byte', + s, pos, pos+1) + result.append(r) + continue + elif charsleft == 2 and ord(s[pos+2])>>6 != 0x2: # 0b10 + # third byte invalid, take the first two and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + else: + # there's only 1 or 2 valid cb, but the others are missing + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+charsleft+1) + result.append(r) + break + + if n == 0: + r, pos = errorhandler(errors, 'utf8', + 'invalid start byte', + s, pos, pos+1) + result.append(r) + + elif n == 1: + assert 0, "ascii should have gone through the fast path" + + elif n == 2: + ordch2 = ord(s[pos+1]) + if ordch2>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + result.append(((ordch1 & 0x1F) << 6) + # 0b00011111 + (ordch2 & 0x3F)) # 0b00111111 + pos += 2 + + elif n == 3: + ordch2 = ord(s[pos+1]) + ordch3 = ord(s[pos+2]) + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xe0 and ordch2 < 0xa0) + # surrogates shouldn't be valid UTF-8! 
+ or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f) + ): + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif ordch3>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + result.append((((ordch1 & 0x0F) << 12) + # 0b00001111 + ((ordch2 & 0x3F) << 6) + # 0b00111111 + (ordch3 & 0x3F))) # 0b00111111 + pos += 3 + + elif n == 4: + ordch2 = ord(s[pos+1]) + ordch3 = ord(s[pos+2]) + ordch4 = ord(s[pos+3]) + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xf0 and ordch2 < 0x90) or + (ordch1 == 0xf4 and ordch2 > 0x8f)): + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif ordch3>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + elif ordch4>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+3) + result.append(r) + continue + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + c = (((ordch1 & 0x07) << 18) + # 0b00000111 + ((ordch2 & 0x3F) << 12) + # 0b00111111 + ((ordch3 & 0x3F) << 6) + # 0b00111111 + (ordch4 & 0x3F)) # 0b00111111 + + # TODO: Why doesn't this raise an error when c > MAXUNICODE? 
If I'm + # converting utf8 -> utf8 is this necessary + if c <= MAXUNICODE: + result.append(c) + pos += 4 + + return pos + +# ____________________________________________________________ +# Default error handlers + + +def default_unicode_error_decode(errors, encoding, msg, s, + startingpos, endingpos): + if errors == 'replace': + return _unicode_error_replacement, endingpos + if errors == 'ignore': + return '', endingpos + raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg) +_unicode_error_replacement = decode_raw_unicode_escape( + '\ufffd', 1, default_unicode_error_decode) + +def default_unicode_error_encode(errors, encoding, msg, u, + startingpos, endingpos): + if errors == 'replace': + return '?', None, endingpos + if errors == 'ignore': + return '', None, endingpos + raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg) + diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -5,7 +5,8 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.gateway import unwrap_spec, WrappedDefault -from rpython.rlib.runicode import UNICHR +from pypy.interpreter.utf8 import Utf8Str, utf8chr +#from rpython.rlib.runicode import UNICHR from rpython.rlib.rfloat import isnan, isinf, round_double from rpython.rlib import rfloat import __builtin__ @@ -28,7 +29,8 @@ "Return a Unicode string of one character with the given ordinal." # XXX range checking! 
try: - c = UNICHR(code) + #c = UNICHR(code) + c = utf8chr(code) except ValueError: raise OperationError(space.w_ValueError, space.wrap("unichr() arg out of range")) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -3,6 +3,7 @@ from pypy.interpreter import special from pypy.interpreter.baseobjspace import ObjSpace, W_Root from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.utf8 import Utf8Str from pypy.interpreter.typedef import get_unique_interplevel_subclass from pypy.objspace.std import (builtinshortcut, stdtypedef, frame, model, transparent, callmethod) @@ -158,8 +159,12 @@ return self.newint(x) if isinstance(x, str): return wrapstr(self, x) + if isinstance(x, Utf8Str): + return wrapunicode(self, x) + if isinstance(x, unicode): - return wrapunicode(self, x) + import pdb; pdb.set_trace() + if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -10,6 +10,7 @@ from pypy.interpreter import unicodehelper from pypy.interpreter.baseobjspace import W_Root +from pypy.interpreter.utf8 import Utf8Str from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, interp2app, unwrap_spec from pypy.module.unicodedata import unicodedb @@ -29,7 +30,8 @@ _immutable_fields_ = ['_value'] def __init__(w_self, unistr): - assert isinstance(unistr, unicode) + assert isinstance(unistr, Utf8Str) + #assert isinstance(unistr, unicode) w_self._value = unistr def __repr__(w_self): @@ -1076,7 +1078,8 @@ return [s for s in value] -W_UnicodeObject.EMPTY = W_UnicodeObject(u'') +#W_UnicodeObject.EMPTY = W_UnicodeObject(u'') +W_UnicodeObject.EMPTY = W_UnicodeObject(Utf8Str('')) # Helper for converting int/long From noreply at 
buildbot.pypy.org Mon Jun 16 17:15:43 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 16 Jun 2014 17:15:43 +0200 (CEST) Subject: [pypy-commit] cffi default: Ah, on http://bugs.python.org/issue21778 I got the answer as to what Message-ID: <20140616151543.9D9311D248A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1514:f27ffac380dc Date: 2014-06-16 17:15 +0200 http://bitbucket.org/cffi/cffi/changeset/f27ffac380dc/ Log: Ah, on http://bugs.python.org/issue21778 I got the answer as to what the expected way to call this is. diff --git a/c/minibuffer.h b/c/minibuffer.h --- a/c/minibuffer.h +++ b/c/minibuffer.h @@ -105,12 +105,9 @@ static int mb_getbuf(MiniBufferObj *self, Py_buffer *view, int flags) { - static Py_ssize_t dummy_stride = 1; - int res = PyBuffer_FillInfo(view, (PyObject *)self, - self->mb_data, self->mb_size, - /*readonly=*/0, PyBUF_CONTIG | PyBUF_FORMAT); - view->strides = &dummy_stride; - return res; + return PyBuffer_FillInfo(view, (PyObject *)self, + self->mb_data, self->mb_size, + /*readonly=*/0, flags); } static PySequenceMethods mb_as_sequence = { From noreply at buildbot.pypy.org Mon Jun 16 21:26:06 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Mon, 16 Jun 2014 21:26:06 +0200 (CEST) Subject: [pypy-commit] pypy rpath-enforceargs: hg merge default Message-ID: <20140616192606.B408B1C1068@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: rpath-enforceargs Changeset: r72078:f91e27235f5c Date: 2014-06-16 20:24 +0100 http://bitbucket.org/pypy/pypy/changeset/f91e27235f5c/ Log: hg merge default diff too long, truncating to 2000 out of 5322 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -10,3 +10,7 @@ 20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 0000000000000000000000000000000000000000 release-2.3.0 394146e9bb673514c61f0150ab2013ccf78e8de7 release-2.3 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.2=3.1 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.3.1 
+32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.2=3.1 +0000000000000000000000000000000000000000 release-2.2=3.1 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -128,6 +128,7 @@ Stian Andreassen Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume Oscar Nierstrasz @@ -212,7 +213,9 @@ Alejandro J. Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner anatoly techtonik Lutz Paelike @@ -245,6 +248,7 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe Laurens Van Houtven Akira Li @@ -274,6 +278,8 @@ Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr jiaaro opassembler.py Antony Lee diff --git a/lib_pypy/_tkinter/license.terms b/lib_pypy/_tkinter/license.terms new file mode 100644 --- /dev/null +++ b/lib_pypy/_tkinter/license.terms @@ -0,0 +1,39 @@ +This software is copyrighted by the Regents of the University of +California, Sun Microsystems, Inc., and other parties. The following +terms apply to all files associated with the software unless explicitly +disclaimed in individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, provided +that existing copyright notices are retained in all copies and that this +notice is included verbatim in any distributions. No written agreement, +license, or royalty fee is required for any of the authorized uses. +Modifications to this software may be copyrighted by their authors +and need not follow the licensing terms described here, provided that +the new terms are clearly indicated on the first page of each file where +they apply. 
+ +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE +IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE +NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" +in the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you +are acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. 
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -113,7 +113,7 @@ try: for name in modlist: __import__(name) - except (ImportError, CompilationError, py.test.skip.Exception), e: + except (ImportError, CompilationError, py.test.skip.Exception) as e: errcls = e.__class__.__name__ raise Exception( "The module %r is disabled\n" % (modname,) + diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -105,7 +105,7 @@ while True: try: w_key = space.next(w_iter) - except OperationError, e: + except OperationError as e: if not e.match(space, space.w_StopIteration): raise # re-raise other app-level exceptions break @@ -348,8 +348,12 @@ **objects** - Normal rules apply. Special methods are not honoured, except ``__init__``, - ``__del__`` and ``__iter__``. + Normal rules apply. The only special methods that are honoured are + ``__init__``, ``__del__``, ``__len__``, ``__getitem__``, ``__setitem__``, + ``__getslice__``, ``__setslice__``, and ``__iter__``. To handle slicing, + ``__getslice__`` and ``__setslice__`` must be used; using ``__getitem__`` and + ``__setitem__`` for slicing isn't supported. Additionally, using negative + indices for slicing is still not support, even when using ``__getslice__``. This layout makes the number of types to take care about quite limited. @@ -567,7 +571,7 @@ try: ... - except OperationError, e: + except OperationError as e: if not e.match(space, space.w_XxxError): raise ... diff --git a/pypy/doc/config/translation.log.txt b/pypy/doc/config/translation.log.txt --- a/pypy/doc/config/translation.log.txt +++ b/pypy/doc/config/translation.log.txt @@ -2,4 +2,4 @@ These must be enabled by setting the PYPYLOG environment variable. The exact set of features supported by PYPYLOG is described in -pypy/translation/c/src/debug_print.h. +rpython/translator/c/src/debug_print.h. 
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -99,6 +99,7 @@ Stian Andreassen Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume Oscar Nierstrasz @@ -183,7 +184,9 @@ Alejandro J. Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner anatoly techtonik Lutz Paelike @@ -216,6 +219,7 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe Laurens Van Houtven Akira Li @@ -245,6 +249,8 @@ Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr jiaaro opassembler.py Antony Lee diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-2.3.1.rst release-2.3.0.rst release-2.2.1.rst release-2.2.0.rst diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -40,7 +40,7 @@ * `FAQ`_: some frequently asked questions. -* `Release 2.3.0`_: the latest official release +* `Release 2.3.1`_: the latest official release * `PyPy Blog`_: news and status info about PyPy @@ -110,7 +110,7 @@ .. _`Getting Started`: getting-started.html .. _`Papers`: extradoc.html .. _`Videos`: video-index.html -.. _`Release 2.3.0`: http://pypy.org/download.html +.. _`Release 2.3.1`: http://pypy.org/download.html .. _`speed.pypy.org`: http://speed.pypy.org .. _`RPython toolchain`: translation.html .. 
_`potential project ideas`: project-ideas.html diff --git a/pypy/doc/man/pypy.1.rst b/pypy/doc/man/pypy.1.rst --- a/pypy/doc/man/pypy.1.rst +++ b/pypy/doc/man/pypy.1.rst @@ -95,13 +95,12 @@ ``PYPYLOG`` If set to a non-empty value, enable logging, the format is: - *fname* + *fname* or *+fname* logging for profiling: includes all ``debug_start``/``debug_stop`` but not any nested ``debug_print``. *fname* can be ``-`` to log to *stderr*. - Note that using a : in fname is a bad idea, Windows - users, beware. + The *+fname* form can be used if there is a *:* in fname ``:``\ *fname* Full logging, including ``debug_print``. diff --git a/pypy/doc/release-2.3.1.rst b/pypy/doc/release-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-2.3.1.rst @@ -0,0 +1,81 @@ +================================================= +PyPy 2.3.1 - Terrestrial Arthropod Trap Revisited +================================================= + +We're pleased to announce PyPy 2.3.1, a feature-and-bugfix improvement over our +recent release last month. + +This release contains several bugfixes and enhancements. + +You can download the PyPy 2.3.1 release here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project, and for those who donate to our three sub-projects. +We've shown quite a bit of progress +but we're slowly running out of funds. +Please consider donating more, or even better convince your employer to donate, +so we can finish those projects! The three sub-projects are: + +* `Py3k`_ (supporting Python 3.x): the release PyPy3 2.3 is imminent. + +* `STM`_ (software transactional memory): a preview will be released very soon, + once we fix a few bugs + +* `NumPy`_ which requires installation of our fork of upstream numpy, available `on bitbucket`_ + +.. _`Py3k`: http://pypy.org/py3donate.html +.. _`STM`: http://pypy.org/tmdonate2.html +.. _`NumPy`: http://pypy.org/numpydonate.html +.. 
_`on bitbucket`: https://www.bitbucket.org/pypy/numpy + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7. It's fast (`pypy 2.3 and cpython 2.7.x`_ performance comparison; +note that cpython's speed has not changed since 2.7.2) +due to its integrated tracing JIT compiler. + +This release supports x86 machines running Linux 32/64, Mac OS X 64, Windows, +and OpenBSD, +as well as newer ARM hardware (ARMv6 or ARMv7, with VFPv3) running Linux. + +While we support 32 bit python on Windows, work on the native Windows 64 +bit python is still stalling, we would welcome a volunteer +to `handle that`_. + +.. _`pypy 2.3 and cpython 2.7.x`: http://speed.pypy.org +.. _`handle that`: http://doc.pypy.org/en/latest/windows.html#what-is-missing-for-a-full-64-bit-translation + +Highlights +========== + +Issues with the 2.3 release were resolved after being reported by users to +our new issue tracker at https://bitbucket.org/pypy/pypy/issues or on IRC at +#pypy. Here is a summary of the user-facing changes; +for more information see `whats-new`_: + +* The built-in ``struct`` module was renamed to ``_struct``, solving issues + with IDLE and other modules. + +* Support for compilation with gcc-4.9 + +* A rewrite of packaging.py which produces our downloadable packages to + modernize command line argument handling and to document third-party + contributions in our LICENSE file + +* A CFFI-based version of the gdbm module is now included in our downloads + +* Many issues were resolved_ since the 2.3 release on May 8 + +.. _`whats-new`: http://doc.pypy.org/en/latest/whatsnew-2.3.1.html +.. _resolved: https://bitbucket.org/pypy/pypy/issues?status=resolved +Please try it out and let us know what you think. We especially welcome +success stories, we know you are using PyPy, please tell us about it! 
+ +Cheers + +The PyPy Team + diff --git a/pypy/doc/whatsnew-2.3.1.rst b/pypy/doc/whatsnew-2.3.1.rst --- a/pypy/doc/whatsnew-2.3.1.rst +++ b/pypy/doc/whatsnew-2.3.1.rst @@ -9,5 +9,16 @@ Support compilation with gcc-4.9 -Fixes for issues #1769, #1764, #1762, #1752 +Added support for the stdlib gdbm module via cffi +Annotator cleanups + +.. branch: release-2.3.x + +.. branch: unify-call-ops + +.. branch packaging +Use argparse for packaging.py, and add third-party components to LICENSE file. +Also mention that gdbm is GPL. +Do not crash the packaging process on failure in CFFI or license-building, +rather complete the build step and return -1. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,12 +3,10 @@ ======================= .. this is a revision shortly after release-2.3.x -.. startrev: b2cc67adbaad +.. startrev: ca9b7cf02cf4 -Added support for the stdlib gdbm module via cffi +.. branch: fix-bytearray-complexity +Bytearray operations no longer copy the bytearray unnecessarily -Annotator cleanups - -.. branch: release-2.3.x - -.. branch: unify-call-ops +Added support for ``__getitem__``, ``__setitem__``, ``__getslice__``, +``__setslice__``, and ``__len__`` to RPython diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -10,8 +10,14 @@ 64bit Windows. See at the end of this page for what is missing for a full 64bit translation. -To build pypy-c you need a C compiler. Microsoft Visual Studio is -preferred, but can also use the mingw32 port of gcc. +To build pypy-c you need a working python environment, and a C compiler. +It is possible to translate with a CPython 2.6 or later, but this is not +the preferred way, because it will take a lot longer to run – depending +on your architecture, between two and three times as long. So head to +`our downloads`_ and get the latest stable version. 
+ +Microsoft Visual Studio is preferred as a compiler, but there are reports +of success with the mingw32 port of gcc. Translating PyPy with Visual Studio @@ -34,10 +40,20 @@ **Note:** PyPy is currently not supported for 64 bit Windows, and translation will fail in this case. -The compiler is all you need to build pypy-c, but it will miss some +Python and a C compiler are all you need to build pypy, but it will miss some modules that relies on third-party libraries. See below how to get and build them. +Please see the `non-windows instructions`_ for more information, especially note +that translation is RAM-hungry. A standard translation requires around 4GB, so +special preparations are necessary, or you may want to use the method in the +notes of the `build instructions`_ to reduce memory usage at the price of a +slower translation:: + + set PYPY_GC_MAX_DELTA=200MB + pypy --jit loop_longevity=300 ../../rpython/bin/rpython -Ojit targetpypystandalone + set PYPY_GC_MAX_DELTA= + Preping Windows for the Large Build ----------------------------------- @@ -52,9 +68,10 @@ Then you need to execute:: - editbin /largeaddressaware pypy.exe + editbin /largeaddressaware translator.exe -on the pypy.exe file you compiled. +where ``translator.exe`` is the pypy.exe or cpython.exe you will use to +translate with. Installing external packages ---------------------------- @@ -244,7 +261,9 @@ .. _`msys for mingw`: http://sourceforge.net/projects/mingw-w64/files/External%20binary%20packages%20%28Win64%20hosted%29/MSYS%20%2832-bit%29 .. _`libffi source files`: http://sourceware.org/libffi/ .. _`RPython translation toolchain`: translation.html - +.. _`our downloads`: http://pypy.org/download.html +.. _`non-windows instructions`: getting-started-python.html#translating-the-pypy-python-interpreter +.. 
_`build instructions`: http://pypy.org/download.html#building-from-source What is missing for a full 64-bit translation --------------------------------------------- diff --git a/pypy/module/__builtin__/app_io.py b/pypy/module/__builtin__/app_io.py --- a/pypy/module/__builtin__/app_io.py +++ b/pypy/module/__builtin__/app_io.py @@ -4,6 +4,7 @@ """ import sys +from _ast import PyCF_ACCEPT_NULL_BYTES def execfile(filename, glob=None, loc=None): """execfile(filename[, globals[, locals]]) @@ -24,7 +25,8 @@ finally: f.close() #Don't exec the source directly, as this loses the filename info - co = compile(source.rstrip()+"\n", filename, 'exec') + co = compile(source.rstrip()+"\n", filename, 'exec', + PyCF_ACCEPT_NULL_BYTES) exec co in glob, loc def _write_prompt(stdout, prompt): diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -1,7 +1,10 @@ import sys +from rpython.tool.udir import udir + class AppTestBuiltinApp: def setup_class(cls): + space = cls.space class X(object): def __eq__(self, other): raise OverflowError @@ -11,18 +14,25 @@ try: d[X()] except OverflowError: - cls.w_sane_lookup = cls.space.wrap(True) + cls.w_sane_lookup = space.wrap(True) except KeyError: - cls.w_sane_lookup = cls.space.wrap(False) + cls.w_sane_lookup = space.wrap(False) # starting with CPython 2.6, when the stack is almost out, we # can get a random error, instead of just a RuntimeError. # For example if an object x has a __getattr__, we can get # AttributeError if attempting to call x.__getattr__ runs out # of stack. That's annoying, so we just work around it. 
if cls.runappdirect: - cls.w_safe_runtimerror = cls.space.wrap(True) + cls.w_safe_runtimerror = space.wrap(True) else: - cls.w_safe_runtimerror = cls.space.wrap(sys.version_info < (2, 6)) + cls.w_safe_runtimerror = space.wrap(sys.version_info < (2, 6)) + + emptyfile = udir.join('emptyfile.py') + emptyfile.write('') + nullbytes = udir.join('nullbytes.py') + nullbytes.write('#abc\x00def\n') + cls.w_emptyfile = space.wrap(str(emptyfile)) + cls.w_nullbytes = space.wrap(str(nullbytes)) def test_builtin_names(self): import __builtin__ @@ -431,7 +441,7 @@ assert setattr(x, 'x', 11) == None assert delattr(x, 'x') == None # To make this test, we need autopath to work in application space. - #self.assertEquals(execfile('emptyfile.py'), None) + assert execfile(self.emptyfile) == None def test_divmod(self): assert divmod(15,10) ==(1,5) @@ -611,14 +621,21 @@ assert firstlineno == 2 def test_compile_null_bytes(self): - import _ast raises(TypeError, compile, '\x00', 'mymod', 'exec', 0) - raises(SyntaxError, compile, '\x00', 'mymod', 'exec', - _ast.PyCF_ACCEPT_NULL_BYTES) src = "#abc\x00def\n" raises(TypeError, compile, src, 'mymod', 'exec') raises(TypeError, compile, src, 'mymod', 'exec', 0) - compile(src, 'mymod', 'exec', _ast.PyCF_ACCEPT_NULL_BYTES) # works + execfile(self.nullbytes) # works + + def test_compile_null_bytes_flag(self): + try: + from _ast import PyCF_ACCEPT_NULL_BYTES + except ImportError: + skip('PyPy only (requires _ast.PyCF_ACCEPT_NULL_BYTES)') + raises(SyntaxError, compile, '\x00', 'mymod', 'exec', + PyCF_ACCEPT_NULL_BYTES) + src = "#abc\x00def\n" + compile(src, 'mymod', 'exec', PyCF_ACCEPT_NULL_BYTES) # works def test_print_function(self): import __builtin__ @@ -720,7 +737,6 @@ class TestInternal: def test_execfile(self, space): - from rpython.tool.udir import udir fn = str(udir.join('test_execfile')) f = open(fn, 'w') print >>f, "i=42" diff --git a/pypy/module/micronumpy/tool/numready/page.html b/pypy/module/micronumpy/tool/numready/page.html --- 
a/pypy/module/micronumpy/tool/numready/page.html +++ b/pypy/module/micronumpy/tool/numready/page.html @@ -40,6 +40,7 @@

numpy compatibility test results, generated automatically by running
pypy/module/micronumpy/tool/numready/main.py <path-to-latest-pypy>

Overall: {{ msg }}

+

Warning: a positive result does not mean the function is actually working! It only means that the function/module/constant is present. It may be missing other things.

diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -101,39 +101,64 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) + # NB: since the stringbuilder2-perf branch we get more operations than + # before, but a lot less branches that might fail randomly. assert loop.match(""" - i7 = int_gt(i4, 0) - guard_true(i7, descr=...) + i100 = int_gt(i95, 0) + guard_true(i100, descr=...) guard_not_invalidated(descr=...) - p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=) + p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=) guard_no_exception(descr=...) - i10 = strlen(p9) - i11 = int_is_true(i10) - guard_true(i11, descr=...) - i13 = strgetitem(p9, 0) - i15 = int_eq(i13, 45) - guard_false(i15, descr=...) - i17 = int_neg(i10) - i19 = int_gt(i10, 23) - guard_false(i19, descr=...) - p21 = newstr(23) - copystrcontent(p9, p21, 0, 0, i10) - i25 = int_add(1, i10) - i26 = int_gt(i25, 23) - guard_false(i26, descr=...) - strsetitem(p21, i10, 32) - i30 = int_add(i10, i25) - i31 = int_gt(i30, 23) - guard_false(i31, descr=...) - copystrcontent(p9, p21, 0, i25, i10) - i33 = int_lt(i30, 23) - guard_true(i33, descr=...) - p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=) + i102 = strlen(p101) + i103 = int_is_true(i102) + guard_true(i103, descr=...) + i104 = strgetitem(p101, 0) + i105 = int_eq(i104, 45) + guard_false(i105, descr=...) + i106 = int_neg(i102) + i107 = int_gt(i102, 23) + p108 = new(descr=) + p110 = newstr(23) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) guard_no_exception(descr=...) 
- i37 = strlen(p35) - i38 = int_add_ovf(i5, i37) + i111 = getfield_gc(p108, descr=) + i112 = int_sub(i102, i111) + i113 = getfield_gc(p108, descr=) + p114 = getfield_gc(p108, descr=) + copystrcontent(p101, p114, i111, i113, i112) + i115 = int_add(i113, i112) + i116 = getfield_gc(p108, descr=) + setfield_gc(p108, i115, descr=) + i117 = int_eq(i115, i116) + cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=) + guard_no_exception(descr=...) + i118 = getfield_gc(p108, descr=) + i119 = int_add(i118, 1) + p120 = getfield_gc(p108, descr=) + strsetitem(p120, i118, 32) + i121 = getfield_gc(p108, descr=) + i122 = int_sub(i121, i119) + setfield_gc(..., descr=) + setfield_gc(..., descr=) + i123 = int_gt(i102, i122) + cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + guard_no_exception(descr=...) + i124 = getfield_gc(p108, descr=) + i125 = int_sub(i102, i124) + i126 = getfield_gc(p108, descr=) + p127 = getfield_gc(p108, descr=) + copystrcontent(p101, p127, i124, i126, i125) + i128 = int_add(i126, i125) + setfield_gc(p108, i128, descr=) + p135 = call(..., descr= self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return space.newbool(res) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp > 0 or (cmp == 0 and self._len() > other_len)) def descr_ge(self, space, w_other): - try: - res = self._val(space) >= self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return space.newbool(res) + success, cmp, other_len = self._comparison_helper(space, w_other) + if not success: + return space.w_NotImplemented + return space.newbool(cmp > 0 or (cmp == 0 and self._len() >= other_len)) def descr_iter(self, space): 
return space.newseqiter(self) @@ -319,10 +366,19 @@ def descr_inplace_add(self, space, w_other): if isinstance(w_other, W_BytearrayObject): self.data += w_other.data + return self + + if isinstance(w_other, W_BytesObject): + self._inplace_add(self._op_val(space, w_other)) else: - self.data += self._op_val(space, w_other) + self._inplace_add(_get_buffer(space, w_other)) return self + @specialize.argtype(1) + def _inplace_add(self, other): + for i in range(len(other)): + self.data.append(other[i]) + def descr_inplace_mul(self, space, w_times): try: times = space.getindex_w(w_times, space.w_OverflowError) @@ -403,12 +459,33 @@ if space.isinstance_w(w_sub, space.w_int): char = space.int_w(w_sub) return _descr_contains_bytearray(self.data, space, char) + return self._StringMethods_descr_contains(space, w_sub) + def descr_add(self, space, w_other): + if isinstance(w_other, W_BytearrayObject): + return self._new(self.data + w_other.data) + + if isinstance(w_other, W_BytesObject): + return self._add(self._op_val(space, w_other)) + + try: + buffer = _get_buffer(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return space.w_NotImplemented + raise + return self._add(buffer) + + @specialize.argtype(1) + def _add(self, other): + return self._new(self.data + [other[i] for i in range(len(other))]) + def descr_reverse(self, space): self.data.reverse() + # ____________________________________________________________ # helpers for slow paths, moved out because they contain loops @@ -1152,3 +1229,13 @@ def setitem(self, index, char): self.data[index] = char + + + at specialize.argtype(1) +def _memcmp(selfvalue, buffer, length): + for i in range(length): + if selfvalue[i] < buffer[i]: + return -1 + if selfvalue[i] > buffer[i]: + return 1 + return 0 diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -430,6 +430,7 @@ _immutable_fields_ = 
['_value'] def __init__(self, str): + assert str is not None self._value = str def __repr__(self): @@ -480,6 +481,12 @@ _val = str_w @staticmethod + def _use_rstr_ops(space, w_other): + from pypy.objspace.std.unicodeobject import W_UnicodeObject + return (isinstance(w_other, W_BytesObject) or + isinstance(w_other, W_UnicodeObject)) + + @staticmethod def _op_val(space, w_other): try: return space.str_w(w_other) diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -1,9 +1,11 @@ """Functionality shared between bytes/bytearray/unicode""" from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, newlist_hint from rpython.rlib.rarithmetic import ovfcheck -from rpython.rlib.rstring import endswith, replace, rsplit, split, startswith +from rpython.rlib.rstring import ( + find, rfind, count, endswith, replace, rsplit, split, startswith) +from rpython.rlib.buffer import Buffer from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, unwrap_spec @@ -28,6 +30,9 @@ space, lenself, w_start, w_end, upper_bound=upper_bound) return (value, start, end) + def _multi_chr(self, c): + return c + def descr_len(self, space): return space.wrap(self._len()) @@ -36,17 +41,33 @@ def descr_contains(self, space, w_sub): value = self._val(space) - other = self._op_val(space, w_sub) - return space.newbool(value.find(other) >= 0) + if self._use_rstr_ops(space, w_sub): + other = self._op_val(space, w_sub) + return space.newbool(value.find(other) >= 0) + + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytesObject): + other = self._op_val(space, w_sub) + res = find(value, other, 0, len(value)) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, 0, len(value)) + + return space.newbool(res >= 0) def 
descr_add(self, space, w_other): - try: - other = self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - return self._new(self._val(space) + other) + if self._use_rstr_ops(space, w_other): + try: + other = self._op_val(space, w_other) + except OperationError as e: + if e.match(space, space.w_TypeError): + return space.w_NotImplemented + raise + return self._new(self._val(space) + other) + + # Bytearray overrides this method, CPython doesn't support contacting + # buffers and strs, and unicodes are always handled above + return space.w_NotImplemented def descr_mul(self, space, w_times): try: @@ -58,7 +79,7 @@ if times <= 0: return self._empty() if self._len() == 1: - return self._new(self._val(space)[0] * times) + return self._new(self._multi_chr(self._val(space)[0]) * times) return self._new(self._val(space) * times) descr_rmul = descr_mul @@ -119,7 +140,7 @@ d = width - len(value) if d > 0: offset = d//2 + (d & width & 1) - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar[0]) centered = offset * fillchar + value + (d - offset) * fillchar else: centered = value @@ -128,15 +149,32 @@ def descr_count(self, space, w_sub, w_start=None, w_end=None): value, start, end = self._convert_idx_params(space, w_start, w_end) - return space.newint(value.count(self._op_val(space, w_sub), start, - end)) + + if self._use_rstr_ops(space, w_sub): + return space.newint(value.count(self._op_val(space, w_sub), start, + end)) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = count(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = count(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = count(value, buffer, start, end) + + return space.wrap(max(res, 0)) def 
descr_decode(self, space, w_encoding=None, w_errors=None): from pypy.objspace.std.unicodeobject import ( _get_encoding_and_errors, decode_object, unicode_from_string) encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - if encoding is None and errors is None: + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + if (encoding is None and errors is None and + not isinstance(self, W_BytearrayObject)): return unicode_from_string(space, self) return decode_object(space, self, encoding, errors) @@ -153,7 +191,11 @@ if not value: return self._empty() - splitted = value.split(self._chr('\t')) + if self._use_rstr_ops(space, self): + splitted = value.split(self._chr('\t')) + else: + splitted = split(value, self._chr('\t')) + try: ovfcheck(len(splitted) * tabsize) except OverflowError: @@ -161,7 +203,7 @@ expanded = oldtoken = splitted.pop(0) for token in splitted: - expanded += self._chr(' ') * self._tabindent(oldtoken, + expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken, tabsize) + token oldtoken = token @@ -192,30 +234,80 @@ def descr_find(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.find(self._op_val(space, w_sub), start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.find(self._op_val(space, w_sub), start, end) + return space.wrap(res) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = find(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = find(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, start, end) + return space.wrap(res) def descr_rfind(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.rfind(self._op_val(space, w_sub), 
start, end) + + if self._use_rstr_ops(space, w_sub): + res = value.rfind(self._op_val(space, w_sub), start, end) + return space.wrap(res) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if isinstance(w_sub, W_BytearrayObject): + res = rfind(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = rfind(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = rfind(value, buffer, start, end) + return space.wrap(res) def descr_index(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.find(self._op_val(space, w_sub), start, end) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if self._use_rstr_ops(space, w_sub): + res = value.find(self._op_val(space, w_sub), start, end) + elif isinstance(w_sub, W_BytearrayObject): + res = find(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = find(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = find(value, buffer, start, end) + if res < 0: raise oefmt(space.w_ValueError, "substring not found in string.index") - return space.wrap(res) def descr_rindex(self, space, w_sub, w_start=None, w_end=None): (value, start, end) = self._convert_idx_params(space, w_start, w_end) - res = value.rfind(self._op_val(space, w_sub), start, end) + + from pypy.objspace.std.bytearrayobject import W_BytearrayObject + from pypy.objspace.std.bytesobject import W_BytesObject + if self._use_rstr_ops(space, w_sub): + res = value.rfind(self._op_val(space, w_sub), start, end) + elif isinstance(w_sub, W_BytearrayObject): + res = rfind(value, w_sub.data, start, end) + elif isinstance(w_sub, W_BytesObject): + res = rfind(value, w_sub._value, start, end) + else: + buffer = _get_buffer(space, w_sub) + res = 
rfind(value, buffer, start, end) + if res < 0: raise oefmt(space.w_ValueError, "substring not found in string.rindex") - return space.wrap(res) @specialize.arg(2) @@ -328,6 +420,7 @@ value = self._val(space) prealloc_size = len(value) * (size - 1) + unwrapped = newlist_hint(size) for i in range(size): w_s = list_w[i] check_item = self._join_check_item(space, w_s) @@ -337,13 +430,16 @@ i, w_s) elif check_item == 2: return self._join_autoconvert(space, list_w) - prealloc_size += len(self._op_val(space, w_s)) + # XXX Maybe the extra copy here is okay? It was basically going to + # happen anyway, what with being placed into the builder + unwrapped.append(self._op_val(space, w_s)) + prealloc_size += len(unwrapped[i]) sb = self._builder(prealloc_size) for i in range(size): if value and i != 0: sb.append(value) - sb.append(self._op_val(space, list_w[i])) + sb.append(unwrapped[i]) return self._new(sb.build()) def _join_autoconvert(self, space, list_w): @@ -358,7 +454,7 @@ "ljust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar[0]) value += d * fillchar return self._new(value) @@ -372,7 +468,7 @@ "rjust() argument 2 must be a single character") d = width - len(value) if d > 0: - fillchar = fillchar[0] # annotator hint: it's a single character + fillchar = self._multi_chr(fillchar[0]) value = d * fillchar + value return self._new(value) @@ -385,52 +481,76 @@ return self._new(builder.build()) def descr_partition(self, space, w_sub): + from pypy.objspace.std.bytearrayobject import W_BytearrayObject value = self._val(space) - sub = self._op_val(space, w_sub) - if not sub: - raise oefmt(space.w_ValueError, "empty separator") - pos = value.find(sub) + + if self._use_rstr_ops(space, w_sub): + sub = self._op_val(space, w_sub) + sublen = len(sub) + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = value.find(sub) + else: + 
sub = _get_buffer(space, w_sub) + sublen = sub.getlength() + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = find(value, sub, 0, len(value)) + if pos != -1 and isinstance(self, W_BytearrayObject): + w_sub = self._new_from_buffer(sub) + if pos == -1: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): self = self._new(value) return space.newtuple([self, self._empty(), self._empty()]) else: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject - if isinstance(self, W_BytearrayObject): - w_sub = self._new(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, - self._sliced(space, value, pos+len(sub), len(value), self)]) + self._sliced(space, value, pos + sublen, len(value), self)]) def descr_rpartition(self, space, w_sub): + from pypy.objspace.std.bytearrayobject import W_BytearrayObject value = self._val(space) - sub = self._op_val(space, w_sub) - if not sub: - raise oefmt(space.w_ValueError, "empty separator") - pos = value.rfind(sub) + + if self._use_rstr_ops(space, w_sub): + sub = self._op_val(space, w_sub) + sublen = len(sub) + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = value.rfind(sub) + else: + sub = _get_buffer(space, w_sub) + sublen = sub.getlength() + if sublen == 0: + raise oefmt(space.w_ValueError, "empty separator") + + pos = rfind(value, sub, 0, len(value)) + if pos != -1 and isinstance(self, W_BytearrayObject): + w_sub = self._new_from_buffer(sub) + if pos == -1: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject if isinstance(self, W_BytearrayObject): self = self._new(value) return space.newtuple([self._empty(), self._empty(), self]) else: - from pypy.objspace.std.bytearrayobject import W_BytearrayObject - if isinstance(self, W_BytearrayObject): - w_sub = self._new(sub) return space.newtuple( [self._sliced(space, value, 0, pos, self), w_sub, - self._sliced(space, value, pos+len(sub), 
len(value), self)]) + self._sliced(space, value, pos + sublen, len(value), self)]) @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): input = self._val(space) + sub = self._op_val(space, w_old) by = self._op_val(space, w_new) try: res = replace(input, sub, by, count) except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") + return self._new(res) @unwrap_spec(maxsplit=int) @@ -442,10 +562,10 @@ return self._newlist_unwrapped(space, res) by = self._op_val(space, w_sep) - bylen = len(by) - if bylen == 0: + if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = split(value, by, maxsplit) + return self._newlist_unwrapped(space, res) @unwrap_spec(maxsplit=int) @@ -457,10 +577,10 @@ return self._newlist_unwrapped(space, res) by = self._op_val(space, w_sep) - bylen = len(by) - if bylen == 0: + if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = rsplit(value, by, maxsplit) + return self._newlist_unwrapped(space, res) @unwrap_spec(keepends=bool) @@ -616,10 +736,11 @@ for char in string: buf.append(table[ord(char)]) else: + # XXX Why not preallocate here too? 
buf = self._builder() deletion_table = [False] * 256 - for c in deletechars: - deletion_table[ord(c)] = True + for i in range(len(deletechars)): + deletion_table[ord(deletechars[i])] = True for char in string: if not deletion_table[ord(char)]: buf.append(table[ord(char)]) @@ -636,7 +757,7 @@ def descr_zfill(self, space, width): selfval = self._val(space) if len(selfval) == 0: - return self._new(self._chr('0') * width) + return self._new(self._multi_chr(self._chr('0')) * width) num_zeros = width - len(selfval) if num_zeros <= 0: # cannot return self, in case it is a subclass of str @@ -662,3 +783,8 @@ @specialize.argtype(0) def _descr_getslice_slowpath(selfvalue, start, step, sl): return [selfvalue[start + i*step] for i in range(sl)] + +def _get_buffer(space, w_obj): + return space.buffer_w(w_obj, space.BUF_SIMPLE) + + diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -178,8 +178,10 @@ assert bytearray('hello').rindex('l') == 3 assert bytearray('hello').index(bytearray('e')) == 1 assert bytearray('hello').find('l') == 2 + assert bytearray('hello').find('l', -2) == 3 assert bytearray('hello').rfind('l') == 3 + # these checks used to not raise in pypy but they should raises(TypeError, bytearray('hello').index, ord('e')) raises(TypeError, bytearray('hello').rindex, ord('e')) @@ -440,6 +442,7 @@ u = b.decode('utf-8') assert isinstance(u, unicode) assert u == u'abcdefghi' + assert b.decode().encode() == b def test_int(self): assert int(bytearray('-1234')) == -1234 diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -103,6 +103,12 @@ _val = unicode_w @staticmethod + def _use_rstr_ops(space, w_other): + # Always return true because we always need to copy the other + # operand(s) before we can 
do comparisons + return True + + @staticmethod def _op_val(space, w_other): if isinstance(w_other, W_UnicodeObject): return w_other._value diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -1,23 +1,25 @@ #!/usr/bin/env python -""" A sample script that packages PyPy, provided that it's already built. +""" packages PyPy, provided that it's already built. It uses 'pypy/goal/pypy-c' and parts of the rest of the working copy. Usage: - package.py [--nostrip] [--without-tk] root-pypy-dir [name-of-archive] [name-of-pypy-c] [destination-for-tarball] [pypy-c-path] + package.py [--options] -Usually you would do: package.py ../../.. pypy-VER-PLATFORM -The output is found in the directory /tmp/usession-YOURNAME/build/. +Usually you would do: package.py --version-name pypy-VER-PLATFORM +The output is found in the directory from --builddir, +by default /tmp/usession-YOURNAME/build/. """ import shutil import sys import os #Add toplevel repository dir to sys.path -sys.path.insert(0,os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) +basedir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.insert(0,basedir) import py import fnmatch -from rpython.tool.udir import udir import subprocess +import glob if sys.version_info < (2,6): py.test.skip("requires 2.6 so far") @@ -40,17 +42,119 @@ class PyPyCNotFound(Exception): pass -def fix_permissions(basedir): +class MissingDependenciesError(Exception): + pass + +def fix_permissions(dirname): if sys.platform != 'win32': - os.system("chmod -R a+rX %s" % basedir) - os.system("chmod -R g-w %s" % basedir) + os.system("chmod -R a+rX %s" % dirname) + os.system("chmod -R g-w %s" % dirname) -def package(basedir, name='pypy-nightly', rename_pypy_c='pypy', - copy_to_dir=None, override_pypy_c=None, nostrip=False, - withouttk=False): - assert '/' not in 
rename_pypy_c +sep_template = "\nThis copy of PyPy includes a copy of %s, which is licensed under the following terms:\n\n" + +def generate_license_linux(basedir, options): + base_file = str(basedir.join('LICENSE')) + with open(base_file) as fid: + txt = fid.read() + searches = [("bzip2","libbz2-*", "copyright", '---------'), + ("openssl", "openssl*", "copyright", 'LICENSE ISSUES'), + ] + if not options.no_tk: + name = 'Tcl/Tk' + txt += "License for '%s'" %name + txt += '\n' + "="*(14 + len(name)) + '\n' + txt += sep_template % name + base_file = str(basedir.join('lib_pypy/_tkinter/license.terms')) + with open(base_file, 'r') as fid: + txt += fid.read() + for name, pat, fname, first_line in searches: + txt += "License for '" + name + "'" + txt += '\n' + "="*(14 + len(name)) + '\n' + txt += sep_template % name + dirs = glob.glob(options.license_base + "/" +pat) + if not dirs: + raise ValueError, "Could not find "+ options.license_base + "/" + pat + if len(dirs) > 2: + raise ValueError, "Multiple copies of "+pat + dir = dirs[0] + with open(os.path.join(dir, fname)) as fid: + # Read up to the line dividing the packaging header from the actual copyright + for line in fid: + if first_line in line: + break + txt += line + for line in fid: + txt += line + if len(line.strip())<1: + txt += '\n' + txt += third_party_header + # Do something for gdbm, which is GPL + txt += gdbm_bit + return txt + +def generate_license_windows(basedir, options): + base_file = str(basedir.join('LICENSE')) + with open(base_file) as fid: + txt = fid.read() + # shutil.copyfileobj(open("crtlicense.txt"), out) # We do not ship msvc runtime files + if not options.no_tk: + name = 'Tcl/Tk' + txt += "License for '%s'" %name + txt += '\n' + "="*(14 + len(name)) + '\n' + txt += sep_template % name + base_file = str(basedir.join('lib_pypy/_tkinter/license.terms')) + with open(base_file, 'r') as fid: + txt += fid.read() + for name, pat, file in (("bzip2","bzip2-*", "LICENSE"), + ("openssl", "openssl-*", 
"LICENSE")): + txt += sep_template % name + dirs = glob.glob(options.license_base + "/" +pat) + if not dirs: + raise ValueError, "Could not find "+ options.license_base + "/" + pat + if len(dirs) > 2: + raise ValueError, "Multiple copies of "+pat + dir = dirs[0] + with open(os.path.join(dir, file)) as fid: + txt += fid.read() + return txt + +def generate_license_darwin(basedir, options): + # where are copyright files on macos? + return generate_license_linux(basedir, options) + +if sys.platform == 'win32': + generate_license = generate_license_windows +elif sys.platform == 'darwin': + generate_license = generate_license_darwin +else: + generate_license = generate_license_linux + +def create_cffi_import_libraries(pypy_c, options): + modules = ['_sqlite3'] + subprocess.check_call([str(pypy_c), '-c', 'import _sqlite3']) + if not sys.platform == 'win32': + modules += ['_curses', 'syslog', 'gdbm', '_sqlite3'] + if not options.no_tk: + modules.append(('_tkinter')) + for module in modules: + try: + subprocess.check_call([str(pypy_c), '-c', 'import ' + module]) + except subprocess.CalledProcessError: + print >>sys.stderr, """Building {0} bindings failed. 
+You can either install development headers package or +add --without-{0} option to skip packaging binary CFFI extension.""".format(module) + raise MissingDependenciesError(module) + +def create_package(basedir, options): + retval = 0 + name = options.name + if not name: + name = 'pypy-nightly' + rename_pypy_c = options.pypy_c + override_pypy_c = options.override_pypy_c + basedir = py.path.local(basedir) - if override_pypy_c is None: + if not override_pypy_c: basename = 'pypy-c' if sys.platform == 'win32': basename += '.exe' @@ -68,28 +172,18 @@ raise PyPyCNotFound( 'Bogus path: %r does not exist (see docstring for more info)' % (os.path.dirname(str(pypy_c)),)) - win_extras = ['libpypy-c.dll', 'libexpat.dll', 'sqlite3.dll', - 'libeay32.dll', 'ssleay32.dll'] - subprocess.check_call([str(pypy_c), '-c', 'import _sqlite3']) - if not sys.platform == 'win32': - subprocess.check_call([str(pypy_c), '-c', 'import _curses']) - subprocess.check_call([str(pypy_c), '-c', 'import syslog']) - subprocess.check_call([str(pypy_c), '-c', 'import gdbm']) - if not withouttk: + if not options.no_cffi: try: - subprocess.check_call([str(pypy_c), '-c', 'import _tkinter']) - except subprocess.CalledProcessError: - print >>sys.stderr, """Building Tk bindings failed. -You can either install Tk development headers package or -add --without-tk option to skip packaging binary CFFI extension.""" - sys.exit(1) - #Can the dependencies be found from cffi somehow? 
- win_extras += ['tcl85.dll', 'tk85.dll'] + create_cffi_import_libraries(pypy_c, options) + except MissingDependenciesError: + # This is a non-fatal error + retval = -1 + if sys.platform == 'win32' and not rename_pypy_c.lower().endswith('.exe'): rename_pypy_c += '.exe' binaries = [(pypy_c, rename_pypy_c)] # - builddir = udir.ensure("build", dir=True) + builddir = options.builddir pypydir = builddir.ensure(name, dir=True) includedir = basedir.join('include') # Recursively copy all headers, shutil has only ignore @@ -102,10 +196,11 @@ pypydir.ensure('include', dir=True) if sys.platform == 'win32': - #Don't include a mscvrXX.dll, users should get their own. - #Instructions are provided on the website. - # Can't rename a DLL: it is always called 'libpypy-c.dll' + win_extras = ['libpypy-c.dll', 'libexpat.dll', 'sqlite3.dll', + 'libeay32.dll', 'ssleay32.dll'] + if not options.no_tk: + win_extras += ['tcl85.dll', 'tk85.dll'] for extra in win_extras: p = pypy_c.dirpath().join(extra) @@ -116,7 +211,7 @@ continue print "Picking %s" % p binaries.append((p, p.basename)) - importlib_name = 'python27.lib' + importlib_name = 'python27.lib' if pypy_c.dirpath().join(importlib_name).check(): shutil.copyfile(str(pypy_c.dirpath().join(importlib_name)), str(pypydir.join('include/python27.lib'))) @@ -127,7 +222,7 @@ # XXX users will complain that they cannot compile cpyext # modules for windows, has the lib moved or are there no # exported functions in the dll so no import library is created? - if not withouttk: + if not options.no_tk: try: p = pypy_c.dirpath().join('tcl85.dll') if not p.check(): @@ -139,7 +234,7 @@ tk85.dll and tcl85.dll found, expecting to find runtime in ..\\lib directory next to the dlls, as per build instructions.""" import traceback;traceback.print_exc() - sys.exit(1) + raise MissingDependenciesError('Tk runtime') # Careful: to copy lib_pypy, copying just the hg-tracked files # would not be enough: there are also ctypes_config_cache/_*_cache.py. 
@@ -150,11 +245,24 @@ str(pypydir.join('lib_pypy')), ignore=ignore_patterns('.svn', 'py', '*.pyc', '*~', '*.c', '*.o')) - for file in ['LICENSE', 'README.rst']: + for file in ['README.rst',]: shutil.copy(str(basedir.join(file)), str(pypydir)) for file in ['_testcapimodule.c', '_ctypes_test.c']: - shutil.copyfile(str(basedir.join('lib_pypy', file)), + shutil.copyfile(str(basedir.join('lib_pypy', file)), str(pypydir.join('lib_pypy', file))) + try: + license = generate_license(basedir, options) + with open(str(pypydir.join('LICENSE')), 'w') as LICENSE: + LICENSE.write(license) + except: + # Non-fatal error, use original LICENCE file + import traceback;traceback.print_exc() + base_file = str(basedir.join('LICENSE')) + with open(base_file) as fid: + license = fid.read() + with open(str(pypydir.join('LICENSE')), 'w') as LICENSE: + LICENSE.write(license) + retval = -1 # spdir = pypydir.ensure('site-packages', dir=True) shutil.copy(str(basedir.join('site-packages', 'README')), str(spdir)) @@ -167,17 +275,17 @@ for source, target in binaries: archive = bindir.join(target) shutil.copy(str(source), str(archive)) + fix_permissions(builddir) + old_dir = os.getcwd() - fix_permissions(builddir) try: os.chdir(str(builddir)) - # - # 'strip' fun: see issue #587 - if not nostrip: + if not options.nostrip: for source, target in binaries: if sys.platform == 'win32': pass elif sys.platform == 'darwin': + # 'strip' fun: see issue #587 for why -x os.system("strip -x " + str(bindir.join(target))) # ignore errors else: os.system("strip " + str(bindir.join(target))) # ignore errors @@ -208,41 +316,91 @@ raise OSError('"tar" returned exit status %r' % e) finally: os.chdir(old_dir) - if copy_to_dir is not None: - print "Copying %s to %s" % (archive, copy_to_dir) - shutil.copy(archive, str(copy_to_dir)) + if options.targetdir: + print "Copying %s to %s" % (archive, options.targetdir) + shutil.copy(archive, options.targetdir) else: print "Ready in %s" % (builddir,) - return builddir # for tests 
+ return retval, builddir # for tests +def package(*args): + try: + import argparse + except ImportError: + import imp + argparse = imp.load_source('argparse', 'lib-python/2.7/argparse.py') + if sys.platform == 'win32': + pypy_exe = 'pypy.exe' + license_base = os.path.join(basedir, r'..\..\..\local') # as on buildbot YMMV + else: + pypy_exe = 'pypy' + license_base = '/usr/share/doc' + parser = argparse.ArgumentParser() + args = list(args) + args[0] = str(args[0]) + parser.add_argument('--without-tk', dest='no_tk', action='store_true', + help='build and package the cffi tkinter module') + parser.add_argument('--without-cffi', dest='no_cffi', action='store_true', + help='do not pre-import any cffi modules') + parser.add_argument('--nostrip', dest='nostrip', action='store_true', + help='do not strip the exe, making it ~10MB larger') + parser.add_argument('--rename_pypy_c', dest='pypy_c', type=str, default=pypy_exe, + help='target executable name, defaults to "pypy"') + parser.add_argument('--archive-name', dest='name', type=str, default='', + help='pypy-VER-PLATFORM') + parser.add_argument('--license_base', type=str, default=license_base, + help='where to start looking for third party upstream licensing info') + parser.add_argument('--builddir', type=str, default='', + help='tmp dir for packaging') + parser.add_argument('--targetdir', type=str, default='', + help='destination dir for archive') + parser.add_argument('--override_pypy_c', type=str, default='', + help='use as pypy exe instead of pypy/goal/pypy-c') + # Positional arguments, for backward compatability with buldbots + parser.add_argument('extra_args', help='optional interface to positional arguments', nargs=argparse.REMAINDER, + metavar='[root-pypy-dir] [name-of-archive] [name-of-pypy-c] [destination-for-tarball] [pypy-c-path]', + ) + options = parser.parse_args(args) -def print_usage(): - print >>sys.stderr, __doc__ - sys.exit(1) + # Handle positional arguments, choke if both methods are used + for 
i,target, default in ([1, 'name', ''], [2, 'pypy_c', pypy_exe], + [3, 'targetdir', ''], [4,'override_pypy_c', '']): + if len(options.extra_args)>i: + if getattr(options, target) != default: + print 'positional argument',i,target,'already has value',getattr(options, target) + parser.print_help() + return + setattr(options, target, options.extra_args[i]) + if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): + options.nostrip = True + + if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): + options.tk = True + if not options.builddir: + # The import actually creates the udir directory + from rpython.tool.udir import udir + options.builddir = udir.ensure("build", dir=True) + assert '/' not in options.pypy_c + return create_package(basedir, options) + + +third_party_header = '''\n\nLicenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and acknowledgements +for third-party software incorporated in the PyPy distribution. + +''' + +gdbm_bit = '''gdbm +---- + +The gdbm module includes code from gdbm.h, which is distributed under the terms +of the GPL license version 2 or any later version. 
+''' if __name__ == '__main__': - if len(sys.argv) == 1: - print_usage() - - args = sys.argv[1:] - kw = {} - - for i, arg in enumerate(args): - if arg == '--nostrip': - kw['nostrip'] = True - elif arg == '--without-tk': - kw['withouttk'] = True - elif not arg.startswith('--'): - break - else: - print_usage() - - if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): - kw['nostrip'] = True - - if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): - kw['withouttk'] = True - - args = args[i:] - package(*args, **kw) + import sys + retval, _ = package(*sys.argv[1:]) + sys.exit(retval) diff --git a/pypy/tool/release/test/test_package.py b/pypy/tool/release/test/test_package.py --- a/pypy/tool/release/test/test_package.py +++ b/pypy/tool/release/test/test_package.py @@ -1,7 +1,7 @@ import py from pypy.conftest import pypydir -from pypy.tool.release import package +from pypy.tool.release import package, package from pypy.module.sys.version import CPYTHON_VERSION import tarfile, zipfile, sys @@ -18,15 +18,23 @@ pypy_c = py.path.local(pypydir).join('goal', basename) if not pypy_c.check(): if sys.platform == 'win32': - assert False, "test on win32 requires exe" - pypy_c.write("#!/bin/sh") - pypy_c.chmod(0755) + import os, shutil + for d in os.environ['PATH'].split(';'): + if os.path.exists(os.path.join(d, 'cmd.exe')): + shutil.copy(os.path.join(d, 'cmd.exe'), str(pypy_c)) + break + else: + assert False, 'could not find cmd.exe' + else: + pypy_c.write("#!/bin/sh") + pypy_c.chmod(0755) fake_pypy_c = True else: fake_pypy_c = False try: - builddir = package.package(py.path.local(pypydir).dirpath(), test, + retval, builddir = package.package(py.path.local(pypydir).dirpath(), test, rename_pypy_c) + assert retval == 0 prefix = builddir.join(test) cpyver = '%d.%d' % CPYTHON_VERSION[:2] assert prefix.join('lib-python', cpyver, 'test').check() @@ -74,7 +82,6 @@ pypy_c.remove() def test_with_zipfile_module(): - from pypy.tool.release import package prev = package.USE_ZIPFILE_MODULE try: 
package.USE_ZIPFILE_MODULE = True @@ -106,3 +113,21 @@ check(file1, 0644) check(file2, 0644) check(pypy, 0755) + +def test_generate_license(): + from os.path import dirname, abspath, join + class Options(object): + pass + options = Options() + basedir = dirname(dirname(dirname(dirname(dirname(abspath(__file__)))))) + options.no_tk = False + if sys.platform == 'win32': + # as on buildbot YMMV + options.license_base = join(basedir, r'..\..\..\local') + else: + options.license_base = '/usr/share/doc' + license = package.generate_license(py.path.local(basedir), options) + assert 'bzip2' in license + assert 'openssl' in license + assert 'Tcl' in license + diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -719,6 +719,14 @@ return super(thistype, pair(ins1, ins2)).improve() +class __extend__(pairtype(SomeInstance, SomeObject)): + def getitem((s_ins, s_idx)): + return s_ins._emulate_call("__getitem__", s_idx) + + def setitem((s_ins, s_idx), s_value): + return s_ins._emulate_call("__setitem__", s_idx, s_value) + + class __extend__(pairtype(SomeIterator, SomeIterator)): def union((iter1, iter2)): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -3937,6 +3937,78 @@ s = a.build_types(fn, [int]) assert isinstance(s, annmodel.SomeInteger) + def test_instance_getitem(self): + class A(object): + def __getitem__(self, i): + return i * i + + def fn(i): + a = A() + return a[i] + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int]) + assert len(a.translator.graphs) == 2 # fn, __getitem__ + assert isinstance(s, annmodel.SomeInteger) + + def test_instance_setitem(self): + class A(object): + def __setitem__(self, i, v): + self.value = i * v + + def fn(i, v): + a = A() + a[i] = v + return a.value + + a = self.RPythonAnnotator() + s = 
a.build_types(fn, [int, int]) + assert len(a.translator.graphs) == 2 # fn, __setitem__ + assert isinstance(s, annmodel.SomeInteger) + + def test_instance_getslice(self): + class A(object): + def __getslice__(self, stop, start): + return "Test"[stop:start] + + def fn(): + a = A() + return a[0:2] + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __getslice__ + assert isinstance(s, annmodel.SomeString) + + def test_instance_setslice(self): + class A(object): + def __setslice__(self, stop, start, value): + self.value = value + + def fn(): + a = A() + a[0:2] = '00' + return a.value + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __setslice__ + assert isinstance(s, annmodel.SomeString) + + def test_instance_len(self): + class A(object): + def __len__(self): + return 0 + + def fn(): + a = A() + return len(a) + + a = self.RPythonAnnotator() + s = a.build_types(fn, []) + assert len(a.translator.graphs) == 2 # fn, __len__ + assert isinstance(s, annmodel.SomeInteger) + From noreply at buildbot.pypy.org Mon Jun 16 21:43:21 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 16 Jun 2014 21:43:21 +0200 (CEST) Subject: [pypy-commit] pypy default: prevent unexpected failures from opening the dreaded AppCrash dialog box on windows Message-ID: <20140616194321.738E71C021D@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72079:0b28e987bc19 Date: 2014-06-15 23:54 +0300 http://bitbucket.org/pypy/pypy/changeset/0b28e987bc19/ Log: prevent unexpected failures from opening the dreaded AppCrash dialog box on windows diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py --- a/rpython/translator/c/genc.py +++ b/rpython/translator/c/genc.py @@ -313,7 +313,7 @@ def cmdexec(self, args='', env=None, err=False, expect_crash=False): assert self._compiled - if expect_crash and sys.platform == 'win32': + if sys.platform == 'win32': #Prevent opening a 
dialog box import ctypes winapi = ctypes.windll.kernel32 @@ -330,7 +330,7 @@ SetErrorMode(old_mode | flags) res = self.translator.platform.execute(self.executable_name, args, env=env) - if expect_crash and sys.platform == 'win32': + if sys.platform == 'win32': SetErrorMode(old_mode) if res.returncode != 0: if expect_crash: From noreply at buildbot.pypy.org Mon Jun 16 21:43:22 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 16 Jun 2014 21:43:22 +0200 (CEST) Subject: [pypy-commit] pypy default: move failing tests that mess up global state to end so they do not cause other failures Message-ID: <20140616194322.B15751C021D@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72080:8830a6fd5a51 Date: 2014-06-16 22:40 +0300 http://bitbucket.org/pypy/pypy/changeset/8830a6fd5a51/ Log: move failing tests that mess up global state to end so they do not cause other failures diff --git a/rpython/rlib/test/test_rsocket.py b/rpython/rlib/test/test_rsocket.py --- a/rpython/rlib/test/test_rsocket.py +++ b/rpython/rlib/test/test_rsocket.py @@ -62,43 +62,6 @@ py.test.fail("could not find the localhost address in %r" % (address_list,)) -def test_thread_safe_gethostbyname_ex(): - import threading - nthreads = 10 - domain = 'google.com' - result = [0] * nthreads - threads = [None] * nthreads - lock = threading.Lock() - def lookup_name(i): - name, aliases, address_list = gethostbyname_ex(domain, lock) - if name == domain: - result[i] += 1 - for i in range(nthreads): - threads[i] = threading.Thread(target = lookup_name, args=[i]) - threads[i].start() - for i in range(nthreads): - threads[i].join() - assert sum(result) == nthreads - -def test_thread_safe_gethostbyaddr(): - import threading - nthreads = 10 - ip = '8.8.8.8' - domain = gethostbyaddr(ip)[0] - result = [0] * nthreads - threads = [None] * nthreads - lock = threading.Lock() - def lookup_addr(ip, i): - name, aliases, address_list = gethostbyaddr(ip, lock) - if name == domain: - result[i] += 1 - for i in 
range(nthreads): - threads[i] = threading.Thread(target = lookup_addr, args=[ip, i]) - threads[i].start() - for i in range(nthreads): - threads[i].join() - assert sum(result) == nthreads - def test_gethostbyaddr(): try: cpy_socket.gethostbyaddr("::1") @@ -356,18 +319,7 @@ getaddrinfo_pydotorg(0, result) assert result[0] == 1 -def test_getaddrinfo_pydotorg_threadsafe(): - import threading - nthreads = 10 - result = [0] * nthreads - threads = [None] * nthreads - for i in range(nthreads): - threads[i] = threading.Thread(target = getaddrinfo_pydotorg, args=[i, result]) - threads[i].start() - for i in range(nthreads): - threads[i].join() - assert sum(result) == nthreads - + def test_getaddrinfo_no_reverse_lookup(): # It seems that getaddrinfo never runs a reverse lookup on Linux. # Python2.3 on Windows returns the hostname. @@ -566,3 +518,53 @@ def test_no_AF_NETLINK(): _test_cond_include('AF_NETLINK') + +def test_thread_safe_gethostbyaddr(): + import threading + nthreads = 10 + ip = '8.8.8.8' + domain = gethostbyaddr(ip)[0] + result = [0] * nthreads + threads = [None] * nthreads + lock = threading.Lock() + def lookup_addr(ip, i): + name, aliases, address_list = gethostbyaddr(ip, lock) + if name == domain: + result[i] += 1 + for i in range(nthreads): + threads[i] = threading.Thread(target = lookup_addr, args=[ip, i]) + threads[i].start() + for i in range(nthreads): + threads[i].join() + assert sum(result) == nthreads + +def test_thread_safe_gethostbyname_ex(): + import threading + nthreads = 10 + domain = 'google.com' + result = [0] * nthreads + threads = [None] * nthreads + lock = threading.Lock() + def lookup_name(i): + name, aliases, address_list = gethostbyname_ex(domain, lock) + if name == domain: + result[i] += 1 + for i in range(nthreads): + threads[i] = threading.Thread(target = lookup_name, args=[i]) + threads[i].start() + for i in range(nthreads): + threads[i].join() + assert sum(result) == nthreads + +def test_getaddrinfo_pydotorg_threadsafe(): + import 
threading + nthreads = 10 + result = [0] * nthreads + threads = [None] * nthreads + for i in range(nthreads): + threads[i] = threading.Thread(target = getaddrinfo_pydotorg, args=[i, result]) + threads[i].start() + for i in range(nthreads): + threads[i].join() + assert sum(result) == nthreads + From noreply at buildbot.pypy.org Mon Jun 16 22:38:11 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 16 Jun 2014 22:38:11 +0200 (CEST) Subject: [pypy-commit] pypy default: skip windows as well as posix Message-ID: <20140616203811.2B19B1C0DCA@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72081:0681a125b579 Date: 2014-06-16 23:20 +0300 http://bitbucket.org/pypy/pypy/changeset/0681a125b579/ Log: skip windows as well as posix diff --git a/rpython/rlib/test/test_streamio.py b/rpython/rlib/test/test_streamio.py --- a/rpython/rlib/test/test_streamio.py +++ b/rpython/rlib/test/test_streamio.py @@ -698,8 +698,8 @@ return streamio.MMapFile(self.fd, mmapmode) def test_write(self): - if os.name == "posix": - return # write() does't work on Unix :-( + if os.name == "posix" or os.name == 'nt': + return # write() does't work on Unix nor on win32:-( file = self.makeStream(mode="w") file.write("BooHoo\n") file.write("Barf\n") From noreply at buildbot.pypy.org Mon Jun 16 22:38:12 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 16 Jun 2014 22:38:12 +0200 (CEST) Subject: [pypy-commit] pypy default: add default value Message-ID: <20140616203812.738561C0DCA@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72082:ecc6b0658fdb Date: 2014-06-16 23:27 +0300 http://bitbucket.org/pypy/pypy/changeset/ecc6b0658fdb/ Log: add default value diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py --- a/rpython/rlib/streamio.py +++ b/rpython/rlib/streamio.py @@ -850,7 +850,7 @@ self.do_flush = base.flush_buffers self.lfbuffer = "" - def read(self, n): + def read(self, n=-1): data = self.lfbuffer + self.do_read(n) self.lfbuffer = "" if 
data.endswith("\r"): From noreply at buildbot.pypy.org Tue Jun 17 07:46:07 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Tue, 17 Jun 2014 07:46:07 +0200 (CEST) Subject: [pypy-commit] pypy gc-two-end-nursery: add gctransformer level operation Message-ID: <20140617054607.B3A241C3563@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-two-end-nursery Changeset: r72083:395364919ac7 Date: 2014-06-17 01:43 -0400 http://bitbucket.org/pypy/pypy/changeset/395364919ac7/ Log: add gctransformer level operation diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -844,6 +844,19 @@ resultvar=op.result) self.pop_roots(hop, livevars) + def gct_do_malloc_fixedsize(self, hop): + op = hop.spaceop + [v_typeid, v_size, + v_has_finalizer, v_has_light_finalizer, v_contains_weakptr] = op.args + livevars = self.push_roots(hop) + hop.genop("direct_call", + [self.malloc_fixedsize_clear_ptr, self.c_const_gc, + v_typeid, v_size, + v_has_finalizer, v_has_light_finalizer, + v_contains_weakptr], + resultvar=op.result) + self.pop_roots(hop, livevars) + def gct_do_malloc_varsize_clear(self, hop): # used by the JIT (see rpython.jit.backend.llsupport.gc) op = hop.spaceop From noreply at buildbot.pypy.org Tue Jun 17 09:49:50 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 17 Jun 2014 09:49:50 +0200 (CEST) Subject: [pypy-commit] pypy default: Include the user name in the temporary directory Message-ID: <20140617074950.C1D6C1C3563@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72084:66e58e7fdc2b Date: 2014-06-17 09:46 +0200 http://bitbucket.org/pypy/pypy/changeset/66e58e7fdc2b/ Log: Include the user name in the temporary directory diff --git a/lib_pypy/_pypy_testcapi.py b/lib_pypy/_pypy_testcapi.py --- a/lib_pypy/_pypy_testcapi.py +++ b/lib_pypy/_pypy_testcapi.py @@ -13,7 +13,15 @@ k1 = k1.lstrip('0x').rstrip('L') k2 = 
hex(binascii.crc32(key[1::2]) & 0xffffffff) k2 = k2.lstrip('0').rstrip('L') - output_dir = tempfile.gettempdir() + os.path.sep + 'tmp_%s%s' %(k1, k2) + try: + username = os.environ['USER'] #linux, et al + except KeyError: + try: + username = os.environ['USERNAME'] #windows + except KeyError: + username = os.getuid() + output_dir = tempfile.gettempdir() + os.path.sep + 'tmp_%s_%s%s' % ( + username, k1, k2) if not os.path.exists(output_dir): os.mkdir(output_dir) return output_dir From noreply at buildbot.pypy.org Tue Jun 17 09:55:55 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 17 Jun 2014 09:55:55 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix whatsnew Message-ID: <20140617075555.109651D23C1@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72085:0886deffb54d Date: 2014-06-17 09:55 +0200 http://bitbucket.org/pypy/pypy/changeset/0886deffb54d/ Log: Fix whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,15 @@ Added support for ``__getitem__``, ``__setitem__``, ``__getslice__``, ``__setslice__``, and ``__len__`` to RPython + +.. branch: stringbuilder2-perf +Give the StringBuilder a more flexible internal structure, with a +chained list of strings instead of just one string. This make it +more efficient when building large strings, e.g. with cStringIO(). + +Also, use systematically jit.conditional_call() instead of regular +branches. This lets the JIT make more linear code, at the cost of +forcing a bit more data (to be passed as arguments to +conditional_calls). I would expect the net result to be a slight +slow-down on some simple benchmarks and a speed-up on bigger +programs. 
From noreply at buildbot.pypy.org Tue Jun 17 10:02:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 17 Jun 2014 10:02:09 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix the test Message-ID: <20140617080209.8B8F71C32BD@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72086:e2b1cc741f76 Date: 2014-06-17 10:01 +0200 http://bitbucket.org/pypy/pypy/changeset/e2b1cc741f76/ Log: Fix the test diff --git a/rpython/jit/metainterp/test/test_heapcache.py b/rpython/jit/metainterp/test/test_heapcache.py --- a/rpython/jit/metainterp/test/test_heapcache.py +++ b/rpython/jit/metainterp/test/test_heapcache.py @@ -1,6 +1,6 @@ from rpython.jit.metainterp.heapcache import HeapCache from rpython.jit.metainterp.resoperation import rop -from rpython.jit.metainterp.history import ConstInt, BoxInt +from rpython.jit.metainterp.history import ConstInt, BoxInt, BasicFailDescr box1 = "box1" box2 = "box2" @@ -550,7 +550,7 @@ assert h.is_unescaped(box2) assert h.getfield(box1, descr1) is box2 - def test_bug_heap_cache_is_cleared_but_not_is_unescaped(self): + def test_bug_heap_cache_is_cleared_but_not_is_unescaped_1(self): # bug if only the getfield() link is cleared (heap_cache) but not # the is_unescaped() flags: we can do later a GETFIELD(box1) which # will give us a fresh box3, which is actually equal to box2. 
This @@ -564,6 +564,27 @@ h.invalidate_caches(rop.SETFIELD_GC, None, [box1, box2]) assert h.getfield(box1, descr1) is box2 h.invalidate_caches(rop.CALL_MAY_FORCE, None, []) + assert not h.is_unescaped(box1) + assert not h.is_unescaped(box2) + assert h.getfield(box1, descr1) is None + + def test_bug_heap_cache_is_cleared_but_not_is_unescaped_2(self): + h = HeapCache() + h.new(box1) + h.new(box2) + h.setfield(box1, box2, descr1) + h.invalidate_caches(rop.SETFIELD_GC, None, [box1, box2]) + assert h.getfield(box1, descr1) is box2 + descr = BasicFailDescr() + class XTra: + oopspecindex = 0 + OS_ARRAYCOPY = 42 + extraeffect = 5 + EF_LOOPINVARIANT = 1 + EF_ELIDABLE_CANNOT_RAISE = 2 + EF_ELIDABLE_CAN_RAISE = 3 + descr.get_extra_info = XTra + h.invalidate_caches(rop.CALL, descr, []) assert h.is_unescaped(box1) assert h.is_unescaped(box2) assert h.getfield(box1, descr1) is box2 From noreply at buildbot.pypy.org Tue Jun 17 10:23:33 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 17 Jun 2014 10:23:33 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix for test_recursive that shows that we're clearing too much random Message-ID: <20140617082333.1B8161D2AB3@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72087:74b48b794989 Date: 2014-06-17 10:19 +0200 http://bitbucket.org/pypy/pypy/changeset/74b48b794989/ Log: Fix for test_recursive that shows that we're clearing too much random stuff across a general CALL_ASSEMBLER for example diff --git a/rpython/jit/metainterp/heapcache.py b/rpython/jit/metainterp/heapcache.py --- a/rpython/jit/metainterp/heapcache.py +++ b/rpython/jit/metainterp/heapcache.py @@ -6,7 +6,7 @@ def __init__(self): self.reset() - def reset(self, reset_virtuals=True): + def reset(self, reset_virtuals=True, trace_branch=True): # contains boxes where the class is already known self.known_class_boxes = {} # store the boxes that contain newly allocated objects, this maps the @@ -14,14 +14,19 @@ # escaped the trace or not (True means 
the box never escaped, False # means it did escape), its presences in the mapping shows that it was # allocated inside the trace - self.new_boxes = {} + if trace_branch: + self.new_boxes = {} + else: + for box in self.new_boxes: + self.new_boxes[box] = False if reset_virtuals: self.likely_virtuals = {} # only for jit.isvirtual() # Tracks which boxes should be marked as escaped when the key box # escapes. self.dependencies = {} # contains frame boxes that are not virtualizables - self.nonstandard_virtualizables = {} + if trace_branch: + self.nonstandard_virtualizables = {} # heap cache # maps descrs to {from_box, to_box} dicts @@ -207,7 +212,7 @@ # state at least in the 'CALL_*' operations that release the GIL. We # tried to do only the kind of resetting done by the two loops just # above, but hit an assertion in "pypy test_multiprocessing.py". - self.reset(reset_virtuals=False) + self.reset(reset_virtuals=False, trace_branch=False) def is_class_known(self, box): return box in self.known_class_boxes From noreply at buildbot.pypy.org Tue Jun 17 14:43:23 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Tue, 17 Jun 2014 14:43:23 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: first round of integrating feedback Message-ID: <20140617124323.55FA71C03B3@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5347:f0b53e06d9f7 Date: 2014-06-17 14:43 +0200 http://bitbucket.org/pypy/extradoc/changeset/f0b53e06d9f7/ Log: first round of integrating feedback diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -7,6 +7,7 @@ % 10pt To set in 10-point type instead of 9-point. % 11pt To set in 11-point type instead of 9-point. % authoryear To obtain author/year citation style instead of numeric. 
+\synctex=-1 \usepackage[utf8]{inputenc} \usepackage{array} @@ -15,6 +16,57 @@ \usepackage{amsmath} \usepackage{amssymb} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% listings +\usepackage{float} +\floatstyle{ruled} +\newfloat{code}{tbp}{loa} +\providecommand{\codename}{Listing} +\floatname{code}{\protect\codename} + + +% nice listings +\usepackage{xcolor} +\usepackage{newverbs} + +\usepackage{color} +\definecolor{verylightgray}{rgb}{0.93,0.93,0.93} +\definecolor{darkblue}{rgb}{0.2,0.2,0.6} +\definecolor{commentgreen}{rgb}{0.25,0.5,0.37} +\usepackage{letltxmacro} + +\usepackage{listings} + +\makeatletter +\LetLtxMacro{\oldlstinline}{\lstinline} + +\renewcommand\lstinline[1][]{% + \Collectverb{\@@myverb}% +} + +\def\@@myverb#1{% + \begingroup + \fboxsep=0.2em + \colorbox{verylightgray}{\oldlstinline|#1|}% + \endgroup +} +\makeatother + + +\lstset{backgroundcolor={\color{verylightgray}}, + basicstyle={\scriptsize\ttfamily}, + commentstyle={\ttfamily\color{commentgreen}}, + keywordstyle={\bfseries\color{darkblue}}, + morecomment={[l]{//}}, + tabsize=4, + morekeywords={foreach,in,def,type,dynamic,Int, + Boolean,infer,void,super,if,boolean,int,else, + while,do,extends,class,assert,for,switch,case, + private,protected,public,const,final,static, + interface,new,true,false,null,return}} +\renewcommand{\lstlistingname}{Listing} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5 + \newcommand{\mynote}[2]{% \textcolor{red}{% \fbox{\bfseries\sffamily\scriptsize#1}% @@ -117,7 +169,7 @@ the interpreter itself. These requirements are not easy to meet. We argue that STM is the -overall winner. While it has a big performance problem currently, it +overall winner. While it currently has a big performance problem, it gets more points in all the other categories. We think that it is the only solution that also provides a better synchronisation mechanism to the application in the form of parallelisable atomic blocks. 
@@ -158,18 +210,48 @@ \subsection{Why is there a GIL?} The GIL is a very simple synchronisation mechanism for supporting multithreading in an interpreter. The basic guarantee is that the GIL -may only be released in between bytecode instructions. The interpreter -can thus rely on complete isolation and atomicity of these -instructions. Additionally, it provides the application with a -sequential consistency model~\cite{lamport79}. As a consequence, -applications can rely on certain operations to be atomic and that they -will always be executed in the order in which they appear in the -code. While depending on this may not always be a good idea, it is -done in practice. A GIL-replacement should therefore uphold these -guarantees, while preferably also be as easily implementable as a GIL -for the interpreter. The latter can be especially important since -many of these languages are developed and maintained by very large -open-source communities, which are not easy to coordinate. +may only be released in between bytecode instructions\footnote{This +also applies to Abstract Syntax Tree (AST) interpreters, where the GIL +may only be released between interpreting two AST nodes.}. The interpreter +can thus rely on complete isolation and atomicity for the +instructions' execution. Thus, accesses to data structures like +dictionaries and lists happen atomically and do not need additional +protection from data races when shared between threads. + +The GIL also provides the application with a sequential consistency +model~\cite{lamport79}. This can be very valuable as it means less +surprises for the programmer. For example in Table~\ref{tab:seq_cons}, +the programmer can expect the critical section to only be entered by +one thread. If the model allowed to buffer the writes, both threads +may enter the critical section at the same time. 
+ +\begin{table}[!ht] + \begin{center} + \begin{tabular}{|l|l|} + \hline + Thread 1 & Thread 2 \\ + \hline + \multicolumn{2}{|l|}{\texttt{A = B = 0}} \\ + \hline + \texttt{A = 1} & \texttt{B = 1}\\ + \texttt{if B == 0:} & \texttt{if A == 0:}\\ + \texttt{ critical section} & \texttt{ critical section}\\ + \hline + \end{tabular} + \caption{Critical section with a sequential consistency model.} + \label{tab:seq_cons} + \end{center} +\end{table} + +As a consequence, applications can rely on certain operations to be +atomic and that they will always be executed in the order in which +they appear in the code. While depending on this may not always be a +good idea, it is done in practice. A GIL-replacement should therefore +uphold these guarantees, while preferably also be as easily +implementable as a GIL for the interpreter. The latter can be +especially important since many of these languages are developed and +maintained by very large open-source communities, which are not easy +to coordinate. The GIL also allows for easy integration with external C libraries that may not be thread-safe. For the duration of the calls, we @@ -239,12 +321,15 @@ limitations: \begin{description} -\item[Performance:] How well does the approach perform compared to the - GIL on a single and on multiple threads? +\item[Performance:] How much does the approach impact performance on a single + and how much on multiple threads? Can it make use of parallelism? \item[Existing applications:] How big are the changes required to integrate with and parallelise existing applications? -\item[Better synchronisation:] Does the approach enable better, paralleliseable - synchronisation mechanisms for applications (e.g. atomic blocks)? +\item[Better synchronisation:] Does the approach enable better, + paralleliseable synchronisation mechanisms for applications + (e.g.\ atomic blocks)? Many synchronisation mechanisms can be built on + top of all solutions (e.g.\ message passing). 
We look for mechanisms + that are directly enabled by the contending approaches. \item[Implementation:] How difficult is it to implement the approach in the interpreter? \item[External libraries:] Does the approach allow for easy @@ -273,7 +358,11 @@ fine-grained locking is milder in Java than it would be in a typical piece of C code; see e.g.~\cite{biased}.} to correctly synchronise the interpreter. For a language like Python, one needs quite a few, -carefully placed locks. Since there is no central location, the +carefully placed locks -- every dictionary, list, instance, or mutable +object in general needs a lock. Compared to e.g.\ Java, object +attributes are backed by a dictionary. Accesses to it must be +synchronised because the interpreter could crash otherwise. Since +there is no central location for all these locks, the complexity of the implementation is quite a bit larger compared to using a GIL. Integrating external, non-thread-safe libraries should however be very simple too. One could simply use one lock per library @@ -282,11 +371,15 @@ In the end, fine-grained locking can transparently replace the GIL and therefore parallelise existing applications, generally without any changes\footnote{There are rare cases where not having atomic - bytecodes actually changes the semantics.}. An implementation has to -follow the GIL semantics very closely, otherwise it may expose some -latent data races in existing applications which are just not exposed -with a GIL. This approach does however not provide a better parallelising -synchronisation mechanism to the application like e.g. atomic blocks. + bytecodes actually changes the semantics. E.g.\ in Jython, + \texttt{dict1.update(dict2)} is not atomic: it first reads data from + \texttt{dict2} with \texttt{dict2}'s lock, and then puts it into + \texttt{dict1} with \texttt{dict1}'s lock. A lot can happen + in-between.}. 
An implementation has to follow the GIL semantics very +closely, otherwise it may expose some latent data races in existing +applications which are just not exposed with a GIL. This approach does +however not provide a better parallelising synchronisation mechanism +to the application like e.g. atomic blocks. %% - support of atomic blocks?\\ %% - hard to get right (deadlocks, performance, lock-granularity)\\ From noreply at buildbot.pypy.org Tue Jun 17 15:23:50 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Tue, 17 Jun 2014 15:23:50 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: second round of feedback Message-ID: <20140617132350.0FEB61C3563@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5348:2947a2a7f631 Date: 2014-06-17 15:24 +0200 http://bitbucket.org/pypy/extradoc/changeset/2947a2a7f631/ Log: second round of feedback diff --git a/talk/icooolps2014/position-paper.pdf b/talk/icooolps2014/position-paper.pdf index 473787d313f0bad0af373cb268d3e6b7cdb17d6d..4c20e424eab52b3d1bdeed6cd22738c5db3b0800 GIT binary patch [cut] diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -229,13 +229,13 @@ \begin{center} \begin{tabular}{|l|l|} \hline - Thread 1 & Thread 2 \\ + Thread 1 & Thread 2 \\ \hline - \multicolumn{2}{|l|}{\texttt{A = B = 0}} \\ + \multicolumn{2}{|l|}{\texttt{A = B = 0}} \\ \hline - \texttt{A = 1} & \texttt{B = 1}\\ - \texttt{if B == 0:} & \texttt{if A == 0:}\\ - \texttt{ critical section} & \texttt{ critical section}\\ + \texttt{A = 1} & \texttt{B = 1} \\ + \texttt{if B == 0:} & \texttt{if A == 0:} \\ + \texttt{ critical section} & \texttt{ critical section} \\ \hline \end{tabular} \caption{Critical section with a sequential consistency model.} @@ -275,7 +275,8 @@ complexity of the program logic. 
For a better parallel programming model for dynamic languages, we propose another, well-known synchronisation mechanism called \emph{atomic - blocks}~\cite{tim03,tim05}. + blocks}~\cite{tim03,tim05}. This is also suggested by +\cite{christopher10,victor11} as an easier mechanism than locks. Atomic blocks are composable, deadlock-free, higher-level and expose useful atomicity and isolation guarantees to the application for a @@ -301,7 +302,7 @@ \hline Existing applications & ++ & ++ & -{-} & ++ & ++ \\ \hline - Better synchronisation & o & o & & o & ++ \\ + Better synchronisation & o & o & + & o & ++ \\ \hline Implementation & ++ & - & ++ & ++ & ++ \\ \hline @@ -315,6 +316,7 @@ \subsection{Potential Solutions} +\label{sec:pot_solutions} For the discussion we define a set of criteria to evaluate the multiple potential solutions for removing or avoiding the GIL and its @@ -379,14 +381,9 @@ closely, otherwise it may expose some latent data races in existing applications which are just not exposed with a GIL. This approach does however not provide a better parallelising synchronisation mechanism -to the application like e.g. atomic blocks. +to the application like e.g.\ atomic blocks. -%% - support of atomic blocks?\\ -%% - hard to get right (deadlocks, performance, lock-granularity)\\ -%% - very hard to get right for a large language\\ -%% - hard to retro-fit, as all existing code assumes GIL semantics\\ -%% - (there are some semantic differences, right? not given perfect lock-placement, but well) -%% ( http://www.jython.org/jythonbook/en/1.0/Concurrency.html ) + \subsubsection{Shared-Nothing} @@ -397,11 +394,16 @@ sensible to have one GIL per independent part. At the extreme, there are applications that parallelise perfectly simply by running independent processes; some web servers and some numeric computations -do. We will consider here a slightly more general approach: the -\emph{multiprocessing}\footnote{https://docs.python.org/2/library/multiprocessing.html} +do. 
+ +We will consider here a slightly more general approach: the +\emph{multiprocessing}~\cite{multiprocessing} module of Python. In essence, it uses process-forking to provide the application with multiple interpreters that can run in parallel. -Communication is then done explicitly through pipes. +Communication is then done explicitly through pipes.\footnote{There +are multiple alternative designs like e.g.\ actors or tuple spaces. +Since they are similar and do not replace the GIL directly, we +focus on the example of \emph{multiprocessing}.} The model of explicit communication is sometimes seen as a superior way to synchronise concurrent applications because of its explicitness. @@ -414,10 +416,6 @@ several of them in parallel. That way, we also inherit the easy integration of external libraries without any changes. -%% - often needs major restructuring of programs (explicit data exchange)\\ -%% - sometimes communication overhead is too large\\ -%% - shared memory is a problem, copies of memory are too expensive - \subsubsection{Transactional Memory} Transactional memory (TM) can be used as a direct replacement for a @@ -511,17 +509,24 @@ Following the above argumentation for each approach, we assembled a -general overview in Table \ref{tab:comparison}. The general picture is -everything else than clear. It looks like HTM may be a good solution -to replace the GIL in the near future. Current implementations are -however far too limiting and do not provide good scaling. +general overview in Table \ref{tab:comparison}. The points were assigned +according to the criteria described in \ref{sec:pot_solutions}. Since +the criteria are defined intuitively, there are no formal justifications +for the assigned points. The reader is thus advised to take them with a +grain of salt. + +The general picture is everything else than clear. It looks like HTM +may be a good solution to replace the GIL in the near future. 
Current +implementations are however far too limiting, not widely available, +and do not provide good scaling. Allowing for parallel execution just means that dynamic languages catch up to all other languages that already provide real parallelism. This is why we think that only the STM approach is a -viable solution in the long-term. It provides the application with a -simple memory model (sequential consistency) and a composable way to -synchronise memory accesses using atomic blocks. +viable solution in the long-term. It unifies both, the simple memory +model (sequential consistency) and the synchronisation of memory accesses +using composable atomic blocks. It is not \emph{only} a simple GIL +replacement. Unfortunately, STM has a big performance problem. Particularly, for our use case there is not much static information available since we @@ -541,10 +546,10 @@ certain tasks, looks like a very promising direction of research too. -We believe that further work to reduce the overhead of STM is +We think that further work to reduce the overhead of STM is very worthwhile. In fact, considering some analogies that have been drawn between garbage collection and transactional memory~\cite{dan07}, -we believe that it is worthwhile to focus the STM research more +we think that it is worthwhile to focus the STM research more specifically onto the context shown in this paper --- for use in implementations of high-level languages, rather than as a tool directly used by the programmer. @@ -582,6 +587,10 @@ \bibitem{webjython} The Jython Project, \url{www.jython.org} +\bibitem{multiprocessing} + The Multiprocessing Module of Python, + \url{docs.python.org/2/library/multiprocessing.html} + \bibitem{odaira14} Odaira, Rei, Jose G. Castanos, and Hisanobu Tomari. 
"Eliminating global interpreter locks in Ruby through hardware transactional From noreply at buildbot.pypy.org Tue Jun 17 17:22:48 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Tue, 17 Jun 2014 17:22:48 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: some adjustments Message-ID: <20140617152248.D0F991D2D74@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5349:897d0fa65804 Date: 2014-06-17 17:23 +0200 http://bitbucket.org/pypy/extradoc/changeset/897d0fa65804/ Log: some adjustments diff --git a/talk/icooolps2014/position-paper.pdf b/talk/icooolps2014/position-paper.pdf index 4c20e424eab52b3d1bdeed6cd22738c5db3b0800..e7a9e9a08d66ba2c9c26bafa1c59d5d4a2e7bf14 GIT binary patch [cut] diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -116,7 +116,7 @@ Dynamic languages became very popular in recent years. At some point, the need for concurrency arose, and many of them made the choice to use a single global interpreter lock (GIL) to synchronise - the interpreter in a multithreading scenario. This choice however + the interpreter in a multithreading scenario. This choice, however, makes it impossible to actually run code in parallel. Here we want to compare different approaches to replacing the GIL @@ -143,7 +143,7 @@ performance increases less and less every year, many dynamic languages have a problem. While there is certainly a lot of popularity around languages like Python and Ruby, their ability to make use of multiple -cores is somewhat limited. For ease of implementation they chose to +cores is somewhat limited. For ease of implementation, they chose to use a single, global interpreter lock (GIL) to synchronise the execution of code in multiple threads. 
While this is a straight-forward way to eliminate synchronisation issues in the @@ -168,43 +168,20 @@ threads for concurrency, as well as the changes that are required to the interpreter itself. -These requirements are not easy to meet. We argue that STM is the -overall winner. While it currently has a big performance problem, it -gets more points in all the other categories. We think that it is the -only solution that also provides a better synchronisation mechanism to -the application in the form of parallelisable atomic blocks. - -%% \subsection{Issue} -%% The issue that we want to discuss is how to efficiently support -%% multi-core parallel execution of code in dynamic languages that were -%% designed with GIL semantics in mind. - -%% Furthermore, a solution to this problem should also bring better -%% synchronisation mechanism with it... - -%% (supporting (large) atomic blocks for synchronisation) - -%% \subsection{Our Position} -%% Current solutions for replacing the GIL include STM, HTM, and -%% fine-grained locking. STM is usually too slow, HTM very limited, and -%% locking suffers from complexity that makes it hard to implement -%% correctly. We argue that the best way forward is still STM and that -%% its performance problem can be solved. - -%% Current solutions like STM, HTM, and fine-grained locking are slow, hard -%% to implement correctly, and don't fit the specific problems of dynamic -%% language. STM is the best way forward but has bad performance, so we -%% fix that. +These requirements are not easy to meet. The author's position is that +STM provides the best way forward. While STM currently has a big +performance problem, it gets more points in the other categories. We +think that it is the only solution that also provides a better +synchronisation mechanism to the application in the form of +parallelisable atomic blocks. In the following section, we try to +present a balanced view of the compared approaches. 
\section{Discussion} -In this section we examine the approaches and highlight their -advantages and disadvantages. -%% \paragraph{dynamic language VM problems} -%% XXX: -%% - high allocation rate (short lived objects)\\ -%% - (don't know anything about the program that runs until it actually runs: arbitrary atomic block size) +In this section we first explain the motivation for using a GIL and +then examine different approaches to remove or avoid it -- highlighting +their advantages and disadvantages. \subsection{Why is there a GIL?} @@ -214,7 +191,7 @@ also applies to Abstract Syntax Tree (AST) interpreters, where the GIL may only be released between interpreting two AST nodes.}. The interpreter can thus rely on complete isolation and atomicity for the -instructions' execution. Thus, accesses to data structures like +instructions' execution. Also, accesses to data structures like dictionaries and lists happen atomically and do not need additional protection from data races when shared between threads. @@ -222,8 +199,8 @@ model~\cite{lamport79}. This can be very valuable as it means less surprises for the programmer. For example in Table~\ref{tab:seq_cons}, the programmer can expect the critical section to only be entered by -one thread. If the model allowed to buffer the writes, both threads -may enter the critical section at the same time. +one thread. On the other hand, if the model allowed to buffer the +writes, both threads may enter the critical section at the same time. 
\begin{table}[!ht] \begin{center} @@ -231,11 +208,12 @@ \hline Thread 1 & Thread 2 \\ \hline - \multicolumn{2}{|l|}{\texttt{A = B = 0}} \\ + \multicolumn{2}{|c|}{\texttt{A = B = 0}} \\ \hline \texttt{A = 1} & \texttt{B = 1} \\ \texttt{if B == 0:} & \texttt{if A == 0:} \\ - \texttt{ critical section} & \texttt{ critical section} \\ + \multicolumn{2}{|c|}{only one thread enters here} \\ + \multicolumn{2}{|c|}{(e.g.\ critical section)} \\ \hline \end{tabular} \caption{Critical section with a sequential consistency model.} @@ -263,7 +241,7 @@ degree. Again, a potential solution should be able to integrate with external libraries with similar ease. We will however focus our argumentation more on running code in the interpreted language in -parallel, not the external C calls. +parallel, not the external C code. Since the GIL is mostly an implementation detail of the interpreter, it is not exposed to the application running on top of it. To @@ -284,7 +262,8 @@ that the GIL is not released during the execution of the atomic block. Of course, this still means that no two atomic blocks can execute in parallel or even concurrently. Potential solutions are -preferable if they provide a good way to implement atomic blocks that +preferable if they provide a good way to implement atomic blocks +(or another, comparable synchronisation mechanism) that are also able to be executed in parallel. @@ -318,8 +297,8 @@ \subsection{Potential Solutions} \label{sec:pot_solutions} -For the discussion we define a set of criteria to evaluate the -multiple potential solutions for removing or avoiding the GIL and its +For the discussion, we define a set of criteria to evaluate the +potential solutions for removing or avoiding the GIL and its limitations: \begin{description} @@ -350,14 +329,13 @@ and releasing locks produces. The former means that sometimes it is necessary to fall back to less fine-grained locking, preventing some potential parallelism in order to keep the complexity manageable. 
-The latter means that we lose a bit of performance in the -single-threaded case compared to the GIL, which requires much less -acquire-release operations. +The latter means that we lose a bit of performance compared to the +GIL, which requires much less acquire-release operations. Jython~\cite{webjython} is one project that implements an interpreter for Python on the Java Virtual Machine (JVM) and that uses fine-grained locking\footnote{The performance impact of -fine-grained locking is milder in Java than it would be in a typical piece +fine-grained locking is milder on the JVM than it would be in a typical piece of C code; see e.g.~\cite{biased}.} to correctly synchronise the interpreter. For a language like Python, one needs quite a few, carefully placed locks -- every dictionary, list, instance, or mutable @@ -367,21 +345,21 @@ there is no central location for all these locks, the complexity of the implementation is quite a bit larger compared to using a GIL. Integrating external, non-thread-safe libraries should -however be very simple too. One could simply use one lock per library +however be very simple too. One can simply use one lock per library to avoid this issue. In the end, fine-grained locking can transparently replace the GIL and therefore parallelise existing applications, generally without any -changes\footnote{There are rare cases where not having atomic - bytecodes actually changes the semantics. E.g.\ in Jython, - \texttt{dict1.update(dict2)} is not atomic: it first reads data from - \texttt{dict2} with \texttt{dict2}'s lock, and then puts it into - \texttt{dict1} with \texttt{dict1}'s lock. A lot can happen - in-between.}. An implementation has to follow the GIL semantics very +changes. An implementation has to follow the GIL semantics very closely, otherwise it may expose some latent data races in existing -applications which are just not exposed with a GIL. 
This approach does -however not provide a better parallelising synchronisation mechanism -to the application like e.g.\ atomic blocks. +applications which are just not exposed with a GIL\footnote{There are + rare cases where not having atomic bytecodes actually changes the + semantics. E.g.\ in Jython, \texttt{dict1.update(dict2)} is not + atomic: it first reads data from \texttt{dict2} with \texttt{dict2}'s + lock, and then puts it into \texttt{dict1} with \texttt{dict1}'s + lock. A lot can happen in-between.}. This approach does however not +provide a better parallelising synchronisation mechanism to the +application and still requires explicit locking in the application. @@ -389,7 +367,7 @@ There are also approaches that work around the GIL instead of trying to replace it. If an application can be split into completely -independent parts that only very rarely need to share anything, or +independent parts that only very rarely need to share something, or only do so via an external program like a database, then it is sensible to have one GIL per independent part. At the extreme, there are applications that parallelise perfectly simply by running @@ -450,7 +428,7 @@ suffered a lot from this. The performance of HTM is pretty good as it does not introduce much -overhead ($<40\%$ overhead~\cite{odaira14}). And it can transparently +overhead ($<40\%$~\cite{odaira14}). And it can transparently parallelise existing applications to some degree. The implementation is very straight-forward because it directly replaces the GIL in a central place. HTM is also directly compatible with any external @@ -512,8 +490,8 @@ general overview in Table \ref{tab:comparison}. The points were assigned according to the criteria described in \ref{sec:pot_solutions}. Since the criteria are defined intuitively, there are no formal justifications -for the assigned points. The reader is thus advised to take them with a -grain of salt. +for the number of points. 
The reader is thus advised to take the result +with a grain of salt. The general picture is everything else than clear. It looks like HTM may be a good solution to replace the GIL in the near future. Current @@ -537,9 +515,9 @@ One way to get more performance is to develop STM systems that make better use of low-level features in existing OS kernels. We are -currently working on a STM system that makes use of several such +currently working on an STM system that makes use of several such features like virtual memory and memory segmentation. We further -tailor the system to the discussed use case which gives us an +tailor the system to the discussed use case, which gives us an advantage over other STM systems that are more general. With this approach, initial results suggest that we can keep the overhead of STM well below 50\%. A hybrid TM system, which also uses HTM to accelerate From noreply at buildbot.pypy.org Tue Jun 17 19:22:17 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Tue, 17 Jun 2014 19:22:17 +0200 (CEST) Subject: [pypy-commit] pypy gc-two-end-nursery: fix the malloc function pointer Message-ID: <20140617172217.390471D2D38@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-two-end-nursery Changeset: r72088:ff9a09762091 Date: 2014-06-17 13:14 -0400 http://bitbucket.org/pypy/pypy/changeset/ff9a09762091/ Log: fix the malloc function pointer diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -850,13 +850,13 @@ v_has_finalizer, v_has_light_finalizer, v_contains_weakptr] = op.args livevars = self.push_roots(hop) hop.genop("direct_call", - [self.malloc_fixedsize_clear_ptr, self.c_const_gc, + [self.malloc_fixedsize_ptr, self.c_const_gc, v_typeid, v_size, v_has_finalizer, v_has_light_finalizer, v_contains_weakptr], resultvar=op.result) self.pop_roots(hop, livevars) - + def gct_do_malloc_varsize_clear(self, 
hop): # used by the JIT (see rpython.jit.backend.llsupport.gc) op = hop.spaceop From noreply at buildbot.pypy.org Tue Jun 17 20:20:43 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Tue, 17 Jun 2014 20:20:43 +0200 (CEST) Subject: [pypy-commit] pypy rpath-enforceargs: fix test: @enforceargs(str) does not accept None any more Message-ID: <20140617182043.3DB171D2AB3@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: rpath-enforceargs Changeset: r72089:79e019d50044 Date: 2014-06-17 19:19 +0100 http://bitbucket.org/pypy/pypy/changeset/79e019d50044/ Log: fix test: @enforceargs(str) does not accept None any more diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -454,7 +454,8 @@ @enforceargs(str, unicode) def f(a, b): return a, b - assert f(None, None) == (None, None) + with py.test.raises(TypeError): + f(None, None) def test_enforceargs_complex_types(): @enforceargs([int], {str: int}) From noreply at buildbot.pypy.org Wed Jun 18 00:51:39 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 00:51:39 +0200 (CEST) Subject: [pypy-commit] pypy default: prefer find/setitem_str shortcuts Message-ID: <20140617225139.6745E1D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: Changeset: r72090:f48daf83db6a Date: 2014-06-17 14:57 -0700 http://bitbucket.org/pypy/pypy/changeset/f48daf83db6a/ Log: prefer find/setitem_str shortcuts diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -511,10 +511,10 @@ for i in range(min(len(varnames), self.getcode().co_nlocals)): name = varnames[i] w_value = self.locals_stack_w[i] - w_name = self.space.wrap(name) if w_value is not None: - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) else: + w_name = self.space.wrap(name) try: 
self.space.delitem(self.w_locals, w_name) except OperationError as e: @@ -534,8 +534,7 @@ except ValueError: pass else: - w_name = self.space.wrap(name) - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) @jit.unroll_safe @@ -548,13 +547,9 @@ new_fastlocals_w = [None] * numlocals for i in range(min(len(varnames), numlocals)): - w_name = self.space.wrap(varnames[i]) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + name = varnames[i] + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: new_fastlocals_w[i] = w_value self.setfastscope(new_fastlocals_w) @@ -563,13 +558,8 @@ for i in range(len(freevarnames)): name = freevarnames[i] cell = self.cells[i] - w_name = self.space.wrap(name) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: cell.set(w_value) @jit.unroll_safe From noreply at buildbot.pypy.org Wed Jun 18 00:51:40 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 00:51:40 +0200 (CEST) Subject: [pypy-commit] pypy py3k: prefer find/setitem_str shortcuts Message-ID: <20140617225140.B8AA61D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72091:0ceafd01a9ce Date: 2014-06-17 14:57 -0700 http://bitbucket.org/pypy/pypy/changeset/0ceafd01a9ce/ Log: prefer find/setitem_str shortcuts (grafted from f48daf83db6a568e814467d1e2132ee8d046b1d3) diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -514,10 +514,10 @@ for i in range(min(len(varnames), self.getcode().co_nlocals)): name = varnames[i] w_value = self.locals_stack_w[i] - w_name = 
self.space.wrap(name.decode('utf-8')) if w_value is not None: - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) else: + w_name = self.space.wrap(name.decode('utf-8')) try: self.space.delitem(self.w_locals, w_name) except OperationError as e: @@ -537,8 +537,7 @@ except ValueError: pass else: - w_name = self.space.wrap(name) - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) @jit.unroll_safe @@ -551,13 +550,9 @@ new_fastlocals_w = [None] * numlocals for i in range(min(len(varnames), numlocals)): - w_name = self.space.wrap(varnames[i].decode('utf-8')) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + name = varnames[i] + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: new_fastlocals_w[i] = w_value self.setfastscope(new_fastlocals_w) @@ -566,13 +561,8 @@ for i in range(len(freevarnames)): name = freevarnames[i] cell = self.cells[i] - w_name = self.space.wrap(name) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: cell.set(w_value) @jit.unroll_safe From noreply at buildbot.pypy.org Wed Jun 18 00:52:34 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 00:52:34 +0200 (CEST) Subject: [pypy-commit] pypy py3k: _utf8 is quasi-immutable Message-ID: <20140617225234.96E1D1D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72092:f7ad4f75d93d Date: 2014-06-17 15:51 -0700 http://bitbucket.org/pypy/pypy/changeset/f7ad4f75d93d/ Log: _utf8 is quasi-immutable diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ 
b/pypy/objspace/std/unicodeobject.py @@ -24,7 +24,7 @@ class W_UnicodeObject(W_Root): import_from_mixin(StringMethods) - _immutable_fields_ = ['_value'] + _immutable_fields_ = ['_value', '_utf8?'] def __init__(w_self, unistr): assert isinstance(unistr, unicode) From noreply at buildbot.pypy.org Wed Jun 18 00:52:35 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 00:52:35 +0200 (CEST) Subject: [pypy-commit] pypy py3k: reduce diff w/ default Message-ID: <20140617225235.DD8B71D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72093:04e9f1667223 Date: 2014-06-17 15:51 -0700 http://bitbucket.org/pypy/pypy/changeset/04e9f1667223/ Log: reduce diff w/ default diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -164,12 +164,9 @@ for i in range(nfreevars): self.cells[i + ncellvars] = outer_func.closure[i] - def is_generator(self): - return self.getcode().co_flags & pycode.CO_GENERATOR - def run(self): """Start this frame's execution.""" - if self.is_generator(): + if self.getcode().co_flags & pycode.CO_GENERATOR: if self.getcode().co_flags & pycode.CO_YIELD_INSIDE_TRY: from pypy.interpreter.generator import GeneratorIteratorWithDel return self.space.wrap(GeneratorIteratorWithDel(self)) From noreply at buildbot.pypy.org Wed Jun 18 00:56:36 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 00:56:36 +0200 (CEST) Subject: [pypy-commit] pypy py3k: prefer finditem_str Message-ID: <20140617225636.286911D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72094:5256cdf289b4 Date: 2014-06-17 15:53 -0700 http://bitbucket.org/pypy/pypy/changeset/5256cdf289b4/ Log: prefer finditem_str diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -283,8 +283,7 @@ missing += 1 continue name = 
signature.kwonlyargnames[i - co_argcount] - w_name = self.space.wrap(name) - w_def = self.space.finditem(w_kw_defs, w_name) + w_def = self.space.finditem_str(w_kw_defs, name) if w_def is not None: scope_w[i] = w_def else: From noreply at buildbot.pypy.org Wed Jun 18 00:56:37 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 00:56:37 +0200 (CEST) Subject: [pypy-commit] pypy py3k: w_kw_defs is quasi-immutable Message-ID: <20140617225637.9B0201D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72095:36152e49e8d9 Date: 2014-06-17 15:55 -0700 http://bitbucket.org/pypy/pypy/changeset/36152e49e8d9/ Log: w_kw_defs is quasi-immutable diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -32,7 +32,8 @@ 'w_func_globals?', 'closure?[*]', 'defs_w?[*]', - 'name?'] + 'name?', + 'w_kw_defs?'] def __init__(self, space, code, w_globals=None, defs_w=[], w_kw_defs=None, closure=None, w_ann=None, forcename=None): From noreply at buildbot.pypy.org Wed Jun 18 03:37:50 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 03:37:50 +0200 (CEST) Subject: [pypy-commit] pypy py3k: specialize range iterators when the ranges fit into machine sized integers and Message-ID: <20140618013750.D549C1D2E28@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72096:addd0f2aa776 Date: 2014-06-17 18:20 -0700 http://bitbucket.org/pypy/pypy/changeset/addd0f2aa776/ Log: specialize range iterators when the ranges fit into machine sized integers and further specialize when step is not specified (like the default branch does) diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -6,7 +6,8 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt 
-from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter.gateway import ( + interp2app, interpindirect2app, unwrap_spec) from pypy.interpreter.typedef import TypeDef from rpython.rlib import jit from rpython.rlib.objectmodel import specialize @@ -307,15 +308,19 @@ class W_Range(W_Root): - def __init__(self, w_start, w_stop, w_step, w_length): + def __init__(self, w_start, w_stop, w_step, w_length, promote_step=False): self.w_start = w_start self.w_stop = w_stop self.w_step = w_step self.w_length = w_length + self.promote_step = promote_step - @unwrap_spec(w_step = WrappedDefault(1)) def descr_new(space, w_subtype, w_start, w_stop=None, w_step=None): w_start = space.index(w_start) + promote_step = False + if space.is_none(w_step): # no step argument provided + w_step = space.wrap(1) + promote_step = True if space.is_none(w_stop): # only 1 argument provided w_start, w_stop = space.newint(0), w_start else: @@ -331,7 +336,7 @@ "step argument must not be zero")) w_length = compute_range_length(space, w_start, w_stop, w_step) obj = space.allocate_instance(W_Range, w_subtype) - W_Range.__init__(obj, w_start, w_stop, w_step, w_length) + W_Range.__init__(obj, w_start, w_stop, w_step, w_length, promote_step) return space.wrap(obj) def descr_repr(self, space): @@ -386,8 +391,19 @@ return self._compute_item(space, w_index) def descr_iter(self, space): - return space.wrap(W_RangeIterator( - space, self.w_start, self.w_step, self.w_length)) + try: + start = space.int_w(self.w_start) + stop = space.int_w(self.w_stop) + step = space.int_w(self.w_step) + length = space.int_w(self.w_length) + except OperationError as e: + pass + else: + if self.promote_step: + return W_IntRangeStepOneIterator(space, start, stop) + return W_IntRangeIterator(space, start, length, step) + return W_LongRangeIterator(space, self.w_start, self.w_step, + self.w_length) def descr_reversed(self, space): # lastitem = self.start + (self.length-1) * self.step @@ 
-395,7 +411,7 @@ self.w_start, space.mul(space.sub(self.w_length, space.newint(1)), self.w_step)) - return space.wrap(W_RangeIterator( + return space.wrap(W_LongRangeIterator( space, w_lastitem, space.neg(self.w_step), self.w_length)) def descr_reduce(self, space): @@ -463,7 +479,22 @@ W_Range.typedef.acceptable_as_base_class = False -class W_RangeIterator(W_Root): +class W_AbstractRangeIterator(W_Root): + + def descr_iter(self, space): + return space.wrap(self) + + def descr_len(self, space): + raise NotImplementedError + + def descr_next(self, space): + raise NotImplementedError + + def descr_reduce(self, space): + raise NotImplementedError + + +class W_LongRangeIterator(W_AbstractRangeIterator): def __init__(self, space, w_start, w_step, w_len, w_index=None): self.w_start = w_start self.w_step = w_step @@ -472,9 +503,6 @@ w_index = space.newint(0) self.w_index = w_index - def descr_iter(self, space): - return space.wrap(self) - def descr_next(self, space): if space.is_true(space.lt(self.w_index, self.w_len)): w_index = space.add(self.w_index, space.newint(1)) @@ -489,23 +517,75 @@ def descr_reduce(self, space): from pypy.interpreter.mixedmodule import MixedModule + w_mod = space.getbuiltinmodule('_pickle_support') + mod = space.interp_w(MixedModule, w_mod) + w_args = space.newtuple([self.w_start, self.w_step, self.w_len, + self.w_index]) + return space.newtuple([mod.get('longrangeiter_new'), w_args]) + + +class W_IntRangeIterator(W_AbstractRangeIterator): + + def __init__(self, space, current, remaining, step): + self.current = current + self.remaining = remaining + self.step = step + + def descr_next(self, space): + return self.next(space) + + def next(self, space): + if self.remaining > 0: + item = self.current + self.current = item + self.step + self.remaining -= 1 + return space.wrap(item) + raise OperationError(space.w_StopIteration, space.w_None) + + def descr_len(self, space): + return self.get_remaining(space) + + def descr_reduce(self, space): + from 
pypy.interpreter.mixedmodule import MixedModule w_mod = space.getbuiltinmodule('_pickle_support') mod = space.interp_w(MixedModule, w_mod) + new_inst = mod.get('intrangeiter_new') + w = space.wrap + nt = space.newtuple - return space.newtuple( - [mod.get('rangeiter_new'), - space.newtuple([self.w_start, self.w_step, - self.w_len, self.w_index]), - ]) + tup = [w(self.current), self.get_remaining(space), w(self.step)] + return nt([new_inst, nt(tup)]) + def get_remaining(self, space): + return space.wrap(self.remaining) -W_RangeIterator.typedef = TypeDef("rangeiterator", - __iter__ = interp2app(W_RangeIterator.descr_iter), - __length_hint__ = interp2app(W_RangeIterator.descr_len), - __next__ = interp2app(W_RangeIterator.descr_next), - __reduce__ = interp2app(W_RangeIterator.descr_reduce), + +class W_IntRangeStepOneIterator(W_IntRangeIterator): + _immutable_fields_ = ['stop'] + + def __init__(self, space, start, stop): + self.current = start + self.stop = stop + self.step = 1 + + def next(self, space): + if self.current < self.stop: + item = self.current + self.current = item + 1 + return space.wrap(item) + raise OperationError(space.w_StopIteration, space.w_None) + + def get_remaining(self, space): + return space.wrap(self.stop - self.current) + + +W_AbstractRangeIterator.typedef = TypeDef("rangeiterator", + __iter__ = interp2app(W_AbstractRangeIterator.descr_iter), + __length_hint__ = interpindirect2app(W_AbstractRangeIterator.descr_len), + __next__ = interpindirect2app(W_AbstractRangeIterator.descr_next), + __reduce__ = interpindirect2app(W_AbstractRangeIterator.descr_reduce), ) -W_RangeIterator.typedef.acceptable_as_base_class = False +W_AbstractRangeIterator.typedef.acceptable_as_base_class = False class W_Map(W_Root): diff --git a/pypy/module/_pickle_support/__init__.py b/pypy/module/_pickle_support/__init__.py --- a/pypy/module/_pickle_support/__init__.py +++ b/pypy/module/_pickle_support/__init__.py @@ -19,7 +19,8 @@ 'frame_new' : 'maker.frame_new', 
'traceback_new' : 'maker.traceback_new', 'generator_new' : 'maker.generator_new', - 'rangeiter_new': 'maker.rangeiter_new', + 'longrangeiter_new': 'maker.longrangeiter_new', + 'intrangeiter_new': 'maker.intrangeiter_new', 'builtin_code': 'maker.builtin_code', 'builtin_function' : 'maker.builtin_function', 'enumerate_new': 'maker.enumerate_new', diff --git a/pypy/module/_pickle_support/maker.py b/pypy/module/_pickle_support/maker.py --- a/pypy/module/_pickle_support/maker.py +++ b/pypy/module/_pickle_support/maker.py @@ -62,9 +62,15 @@ new_generator = instantiate(GeneratorIteratorWithDel) return space.wrap(new_generator) -def rangeiter_new(space, w_start, w_step, w_len, w_index): - from pypy.module.__builtin__.functional import W_RangeIterator - new_iter = W_RangeIterator(space, w_start, w_step, w_len, w_index) +def longrangeiter_new(space, w_start, w_step, w_len, w_index): + from pypy.module.__builtin__.functional import W_LongRangeIterator + new_iter = W_LongRangeIterator(space, w_start, w_step, w_len, w_index) + return space.wrap(new_iter) + +@unwrap_spec(current=int, remaining=int, step=int) +def intrangeiter_new(space, current, remaining, step): + from pypy.module.__builtin__.functional import W_IntRangeIterator + new_iter = W_IntRangeIterator(space, current, remaining, step) return space.wrap(new_iter) def operationerror_new(space): From noreply at buildbot.pypy.org Wed Jun 18 05:14:26 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 05:14:26 +0200 (CEST) Subject: [pypy-commit] pypy py3k: improve __context__ setup vis-a-vis the JIT: Message-ID: <20140618031426.5E2591D2D67@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72097:53a145f0af1d Date: 2014-06-17 20:10 -0700 http://bitbucket.org/pypy/pypy/changeset/53a145f0af1d/ Log: improve __context__ setup vis-a-vis the JIT: defer its setup, when possible, until __context__ is explicitly requested by searching for it through the traceback. 
it's not possible to defer when __context__ comes from the current frame as the new exception eventually overwrites it (in frame.last_exception) in that case we do the setup sooner, when recording the traceback, with little cost.. until we need to break __context__ chain cycles (which I'll partly disable in the next commit) diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -36,44 +36,11 @@ def setup(self, w_type, w_value=None): assert w_type is not None - from pypy.objspace.std.typeobject import W_TypeObject self.w_type = w_type self._w_value = w_value - # HACK: isinstance(w_type, W_TypeObject) won't translate under - # the fake objspace, but w_type.__class__ is W_TypeObject does - # and short circuits to a False constant there, causing the - # isinstance to be ignored =[ - if (w_type is not None and w_type.__class__ is W_TypeObject and - isinstance(w_type, W_TypeObject)): - self.setup_context(w_type.space) if not we_are_translated(): self.debug_excs = [] - def setup_context(self, space): - # Implicit exception chaining - last_operror = space.getexecutioncontext().sys_exc_info() - if (last_operror is None or - last_operror is get_cleared_operation_error(space)): - return - - # We must normalize the value right now to check for cycles - self.normalize_exception(space) - w_value = self.get_w_value(space) - w_last_value = last_operror.get_w_value(space) - if not space.is_w(w_value, w_last_value): - # Avoid reference cycles through the context chain. This is - # O(chain length) but context chains are usually very short. 
- w_obj = w_last_value - while True: - w_context = space.getattr(w_obj, space.wrap('__context__')) - if space.is_w(w_context, space.w_None): - break - if space.is_w(w_context, w_value): - space.setattr(w_obj, space.wrap('__context__'), space.w_None) - break - w_obj = w_context - space.setattr(w_value, space.wrap('__context__'), w_last_value) - def clear(self, space): # XXX remove this method. The point is that we cannot always # hack at 'self' to clear w_type and _w_value, because in some @@ -350,6 +317,51 @@ """ self._application_traceback = traceback + def record_context(self, space, frame): + """Record a __context__ for this exception from the current + frame if one exists. + + __context__ is otherwise lazily determined from the + traceback. However the current frame.last_exception must be + checked for a __context__ before this OperationError overwrites + it (making the previous last_exception unavailable later on). + """ + last_exception = frame.last_exception + if (last_exception is not None and not frame.hide() or + last_exception is get_cleared_operation_error(space)): + # normalize w_value so setup_context can check for cycles + self.normalize_exception(space) + w_value = self.get_w_value(space) + w_context = setup_context(space, w_value, + last_exception.get_w_value(space)) + space.setattr(w_value, space.wrap('__context__'), w_context) + + +def setup_context(space, w_exc, w_last): + """Determine the __context__ for w_exc from w_last and break + reference cycles in the __context__ chain. + """ + if space.is_w(w_exc, w_last): + w_last = space.w_None + # w_last may also be space.w_None if from ClearedOpErr + if not space.is_w(w_last, space.w_None): + # Avoid reference cycles through the context chain. This is + # O(chain length) but context chains are usually very short. + w_obj = w_last + while True: + # XXX: __context__ becomes not so lazy when we're forced to + # access it here! Could this be deferred till later? 
Or at + # least limit the check to W_BaseException.w_context + # (avoiding W_BaseException._setup_context) + w_context = space.getattr(w_obj, space.wrap('__context__')) + if space.is_w(w_context, space.w_None): + break + if space.is_w(w_context, w_exc): + space.setattr(w_obj, space.wrap('__context__'), space.w_None) + break + w_obj = w_context + return w_last + class ClearedOpErr: def __init__(self, space): diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -202,18 +202,21 @@ self._trace(frame, 'exception', None, operationerr) #operationerr.print_detailed_traceback(self.space) + @staticmethod + def last_operr(space, frame): + while frame: + last = frame.last_exception + if (last is not None and + (not frame.hide() or + last is get_cleared_operation_error(space))): + return last + frame = frame.f_backref() + return None + def sys_exc_info(self): # attn: the result is not the wrapped sys.exc_info() !!! """Implements sys.exc_info(). 
Return an OperationError instance or None.""" - frame = self.gettopframe() - while frame: - if frame.last_exception is not None: - if (not frame.hide() or - frame.last_exception is - get_cleared_operation_error(self.space)): - return frame.last_exception - frame = frame.f_backref() - return None + return self.last_operr(self.space, self.gettopframe()) def set_sys_exc_info(self, operror): frame = self.gettopframe_nohidden() diff --git a/pypy/interpreter/pytraceback.py b/pypy/interpreter/pytraceback.py --- a/pypy/interpreter/pytraceback.py +++ b/pypy/interpreter/pytraceback.py @@ -57,6 +57,7 @@ tb = operror.get_traceback() tb = PyTraceback(space, frame, last_instruction, tb) operror.set_traceback(tb) + operror.record_context(space, frame) def check_traceback(space, w_tb, msg): diff --git a/pypy/interpreter/test/test_raise.py b/pypy/interpreter/test/test_raise.py --- a/pypy/interpreter/test/test_raise.py +++ b/pypy/interpreter/test/test_raise.py @@ -369,6 +369,23 @@ else: fail("No exception raised") + def test_context_once_removed(self): + context = IndexError() + def func1(): + func2() + def func2(): + try: + 1/0 + except ZeroDivisionError as e: + assert e.__context__ is context + else: + fail('No exception raised') + try: + raise context + except: + func1() + + class AppTestTraceback: def test_raise_with___traceback__(self): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -76,8 +76,8 @@ from pypy.interpreter.typedef import (TypeDef, GetSetProperty, descr_get_dict, descr_set_dict, descr_del_dict) from pypy.interpreter.gateway import interp2app -from pypy.interpreter.error import OperationError -from pypy.interpreter.pytraceback import check_traceback +from pypy.interpreter.error import OperationError, setup_context +from pypy.interpreter.pytraceback import PyTraceback, check_traceback from rpython.rlib import 
rwin32 @@ -156,7 +156,27 @@ self.w_cause = w_newcause def descr_getcontext(self, space): - return self.w_context + w_context = self.w_context + if w_context is None: + self.w_context = w_context = self._setup_context(space) + return w_context + + def _setup_context(self, space): + """Lazily determine __context__ from w_traceback""" + # XXX: w_traceback can be overwritten: it's not necessarily the + # authoratative traceback! + last_operr = None + w_traceback = self.w_traceback + if w_traceback is not None and isinstance(w_traceback, PyTraceback): + ec = space.getexecutioncontext() + # search for __context__ beginning in the previous frame. A + # __context__ from the top most frame would have already + # been handled by OperationError.record_context + last_operr = ec.last_operr(space, w_traceback.frame.f_backref()) + if last_operr is None: + # no __context__ + return space.w_None + return setup_context(space, self, last_operr.get_w_value(space)) def descr_setcontext(self, space, w_newcontext): if not (space.is_w(w_newcontext, space.w_None) or From noreply at buildbot.pypy.org Wed Jun 18 09:58:26 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Wed, 18 Jun 2014 09:58:26 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: some more adjustments Message-ID: <20140618075826.C4C551D2371@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: extradoc Changeset: r5350:e2e5d3146e7d Date: 2014-06-18 09:58 +0200 http://bitbucket.org/pypy/extradoc/changeset/e2e5d3146e7d/ Log: some more adjustments diff --git a/talk/icooolps2014/position-paper.pdf b/talk/icooolps2014/position-paper.pdf index e7a9e9a08d66ba2c9c26bafa1c59d5d4a2e7bf14..b00fd170ada9fd1fb8aad7baab5faa1349382066 GIT binary patch [cut] diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -16,6 +16,11 @@ \usepackage{amsmath} \usepackage{amssymb} +% Keine "Schusterjungen" +\clubpenalty = 
10000 +% Keine "Hurenkinder" +\widowpenalty = 10000 \displaywidowpenalty = 10000 + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% listings \usepackage{float} @@ -307,9 +312,9 @@ \item[Existing applications:] How big are the changes required to integrate with and parallelise existing applications? \item[Better synchronisation:] Does the approach enable better, - paralleliseable synchronisation mechanisms for applications + parallelisable synchronisation mechanisms for applications (e.g.\ atomic blocks)? Many synchronisation mechanisms can be built on - top of all solutions (e.g.\ message passing). We look for mechanisms + top of all solutions (e.g.\ message passing, monitors). We look for mechanisms that are directly enabled by the contending approaches. \item[Implementation:] How difficult is it to implement the approach in the interpreter? @@ -456,14 +461,18 @@ However, STM compared to HTM does not suffer from the same restricting limitations. Transactions can in principle be arbitrarily long. This makes it -possible to actually expose transactions to the application in the -form of atomic blocks. This is the only approach that enables a better -synchronisation mechanism than locks for applications \emph{and} still -parallelises when using it. We think this is a very important point -because it not only gives dynamic languages the ability to parallelise -(already commonplace in most other languages), but also pushes -parallel programming forward. Together with sequential consistency it -provides a lot of simplification for parallel applications. +possible to expose transactions to the application in the +form of atomic blocks -- thereby attacking the issues +of parallelisation and synchronisation in a unified way. While many +synchronisation mechanisms can be bolted on top of any GIL replacement, +this is the only approach that directly enables a better, parallelising +synchronisation mechanism than locks. 
We think this is a very +important point because it not only gives dynamic languages the +ability to parallelise (already commonplace in most other languages), +but also pushes parallel programming forward in a way that other +approaches cannot. Together with sequential consistency, it provides an +environment for parallel applications that has much less surprises than +e.g.\ Java or C\#. On the implementation level, while one can argue that STM requires the insertion of read and write @@ -491,7 +500,7 @@ according to the criteria described in \ref{sec:pot_solutions}. Since the criteria are defined intuitively, there are no formal justifications for the number of points. The reader is thus advised to take the result -with a grain of salt. +with a grain of salt and form their own opinion. The general picture is everything else than clear. It looks like HTM may be a good solution to replace the GIL in the near future. Current @@ -503,10 +512,11 @@ parallelism. This is why we think that only the STM approach is a viable solution in the long-term. It unifies both, the simple memory model (sequential consistency) and the synchronisation of memory accesses -using composable atomic blocks. It is not \emph{only} a simple GIL +using composable atomic blocks. It is not \emph{just} a simple GIL replacement. -Unfortunately, STM has a big performance problem. Particularly, for +Unfortunately, STM has a big performance problem, which currently +makes it lose this comparison. Particularly, for our use case there is not much static information available since we are executing a program only known at runtime. Additionally, replacing the GIL means running every part of the application in transactions, @@ -518,7 +528,8 @@ currently working on an STM system that makes use of several such features like virtual memory and memory segmentation. We further tailor the system to the discussed use case, which gives us an -advantage over other STM systems that are more general. 
With this +advantage over other STM systems that try to be more general or +simply focus on other use cases. With this approach, initial results suggest that we can keep the overhead of STM well below 50\%. A hybrid TM system, which also uses HTM to accelerate certain tasks, looks like a very promising direction of research @@ -528,7 +539,7 @@ very worthwhile. In fact, considering some analogies that have been drawn between garbage collection and transactional memory~\cite{dan07}, we think that it is worthwhile to focus the STM research more -specifically onto the context shown in this paper --- for use in +specifically onto the context shown in this paper -- for use in implementations of high-level languages, rather than as a tool directly used by the programmer. From noreply at buildbot.pypy.org Wed Jun 18 11:23:02 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 18 Jun 2014 11:23:02 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: Two minor fixes (too late? never mind) Message-ID: <20140618092302.09AFD1D257D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r5351:d4f5afac3210 Date: 2014-06-18 11:22 +0200 http://bitbucket.org/pypy/extradoc/changeset/d4f5afac3210/ Log: Two minor fixes (too late? never mind) diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -194,7 +194,9 @@ multithreading in an interpreter. The basic guarantee is that the GIL may only be released in between bytecode instructions\footnote{This also applies to Abstract Syntax Tree (AST) interpreters, where the GIL -may only be released between interpreting two AST nodes.}. The interpreter +may only be released between interpreting two AST nodes. We talk about +``bytecode instructions'' in a general way as a basic step in the +interpreter.}. 
The interpreter can thus rely on complete isolation and atomicity for the instructions' execution. Also, accesses to data structures like dictionaries and lists happen atomically and do not need additional @@ -307,7 +309,8 @@ limitations: \begin{description} -\item[Performance:] How much does the approach impact performance on a single +\item[Performance:] How much does the approach impact performance + on a single thread and how much on multiple threads? Can it make use of parallelism? \item[Existing applications:] How big are the changes required to integrate with and parallelise existing applications? From noreply at buildbot.pypy.org Wed Jun 18 11:42:48 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 18 Jun 2014 11:42:48 +0200 (CEST) Subject: [pypy-commit] extradoc extradoc: UTFize Fijal's name Message-ID: <20140618094248.8C3B61D2DBE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r5352:0ee593f18c83 Date: 2014-06-18 11:42 +0200 http://bitbucket.org/pypy/extradoc/changeset/0ee593f18c83/ Log: UTFize Fijal's name diff --git a/talk/icooolps2014/position-paper.pdf b/talk/icooolps2014/position-paper.pdf index b00fd170ada9fd1fb8aad7baab5faa1349382066..cf64998cd6ea41afd4e7e312b959b6da1fcc7087 GIT binary patch [cut] diff --git a/talk/icooolps2014/position-paper.tex b/talk/icooolps2014/position-paper.tex --- a/talk/icooolps2014/position-paper.tex +++ b/talk/icooolps2014/position-paper.tex @@ -558,7 +558,7 @@ %% This is the text of the appendix, if you need one. \acks -We would like to thank Maciej Fijalkowski and Carl Friedrich Bolz for +We would like to thank Maciej Fijałkowski and Carl Friedrich Bolz for their valuable inputs and the many fruitful discussions. % We recommend abbrvnat bibliography style. 
From noreply at buildbot.pypy.org Wed Jun 18 23:06:33 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 23:06:33 +0200 (CEST) Subject: [pypy-commit] pypy py3k: avoid triggering the now lazy __context__ setup 'up front' (when grabbing it Message-ID: <20140618210633.4C8291C362C@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72098:3856e33d85ad Date: 2014-06-18 13:09 -0700 http://bitbucket.org/pypy/pypy/changeset/3856e33d85ad/ Log: avoid triggering the now lazy __context__ setup 'up front' (when grabbing it from the current frame) when breaking __context__ chain cycles, for now. this is a trade-off: we won't break some cycles in obscure situations for the sake of not paying a cost in probably more situations diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -332,15 +332,16 @@ # normalize w_value so setup_context can check for cycles self.normalize_exception(space) w_value = self.get_w_value(space) - w_context = setup_context(space, w_value, - last_exception.get_w_value(space)) + w_last = last_exception.get_w_value(space) + w_context = setup_context(space, w_value, w_last, lazy=True) space.setattr(w_value, space.wrap('__context__'), w_context) -def setup_context(space, w_exc, w_last): +def setup_context(space, w_exc, w_last, lazy=False): """Determine the __context__ for w_exc from w_last and break reference cycles in the __context__ chain. """ + from pypy.module.exceptions.interp_exceptions import W_BaseException if space.is_w(w_exc, w_last): w_last = space.w_None # w_last may also be space.w_None if from ClearedOpErr @@ -349,15 +350,16 @@ # O(chain length) but context chains are usually very short. w_obj = w_last while True: - # XXX: __context__ becomes not so lazy when we're forced to - # access it here! Could this be defered till later? 
Or at - # least limit the check to W_BaseException.w_context - # (avoiding W_BaseException._setup_context) - w_context = space.getattr(w_obj, space.wrap('__context__')) - if space.is_w(w_context, space.w_None): + assert isinstance(w_obj, W_BaseException) + if lazy: + w_context = w_obj.w_context + else: + # triggers W_BaseException._setup_context + w_context = space.getattr(w_obj, space.wrap('__context__')) + if space.is_none(w_context): break if space.is_w(w_context, w_exc): - space.setattr(w_obj, space.wrap('__context__'), space.w_None) + w_obj.w_context = space.w_None break w_obj = w_context return w_last diff --git a/pypy/interpreter/test/test_raise.py b/pypy/interpreter/test/test_raise.py --- a/pypy/interpreter/test/test_raise.py +++ b/pypy/interpreter/test/test_raise.py @@ -385,6 +385,27 @@ except: func1() + @py.test.mark.xfail(reason="A somewhat contrived case that may burden the " + "JIT to fully support") + def test_frame_spanning_cycle_broken(self): + context = IndexError() + def func(): + try: + 1/0 + except Exception as e1: + try: + raise context + except Exception as e2: + assert e2.__context__ is e1 + # XXX: + assert e1.__context__ is None + else: + fail('No exception raised') + try: + raise context + except: + func() + class AppTestTraceback: From noreply at buildbot.pypy.org Wed Jun 18 23:23:23 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Wed, 18 Jun 2014 23:23:23 +0200 (CEST) Subject: [pypy-commit] pypy py3k: unneeded import Message-ID: <20140618212323.155F61C362C@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72099:6f7eee4959af Date: 2014-06-18 14:20 -0700 http://bitbucket.org/pypy/pypy/changeset/6f7eee4959af/ Log: unneeded import diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -187,7 +187,6 @@ self.w_context = w_newcontext def descr_gettraceback(self, 
space): - from pypy.interpreter.pytraceback import PyTraceback tb = self.w_traceback if tb is not None and isinstance(tb, PyTraceback): # tb escapes to app level (see OperationError.get_traceback) From noreply at buildbot.pypy.org Wed Jun 18 23:48:36 2014 From: noreply at buildbot.pypy.org (mattip) Date: Wed, 18 Jun 2014 23:48:36 +0200 (CEST) Subject: [pypy-commit] pypy default: fix for MSVC by allowing additional code in init function (like cpyext/test/foo3.c) Message-ID: <20140618214836.C6BAF1C1017@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72100:6b9f7c1857b7 Date: 2014-06-18 22:07 +0300 http://bitbucket.org/pypy/pypy/changeset/6b9f7c1857b7/ Log: fix for MSVC by allowing additional code in init function (like cpyext/test/foo3.c) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -301,9 +301,9 @@ space.sys.get('modules'), space.wrap(name)) - @unwrap_spec(modname=str, prologue=str, PY_SSIZE_T_CLEAN=bool) + @unwrap_spec(modname=str, prologue=str, more_init=str, PY_SSIZE_T_CLEAN=bool) def import_extension(space, modname, w_functions, prologue="", - PY_SSIZE_T_CLEAN=False): + more_init="", PY_SSIZE_T_CLEAN=False): functions = space.unwrap(w_functions) methods_table = [] codes = [] @@ -326,6 +326,8 @@ }; """ % ('\n'.join(methods_table),) init = """Py_InitModule("%s", methods);""" % (modname,) + if more_init: + init += more_init return import_module(space, name=modname, init=init, body=body, PY_SSIZE_T_CLEAN=PY_SSIZE_T_CLEAN) diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py --- a/pypy/module/cpyext/test/test_intobject.py +++ b/pypy/module/cpyext/test/test_intobject.py @@ -150,7 +150,7 @@ /*tp_methods*/ 0, /*tp_members*/ enum_members, /*tp_getset*/ 0, - /*tp_base*/ &PyInt_Type, + /*tp_base*/ 0, /* set to &PyInt_Type in init function for MSVC */ /*tp_dict*/ 0, 
/*tp_descr_get*/ 0, /*tp_descr_set*/ 0, @@ -159,7 +159,9 @@ /*tp_alloc*/ 0, /*tp_new*/ 0 }; - """) + """, more_init = ''' + Enum_Type.tp_base = &PyInt_Type; + ''') a = module.newEnum("ULTIMATE_ANSWER", 42) assert type(a).__name__ == "Enum" @@ -173,12 +175,13 @@ ("test_int", "METH_NOARGS", """ PyObject * obj = PyInt_FromLong(42); + PyObject * val; if (!PyInt_Check(obj)) { Py_DECREF(obj); PyErr_SetNone(PyExc_ValueError); return NULL; } - PyObject * val = PyInt_FromLong(((PyIntObject *)obj)->ob_ival); + val = PyInt_FromLong(((PyIntObject *)obj)->ob_ival); Py_DECREF(obj); return val; """ From noreply at buildbot.pypy.org Thu Jun 19 10:56:53 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Thu, 19 Jun 2014 10:56:53 +0200 (CEST) Subject: [pypy-commit] stmgc instrumented: some more data Message-ID: <20140619085653.91B671D293A@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: instrumented Changeset: r1250:c26593f884a3 Date: 2014-05-28 09:16 +0200 http://bitbucket.org/pypy/stmgc/changeset/c26593f884a3/ Log: some more data diff --git a/c7/stm/setup.c b/c7/stm/setup.c --- a/c7/stm/setup.c +++ b/c7/stm/setup.c @@ -94,8 +94,35 @@ #ifdef MEASURE_MEM struct rusage usage; getrusage(RUSAGE_SELF, &usage); - fprintf(stderr, "{%f:%ld/%ld}\n", time, - (long)pages_ctl.total_allocated, usage.ru_maxrss*1024); + + uintptr_t pagenum, endpagenum, + total_used_pages = 0, total_privatized_pages = 0; + pagenum = END_NURSERY_PAGE; /* starts after the nursery */ + endpagenum = (uninitialized_page_start - stm_object_pages) / 4096UL; + while (1) { + if (UNLIKELY(pagenum == endpagenum)) { + /* we reach this point usually twice, because there are + more pages after 'uninitialized_page_stop' */ + if (endpagenum == NB_PAGES) + break; /* done */ + pagenum = (uninitialized_page_stop - stm_object_pages) / 4096UL; + endpagenum = NB_PAGES; + continue; + } + + total_used_pages++; + long i; + for (i = 1; i <= NB_SEGMENTS; i++) { + if (is_private_page(i, pagenum)) { + total_privatized_pages++; + } + 
} + pagenum++; + } + + fprintf(stderr, "{%f:%ld/%ld/%f}\n", time, + (long)pages_ctl.total_allocated, usage.ru_maxrss*1024, + (float)total_privatized_pages / total_used_pages); #endif } return NULL; From noreply at buildbot.pypy.org Thu Jun 19 10:56:54 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Thu, 19 Jun 2014 10:56:54 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Try to cope with the missing HAS_CARDS flag and the refactoring. Unclear if Message-ID: <20140619085654.C11D31D293A@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: card-marking Changeset: r1251:aa5874a86f3a Date: 2014-06-19 10:57 +0200 http://bitbucket.org/pypy/stmgc/changeset/aa5874a86f3a/ Log: Try to cope with the missing HAS_CARDS flag and the refactoring. Unclear if better, since it is slower in some cases. Not sure if I grasped the full idea. diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -48,8 +48,6 @@ i.e. it comes from before the most recent minor collection. */ assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL); - dprintf_test(("write_slowpath %p -> ovf obj_to_nurs, index:%lu\n", - obj, mark_card ? index : (uintptr_t)-1)); assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); if (!mark_card) { @@ -57,6 +55,11 @@ into 'objects_pointing_to_nursery', and remove the flag so that the write_slowpath will not be called again until the next minor collection. 
*/ + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* if we clear this flag, we also need to clear the cards */ + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_CLEAR, false); + } obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET); LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); } @@ -195,6 +198,13 @@ LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); } + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* if we clear this flag, we have to tell sync_old_objs that + everything needs to be synced */ + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_MARKED_OLD, true); /* mark all */ + } + /* remove GCFLAG_WRITE_BARRIER if we succeeded in getting the base write-lock (not for card marking). */ obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET); @@ -213,7 +223,19 @@ void _stm_write_slowpath(object_t *obj) { - write_slowpath_common(obj, /*mark_card=*/false, -1); + write_slowpath_common(obj, /*mark_card=*/false); +} + +static bool obj_should_use_cards(object_t *obj) +{ + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + if (size < _STM_MIN_CARD_OBJ_SIZE) + return false; + + return !!stmcb_should_use_cards(realobj); } void _stm_write_slowpath_card(object_t *obj, uintptr_t index) @@ -223,12 +245,15 @@ card marking instead. */ if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { - bool mark_card = obj_uses_cards(obj); + bool mark_card = obj_should_use_cards(obj); write_slowpath_common(obj, mark_card); if (!mark_card) return; } + dprintf_test(("write_slowpath_card %p -> index:%lu\n", + obj, index)); + /* We reach this point if we have to mark the card. 
*/ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); @@ -258,9 +283,10 @@ /* More debug checks */ dprintf(("mark %p index %lu, card:%lu with %d\n", obj, index, get_index_to_card_index(index), CARD_MARKED)); - assert(IMPLY(IS_OVERFLOW_OBJ(obj), write_locks[base_lock_idx] == 0)); - assert(IMPLY(!IS_OVERFLOW_OBJ(obj), write_locks[base_lock_idx] == - STM_PSEGMENT->write_lock_num)); + assert(IMPLY(IS_OVERFLOW_OBJ(STM_PSEGMENT, obj), + write_locks[base_lock_idx] == 0)); + assert(IMPLY(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj), + write_locks[base_lock_idx] == STM_PSEGMENT->write_lock_num)); } static void reset_transaction_read_version(void) @@ -530,7 +556,7 @@ static void _card_wise_synchronize_object_now(object_t *obj) { - assert(obj->stm_flags & GCFLAG_HAS_CARDS); + assert(obj_should_use_cards(obj)); assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); assert(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)); @@ -563,6 +589,7 @@ of pages (except _has_private_page_in_range) */ uintptr_t base_offset; ssize_t item_size; + bool all_cards_were_cleared = true; stmcb_get_card_base_itemsize(realobj, &base_offset, &item_size); uintptr_t start_card_index = -1; @@ -573,6 +600,7 @@ OPT_ASSERT(card_value != CARD_MARKED); /* always only MARKED_OLD or CLEAR */ if (card_value == CARD_MARKED_OLD) { + all_cards_were_cleared = false; write_locks[card_lock_idx] = CARD_CLEAR; if (start_card_index == -1) { /* first marked card */ @@ -637,6 +665,13 @@ card_index++; } + if (all_cards_were_cleared) { + /* well, seems like we never called stm_write_card() on it, so actually + we need to fall back to synchronize the whole object */ + _page_wise_synchronize_object_now(obj); + return; + } + #ifndef NDEBUG char *src = REAL_ADDRESS(stm_object_pages, (uintptr_t)obj); char *dst; @@ -661,9 +696,9 @@ assert(STM_PSEGMENT->privatization_lock == 1); if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) { - assert(!(obj->stm_flags & GCFLAG_HAS_CARDS)); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); abort();//XXX WRITE THE FAST CASE - } else if 
(ignore_cards || !(obj->stm_flags & GCFLAG_HAS_CARDS)) { + } else if (ignore_cards || !obj_should_use_cards(obj)) { _page_wise_synchronize_object_now(obj); } else { _card_wise_synchronize_object_now(obj); @@ -722,7 +757,7 @@ /* reset these lists to NULL for the next transaction */ _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num)); LIST_FREE(STM_PSEGMENT->objects_pointing_to_nursery); - LIST_FREE(STM_PSEGMENT->old_objects_with_cards); + list_clear(STM_PSEGMENT->old_objects_with_cards); LIST_FREE(STM_PSEGMENT->large_overflow_objects); timing_end_transaction(attribute_to); @@ -833,7 +868,7 @@ ssize_t size = stmcb_size_rounded_up((struct object_s *)src); memcpy(dst, src, size); - if (item->stm_flags & GCFLAG_HAS_CARDS) + if (obj_should_use_cards(item)) _reset_object_cards(pseg, item, CARD_CLEAR, false); /* objects in 'modified_old_objects' usually have the @@ -910,7 +945,7 @@ /* reset these lists to NULL too on abort */ LIST_FREE(pseg->objects_pointing_to_nursery); - LIST_FREE(pseg->old_objects_with_cards); + list_clear(pseg->old_objects_with_cards); LIST_FREE(pseg->large_overflow_objects); list_clear(pseg->young_weakrefs); #pragma pop_macro("STM_SEGMENT") diff --git a/c7/stm/core.h b/c7/stm/core.h --- a/c7/stm/core.h +++ b/c7/stm/core.h @@ -56,9 +56,10 @@ after the object. */ GCFLAG_HAS_SHADOW = 0x04, - /* Set on objects that are large enough to have multiple cards - (at least _STM_MIN_CARD_COUNT), and that have at least one card - marked. This flag implies GCFLAG_WRITE_BARRIER. */ + /* Set on objects that are large enough (_STM_MIN_CARD_OBJ_SIZE) + to have multiple cards (at least _STM_MIN_CARD_COUNT), and that + have at least one card marked. This flag implies + GCFLAG_WRITE_BARRIER. 
*/ GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET, /* All remaining bits of the 32-bit 'stm_flags' field are taken by diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c --- a/c7/stm/gcpage.c +++ b/c7/stm/gcpage.c @@ -119,8 +119,6 @@ object_t *o = (object_t *)(p - stm_object_pages); o->stm_flags = GCFLAG_WRITE_BARRIER; - if (use_cards && size_rounded_up > CARD_SIZE) - o->stm_flags |= GCFLAG_HAS_CARDS; if (testing_prebuilt_objs == NULL) testing_prebuilt_objs = list_create(); @@ -472,42 +470,39 @@ realobj->stm_flags |= GCFLAG_WRITE_BARRIER; - /* logic corresponds to _collect_now() in nursery.c */ - if (realobj->stm_flags & GCFLAG_HAS_CARDS) { - /* We called a normal WB on these objs. If we wrote - a value to some place in them, we need to - synchronise the whole object on commit */ - if (IS_OVERFLOW_OBJ(pseg, realobj)) { - /* we do not need the old cards for overflow objects */ + if (realobj->stm_flags & GCFLAG_CARDS_SET) { + /* we called a normal WB on this object, so all cards + need to be marked OLD */ + if (!IS_OVERFLOW_OBJ(pseg, realobj)) { + _reset_object_cards(pseg, item, CARD_MARKED_OLD, true); /* mark all */ + } else { + /* simply clear overflow */ _reset_object_cards(pseg, item, CARD_CLEAR, false); - } else { - _reset_object_cards(pseg, item, CARD_MARKED_OLD, true); /* mark all */ } } })); list_clear(lst); - - lst = pseg->old_objects_with_cards; - LIST_FOREACH_R(lst, object_t* /*item*/, - ({ - struct object_s *realobj = (struct object_s *) - REAL_ADDRESS(pseg->pub.segment_base, item); - OPT_ASSERT(realobj->stm_flags & GCFLAG_CARDS_SET); - OPT_ASSERT(realobj->stm_flags & GCFLAG_WRITE_BARRIER); - - /* logic corresponds to _trace_card_object() in nursery.c */ - uint8_t mark_value = IS_OVERFLOW_OBJ(pseg, realobj) ? - CARD_CLEAR : CARD_MARKED_OLD; - _reset_object_cards(pseg, item, mark_value, false); - })); - list_clear(lst); - } else { /* if here MINOR_NOTHING_TO_DO() was true before, it's like we "didn't do a collection" at all. So nothing to do on modified_old_objs. 
*/ } + lst = pseg->old_objects_with_cards; + LIST_FOREACH_R(lst, object_t* /*item*/, + ({ + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(pseg->pub.segment_base, item); + OPT_ASSERT(realobj->stm_flags & GCFLAG_CARDS_SET); + OPT_ASSERT(realobj->stm_flags & GCFLAG_WRITE_BARRIER); + + /* clear cards if overflow, or mark marked cards as old otherwise */ + uint8_t mark_value = IS_OVERFLOW_OBJ(pseg, realobj) ? + CARD_CLEAR : CARD_MARKED_OLD; + _reset_object_cards(pseg, item, mark_value, false); + })); + list_clear(lst); + /* Remove from 'large_overflow_objects' all objects that die */ lst = pseg->large_overflow_objects; if (lst != NULL) { diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c --- a/c7/stm/nursery.c +++ b/c7/stm/nursery.c @@ -114,8 +114,6 @@ copy_large_object:; char *realnobj = REAL_ADDRESS(STM_SEGMENT->segment_base, nobj); memcpy(realnobj, realobj, size); - if (size > CARD_SIZE && stmcb_should_use_cards((struct object_s*)realnobj)) - nobj->stm_flags |= GCFLAG_HAS_CARDS; nobj_sync_now = ((uintptr_t)nobj) | FLAG_SYNC_LARGE; } @@ -141,11 +139,6 @@ nobj = obj; tree_delete_item(STM_PSEGMENT->young_outside_nursery, (uintptr_t)nobj); nobj_sync_now = ((uintptr_t)nobj) | FLAG_SYNC_LARGE; - - realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); - size = stmcb_size_rounded_up((struct object_s *)realobj); - if (size > CARD_SIZE && stmcb_should_use_cards((struct object_s*)realobj)) - nobj->stm_flags |= GCFLAG_HAS_CARDS; } /* Set the overflow_number if nedeed */ @@ -197,8 +190,8 @@ struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj); size_t size = stmcb_size_rounded_up(realobj); - if (!(realobj->stm_flags & GCFLAG_HAS_CARDS)) - return; + if (size < _STM_MIN_CARD_OBJ_SIZE) + return; /* too small for cards */ uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); uintptr_t card_index = 1; @@ -233,11 +226,9 @@ pseg->objects_pointing_to_nursery, object_t * /*item*/, _cards_cleared_in_object(pseg, item)); } - if 
(pseg->old_objects_with_cards) { - LIST_FOREACH_R( - pseg->old_objects_with_cards, object_t * /*item*/, - _cards_cleared_in_object(pseg, item)); - } + LIST_FOREACH_R( + pseg->old_objects_with_cards, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); #endif } @@ -252,8 +243,7 @@ struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj); size_t size = stmcb_size_rounded_up(realobj); - OPT_ASSERT(size >= 32); - assert(realobj->stm_flags & GCFLAG_HAS_CARDS); + OPT_ASSERT(size >= _STM_MIN_CARD_OBJ_SIZE); assert(IMPLY(mark_value == CARD_CLEAR, !mark_all)); /* not necessary */ assert(IMPLY(mark_all, mark_value == CARD_MARKED_OLD)); /* set *all* to OLD */ assert(IMPLY(IS_OVERFLOW_OBJ(pseg, realobj), @@ -349,29 +339,24 @@ stmcb_trace((struct object_s *)realobj, &minor_trace_if_young); obj->stm_flags |= GCFLAG_WRITE_BARRIER; - if (obj->stm_flags & GCFLAG_HAS_CARDS) { + if (obj->stm_flags & GCFLAG_CARDS_SET) { /* all objects that had WB cleared need to be fully synchronised on commit, so we have to mark all their cards */ struct stm_priv_segment_info_s *pseg = get_priv_segment( STM_SEGMENT->segment_num); - if (was_definitely_young) { - /* stm_wb-slowpath should never have triggered for young objs */ - assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); - return; - } + /* stm_wb-slowpath should never have triggered for young objs */ + assert(!was_definitely_young); - if (IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) { - /* we do not need the old cards for overflow objects */ + if (!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) { + _reset_object_cards(pseg, obj, CARD_MARKED_OLD, true); /* mark all */ + } else { + /* simply clear overflow */ _reset_object_cards(pseg, obj, CARD_CLEAR, false); - } else { - _reset_object_cards(pseg, obj, CARD_MARKED_OLD, true); /* mark all */ } } - } if (obj->stm_flags & GCFLAG_CARDS_SET) { - assert(!was_definitely_young); - _trace_card_object(obj); } + /* else traced in collect_cardrefs_to_nursery if necessary */ } @@ -384,8 +369,16 
@@ object_t *obj = (object_t*)list_pop_item(lst); assert(!_is_young(obj)); - assert(obj->stm_flags & GCFLAG_CARDS_SET); - _collect_now(obj, false); + + if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { + /* handled in _collect_now() */ + continue; + } + + /* traces cards, clears marked cards or marks them old if + necessary */ + _trace_card_object(obj); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); } } @@ -563,9 +556,7 @@ to hold the ones we didn't trace so far. */ uintptr_t num_old; if (STM_PSEGMENT->objects_pointing_to_nursery == NULL) { - assert(STM_PSEGMENT->old_objects_with_cards == NULL); STM_PSEGMENT->objects_pointing_to_nursery = list_create(); - STM_PSEGMENT->old_objects_with_cards = list_create(); /* See the doc of 'objects_pointing_to_nursery': if it is NULL, then it is implicitly understood to be equal to diff --git a/c7/stm/setup.c b/c7/stm/setup.c --- a/c7/stm/setup.c +++ b/c7/stm/setup.c @@ -118,7 +118,7 @@ pr->pub.segment_num = i; pr->pub.segment_base = segment_base; pr->objects_pointing_to_nursery = NULL; - pr->old_objects_with_cards = NULL; + pr->old_objects_with_cards = list_create(); pr->large_overflow_objects = NULL; pr->modified_old_objects = list_create(); pr->modified_old_objects_markers = list_create(); @@ -158,7 +158,7 @@ for (i = 1; i <= NB_SEGMENTS; i++) { struct stm_priv_segment_info_s *pr = get_priv_segment(i); assert(pr->objects_pointing_to_nursery == NULL); - assert(pr->old_objects_with_cards == NULL); + list_free(pr->old_objects_with_cards); assert(pr->large_overflow_objects == NULL); list_free(pr->modified_old_objects); list_free(pr->modified_old_objects_markers); diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -150,6 +150,7 @@ #define _STM_GCFLAG_CARDS_SET 0x08 #define _STM_CARD_SIZE 32 /* must be >= 32 */ #define _STM_MIN_CARD_COUNT 17 +#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT) #define _STM_NSE_SIGNAL_MAX _STM_TIME_N #define _STM_FAST_ALLOC (66*1024) diff --git 
a/c7/test/support.py b/c7/test/support.py --- a/c7/test/support.py +++ b/c7/test/support.py @@ -372,7 +372,6 @@ HDR = lib.SIZEOF_MYOBJ assert HDR == 8 GCFLAG_WRITE_BARRIER = lib._STM_GCFLAG_WRITE_BARRIER -GCFLAG_HAS_CARDS = lib._STM_GCFLAG_HAS_CARDS CARD_SIZE = lib._STM_CARD_SIZE # 16b at least NB_SEGMENTS = lib.STM_NB_SEGMENTS FAST_ALLOC = lib._STM_FAST_ALLOC From noreply at buildbot.pypy.org Thu Jun 19 12:43:28 2014 From: noreply at buildbot.pypy.org (mswart) Date: Thu, 19 Jun 2014 12:43:28 +0200 (CEST) Subject: [pypy-commit] lang-smalltalk stmgc-c7: primitives: implement GET_ATTRIBUTE Message-ID: <20140619104328.EAB711C00B9@cobra.cs.uni-duesseldorf.de> Author: Malte Swart Branch: stmgc-c7 Changeset: r846:247710759f02 Date: 2014-06-19 12:43 +0200 http://bitbucket.org/pypy/lang-smalltalk/changeset/247710759f02/ Log: primitives: implement GET_ATTRIBUTE Implement GET_ATTRIBUTE primitive to let the VM access command line and smalltalk arguments. diff --git a/spyvm/interpreter.py b/spyvm/interpreter.py --- a/spyvm/interpreter.py +++ b/spyvm/interpreter.py @@ -132,7 +132,8 @@ class Interpreter(object): _immutable_fields_ = ["space", "image", "image_name", "max_stack_depth", "interrupt_counter_size", - "startup_time", "evented"] + "startup_time", "evented", + "vm_args", "smalltalk_args"] _w_last_active_context = None cnt = 0 _last_indent = "" @@ -145,7 +146,7 @@ ) def __init__(self, space, image=None, image_name="", trace=False, - evented=True, + evented=True, vm_args=['unknown'], smalltalk_args=[], max_stack_depth=constants.MAX_LOOP_DEPTH): import time self.space = space @@ -160,6 +161,8 @@ self._loop = False self.next_wakeup_tick = 0 self.evented = evented + self.vm_args = vm_args + self.smalltalk_args = smalltalk_args try: self.interrupt_counter_size = int(os.environ["SPY_ICS"]) except KeyError: diff --git a/spyvm/primitives.py b/spyvm/primitives.py --- a/spyvm/primitives.py +++ b/spyvm/primitives.py @@ -1101,6 +1101,7 @@ SHORT_AT_PUT = 144 FILL = 145 CLONE = 148 
+GET_ATTRIBUTE = 149 @expose_primitive(BEEP, unwrap_spec=[object]) def func(interp, s_frame, w_receiver): @@ -1143,6 +1144,42 @@ def func(interp, s_frame, w_arg): return w_arg.clone(interp.space) +@expose_primitive(GET_ATTRIBUTE, unwrap_spec=[object, int]) +def func(interp, s_frame, _, idx): + if idx < 0: # VM argument + vm_arg = -idx + if vm_arg < len(interp.vm_args): + return interp.space.wrap_string(interp.vm_args[vm_arg]) + else: + return interp.space.w_nil + elif idx == 0: + return interp.space.wrap_string(interp.vm_args[0]) + elif idx == 1: + return interp.space.wrap_string(interp.image_name) + elif idx == 1001: + # OS type: "unix", "win32", "mac", ... + return interp.space.w_nil + elif idx == 1002: + # OS name: "solaris2.5" on unix, "win95" on win32, ... + return interp.space.w_nil + elif idx == 1003: + # processor architecture: "68k", "x86", "PowerPC", ... + return interp.space.w_nil + elif idx == 1004: + # Interpreter version string + return interp.space.w_nil + elif idx == 1005: + # window system name + return interp.space.w_nil + elif idx == 1006: + # vm build string + return interp.space.w_nil + else: + smalltalk_arg = idx - 2 + if smalltalk_arg < len(interp.smalltalk_args): + return interp.space.wrap_string(interp.smalltalk_args[smalltalk_arg]) + return interp.space.w_nil + # ___________________________________________________________________________ # File primitives (150-169) # (XXX they are obsolete in Squeak and done with a plugin) diff --git a/spyvm/test/test_primitives.py b/spyvm/test/test_primitives.py --- a/spyvm/test/test_primitives.py +++ b/spyvm/test/test_primitives.py @@ -37,7 +37,7 @@ IMAGENAME = "anImage.image" -def mock(stack, context = None): +def mock(stack, context = None, interp=None): mapped_stack = [wrap(x) for x in stack] if context is None: frame = MockFrame(mapped_stack) @@ -45,11 +45,12 @@ frame = context for i in range(len(stack)): frame.as_context_get_shadow(space).push(stack[i]) - interp = interpreter.Interpreter(space,
image_name=IMAGENAME) + if interp is None: + interp = interpreter.Interpreter(space, image_name=IMAGENAME) return (interp, frame, len(stack)) -def prim(code, stack, context = None): - interp, w_frame, argument_count = mock(stack, context) +def prim(code, stack, context = None, interp=None): + interp, w_frame, argument_count = mock(stack, context, interp=interp) prim_table[code](interp, w_frame.as_context_get_shadow(space), argument_count-1) res = w_frame.as_context_get_shadow(space).pop() s_frame = w_frame.as_context_get_shadow(space) @@ -823,3 +824,39 @@ # primitives.VALUE_WITH_ARGS is tested in test_interpreter # primitives.OBJECT_AT is tested in test_interpreter # primitives.OBJECT_AT_PUT is tested in test_interpreter + + +class TestPrimGetAttribute: + def get_argument(self, idx, **kwargs): + kwargs.setdefault('image_name', IMAGENAME) + interp = interpreter.Interpreter(space, **kwargs) + result = prim(primitives.GET_ATTRIBUTE, [idx], interp=interp) + if result.is_same_object(space.w_nil): + return None + else: + return result.as_string() + + # VM args: + def test_not_passed_vm_argument(self): + assert self.get_argument(-1, vm_args=['rsqueask']) is None + + def test_first_vm_argument(self): + assert self.get_argument(-1, vm_args=['rsqueask', 'test']) == 'test' + + def test_second_vm_argument(self): + assert self.get_argument(-2, vm_args=['rsqueask', 'test', 'foo']) == 'foo' + + # interp name: + def test_interp_name(self): + assert self.get_argument(0) == 'unknown' + + # image name: + def test_image_name(self): + assert self.get_argument(1) == IMAGENAME + + # smalltalk args: + def test_smalltalk_first_argument(self): + assert self.get_argument(2, smalltalk_args=['example.st']) == 'example.st' + + def test_smalltalk_no_argument(self): + assert self.get_argument(2) is None diff --git a/targetimageloadingsmalltalk.py b/targetimageloadingsmalltalk.py --- a/targetimageloadingsmalltalk.py +++ b/targetimageloadingsmalltalk.py @@ -144,10 +144,13 @@ stringarg = "" code = 
None as_benchmark = False + smalltalk_args = [] while idx < len(argv): arg = argv[idx] - if arg in ["-h", "--help"]: + if path is not None: # smalltalk args + smalltalk_args.append(arg) + elif arg in ["-h", "--help"]: _usage(argv) return 0 elif arg in ["-j", "--jit"]: @@ -204,7 +207,10 @@ image_reader = squeakimage.reader_for_image(space, squeakimage.Stream(data=imagedata)) image = create_image(space, image_reader) - interp = interpreter.Interpreter(space, image, image_name=path, trace=trace, evented=evented) + interp = interpreter.Interpreter(space, image, trace=trace, evented=evented, + image_name=path, + vm_args=argv, smalltalk_args=smalltalk_args, + ) space.runtime_setup(argv[0]) if benchmark is not None: return _run_benchmark(interp, number, benchmark, stringarg) From noreply at buildbot.pypy.org Thu Jun 19 13:46:09 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Thu, 19 Jun 2014 13:46:09 +0200 (CEST) Subject: [pypy-commit] stmgc parallel-pulling: try the most naive way to parallelize commit pushing Message-ID: <20140619114609.D38031C0299@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: parallel-pulling Changeset: r1252:a68ae11d76fa Date: 2014-06-19 13:46 +0200 http://bitbucket.org/pypy/stmgc/changeset/a68ae11d76fa/ Log: try the most naive way to parallelize commit pushing diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -225,6 +225,7 @@ dprintf(("start_transaction\n")); s_mutex_unlock(); + pull_committed_changes(); /* Now running the SP_RUNNING start. We can set our 'transaction_read_version' after releasing the mutex, @@ -355,7 +356,7 @@ } } -static void synchronize_object_now(object_t *obj) +static void synchronize_object_now(object_t *obj, bool lazy_on_commit) { /* Copy around the version of 'obj' that lives in our own segment. 
It is first copied into the shared pages, and then into other @@ -381,6 +382,12 @@ uintptr_t last_page = (end - 1) / 4096UL; long i, myself = STM_SEGMENT->segment_num; + bool private_in_segment[NB_SEGMENTS]; + if (lazy_on_commit) { + for (i = 1; i <= NB_SEGMENTS; i++) + private_in_segment[i-1] = false; + } + for (; first_page <= last_page; first_page++) { uintptr_t copy_size; @@ -410,20 +417,25 @@ assert(memcmp(dst, src, copy_size) == 0); /* same page */ } + /* now copy from the shared page to all private pages */ + src = REAL_ADDRESS(stm_object_pages, start); for (i = 1; i <= NB_SEGMENTS; i++) { if (i == myself) continue; - src = REAL_ADDRESS(stm_object_pages, start); dst = REAL_ADDRESS(get_segment_base(i), start); if (is_private_page(i, first_page)) { /* The page is a private page. We need to diffuse this fragment of object from the shared page to this private page. */ - if (copy_size == 4096) - pagecopy(dst, src); - else - memcpy(dst, src, copy_size); + if (!lazy_on_commit) { + if (copy_size == 4096) + pagecopy(dst, src); + else + memcpy(dst, src, copy_size); + } + + private_in_segment[i-1] = true; } else { assert(!memcmp(dst, src, copy_size)); /* same page */ @@ -432,6 +444,15 @@ start = (start + 4096) & ~4095; } + + if (lazy_on_commit) { + for (i = 1; i <= NB_SEGMENTS; i++) { + if (private_in_segment[i-1]) { + struct stm_priv_segment_info_s *pseg = get_priv_segment(i); + LIST_APPEND(pseg->outdated_objects, obj); + } + } + } } } @@ -442,7 +463,7 @@ acquire_privatization_lock(); LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *, - synchronize_object_now(item)); + synchronize_object_now(item, false)); release_privatization_lock(); } @@ -466,7 +487,7 @@ /* copy the object to the shared page, and to the other private pages as needed */ - synchronize_object_now(item); + synchronize_object_now(item, true); })); release_privatization_lock(); @@ -559,6 +580,7 @@ /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! 
*/ s_mutex_unlock(); + pull_committed_changes(); } void stm_abort_transaction(void) @@ -567,6 +589,41 @@ abort_with_mutex(); } +static void copy_objs_from_segment_0(int segment_num, struct list_s *lst) +{ + /* pull the list of objects from segment 0. This either resets + modifications or just updates the view of the current segment. + */ + char *local_base = get_segment_base(segment_num); + char *zero_base = get_segment_base(0); + + LIST_FOREACH_R(lst, object_t * /*item*/, + ({ + /* memcpy in the opposite direction than + push_modified_to_other_segments() */ + char *src = REAL_ADDRESS(zero_base, item); + char *dst = REAL_ADDRESS(local_base, item); + ssize_t size = stmcb_size_rounded_up((struct object_s *)src); + memcpy(dst, src, size); + + /* all objs in segment 0 should have the WB flag: */ + assert(((struct object_s *)dst)->stm_flags & GCFLAG_WRITE_BARRIER); + })); + write_fence(); +} + +static void pull_committed_changes() +{ + struct list_s *lst = STM_PSEGMENT->outdated_objects; + + if (list_count(lst)) { + dprintf(("pulling %lu objects from shared segment\n", list_count(lst))); + copy_objs_from_segment_0(STM_SEGMENT->segment_num, lst); + list_clear(lst); + } +} + + static void reset_modified_from_other_segments(int segment_num) { @@ -723,6 +780,7 @@ { s_mutex_lock(); enter_safe_point_if_requested(); + pull_committed_changes(); /* XXX: not sure if necessary */ if (STM_PSEGMENT->transaction_state == TS_REGULAR) { dprintf(("become_inevitable: %s\n", msg)); @@ -739,6 +797,7 @@ } s_mutex_unlock(); + pull_committed_changes(); } void stm_become_globally_unique_transaction(stm_thread_local_t *tl, @@ -749,4 +808,5 @@ s_mutex_lock(); synchronize_all_threads(STOP_OTHERS_AND_BECOME_GLOBALLY_UNIQUE); s_mutex_unlock(); + pull_committed_changes(); } diff --git a/c7/stm/core.h b/c7/stm/core.h --- a/c7/stm/core.h +++ b/c7/stm/core.h @@ -75,6 +75,10 @@ struct stm_priv_segment_info_s { struct stm_segment_info_s pub; + /* list of objects that were committed while we waited in a + 
safe point. This means we have an outdated copy of them. */ + struct list_s *outdated_objects; + /* List of old objects (older than the current transaction) that the current transaction attempts to modify. This is used to track the STM status: they are old objects that where written to and @@ -257,7 +261,8 @@ } static void copy_object_to_shared(object_t *obj, int source_segment_num); -static void synchronize_object_now(object_t *obj); +static void synchronize_object_now(object_t *obj, bool lazy_on_commit); +static void pull_committed_changes(); static inline void acquire_privatization_lock(void) { diff --git a/c7/stm/forksupport.c b/c7/stm/forksupport.c --- a/c7/stm/forksupport.c +++ b/c7/stm/forksupport.c @@ -66,6 +66,7 @@ s_mutex_lock(); synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK); + pull_committed_changes(); /* XXX: unclear if necessary */ /* Make a new mmap at some other address, but of the same size as the standard mmap at stm_object_pages diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c --- a/c7/stm/gcpage.c +++ b/c7/stm/gcpage.c @@ -153,6 +153,7 @@ } s_mutex_unlock(); + pull_committed_changes(); } diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c --- a/c7/stm/nursery.c +++ b/c7/stm/nursery.c @@ -216,7 +216,7 @@ */ if (STM_PSEGMENT->minor_collect_will_commit_now) { acquire_privatization_lock(); - synchronize_object_now(obj); + synchronize_object_now(obj, false); release_privatization_lock(); } else { LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj); diff --git a/c7/stm/setup.c b/c7/stm/setup.c --- a/c7/stm/setup.c +++ b/c7/stm/setup.c @@ -118,6 +118,7 @@ pr->pub.segment_base = segment_base; pr->objects_pointing_to_nursery = NULL; pr->large_overflow_objects = NULL; + pr->outdated_objects = list_create(); pr->modified_old_objects = list_create(); pr->modified_old_objects_markers = list_create(); pr->young_weakrefs = list_create(); @@ -157,6 +158,7 @@ struct stm_priv_segment_info_s *pr = get_priv_segment(i); assert(pr->objects_pointing_to_nursery 
== NULL); assert(pr->large_overflow_objects == NULL); + list_free(pr->outdated_objects); list_free(pr->modified_old_objects); list_free(pr->modified_old_objects_markers); list_free(pr->young_weakrefs); diff --git a/c7/stm/sync.c b/c7/stm/sync.c --- a/c7/stm/sync.c +++ b/c7/stm/sync.c @@ -255,6 +255,7 @@ STM_PSEGMENT->safe_point = SP_RUNNING; stm_safe_point(); + pull_committed_changes(); } #endif @@ -439,4 +440,5 @@ s_mutex_lock(); enter_safe_point_if_requested(); s_mutex_unlock(); + pull_committed_changes(); } From noreply at buildbot.pypy.org Thu Jun 19 13:57:56 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Thu, 19 Jun 2014 13:57:56 +0200 (CEST) Subject: [pypy-commit] stmgc parallel-pulling: add missing pull_committed_changes() Message-ID: <20140619115756.A18EC1C1017@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: parallel-pulling Changeset: r1253:d22ba5bca030 Date: 2014-06-19 13:58 +0200 http://bitbucket.org/pypy/stmgc/changeset/d22ba5bca030/ Log: add missing pull_committed_changes() diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -540,6 +540,8 @@ if (detect_write_read_conflicts()) goto restart; + pull_committed_changes(); + /* cannot abort any more from here */ dprintf(("commit_transaction\n")); From noreply at buildbot.pypy.org Thu Jun 19 14:22:10 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Thu, 19 Jun 2014 14:22:10 +0200 (CEST) Subject: [pypy-commit] stmgc parallel-pulling: small fix Message-ID: <20140619122210.AF9F61C3619@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: parallel-pulling Changeset: r1254:598efe29057c Date: 2014-06-19 14:22 +0200 http://bitbucket.org/pypy/stmgc/changeset/598efe29057c/ Log: small fix diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -428,14 +428,17 @@ /* The page is a private page. We need to diffuse this fragment of object from the shared page to this private page. 
*/ - if (!lazy_on_commit) { + if ((!lazy_on_commit) + || (get_priv_segment(i)->safe_point == SP_NO_TRANSACTION)) { + /* not lazily synchronize or there is no transaction running there + (this is to avoid the list of outdated objs growing infinitely) */ if (copy_size == 4096) pagecopy(dst, src); else memcpy(dst, src, copy_size); + } else { + private_in_segment[i-1] = true; } - - private_in_segment[i-1] = true; } else { assert(!memcmp(dst, src, copy_size)); /* same page */ @@ -540,6 +543,8 @@ if (detect_write_read_conflicts()) goto restart; + /* pull changes in case we waited for a transaction to commit + in contention management. */ pull_committed_changes(); /* cannot abort any more from here */ From noreply at buildbot.pypy.org Thu Jun 19 15:24:59 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Thu, 19 Jun 2014 15:24:59 +0200 (CEST) Subject: [pypy-commit] stmgc parallel-pulling: some fixes. it seems to improve threadworms by a bit (15%) in the best case. Message-ID: <20140619132459.989BA1C1017@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: parallel-pulling Changeset: r1255:de4815efb600 Date: 2014-06-19 15:25 +0200 http://bitbucket.org/pypy/stmgc/changeset/de4815efb600/ Log: some fixes. it seems to improve threadworms by a bit (15%) in the best case. this may however also get nullified by card-marking diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -225,7 +225,7 @@ dprintf(("start_transaction\n")); s_mutex_unlock(); - pull_committed_changes(); + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); /* Now running the SP_RUNNING start. We can set our 'transaction_read_version' after releasing the mutex, @@ -543,10 +543,6 @@ if (detect_write_read_conflicts()) goto restart; - /* pull changes in case we waited for a transaction to commit - in contention management. 
*/ - pull_committed_changes(); - /* cannot abort any more from here */ dprintf(("commit_transaction\n")); @@ -587,7 +583,6 @@ /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */ s_mutex_unlock(); - pull_committed_changes(); } void stm_abort_transaction(void) @@ -608,24 +603,63 @@ ({ /* memcpy in the opposite direction than push_modified_to_other_segments() */ - char *src = REAL_ADDRESS(zero_base, item); - char *dst = REAL_ADDRESS(local_base, item); - ssize_t size = stmcb_size_rounded_up((struct object_s *)src); - memcpy(dst, src, size); + char *realobj = REAL_ADDRESS(zero_base, item); + ssize_t size = stmcb_size_rounded_up((struct object_s *)realobj); + + /* XXX: copied from sync_object_now */ + uintptr_t start = (uintptr_t)item; + uintptr_t first_page = start / 4096UL; + + if (((struct object_s *)realobj)->stm_flags & GCFLAG_SMALL_UNIFORM) { + abort();//XXX WRITE THE FAST CASE + } + else { + uintptr_t end = start + size; + uintptr_t last_page = (end - 1) / 4096UL; + long myself = segment_num; + + for (; first_page <= last_page; first_page++) { + uintptr_t copy_size; + if (first_page == last_page) { + /* this is the final fragment */ + copy_size = end - start; + } + else { + /* this is a non-final fragment, going up to the + page's end */ + copy_size = 4096 - (start & 4095); + } + + /* copy from shared page to private, if needed */ + char *dst = REAL_ADDRESS(local_base, start); + char *src = REAL_ADDRESS(zero_base, start); + if (is_private_page(myself, first_page)) { + if (copy_size == 4096) + pagecopy(dst, src); + else + memcpy(dst, src, copy_size); + } + else { + assert(memcmp(dst, src, copy_size) == 0); /* same page */ + } + + start = (start + 4096) & ~4095; + } + } /* all objs in segment 0 should have the WB flag: */ - assert(((struct object_s *)dst)->stm_flags & GCFLAG_WRITE_BARRIER); + assert(((struct object_s *)realobj)->stm_flags & GCFLAG_WRITE_BARRIER); })); write_fence(); } -static void pull_committed_changes() +static void 
pull_committed_changes(struct stm_priv_segment_info_s *pseg) { - struct list_s *lst = STM_PSEGMENT->outdated_objects; + struct list_s *lst = pseg->outdated_objects; if (list_count(lst)) { dprintf(("pulling %lu objects from shared segment\n", list_count(lst))); - copy_objs_from_segment_0(STM_SEGMENT->segment_num, lst); + copy_objs_from_segment_0(pseg->pub.segment_num, lst); list_clear(lst); } } @@ -787,7 +821,7 @@ { s_mutex_lock(); enter_safe_point_if_requested(); - pull_committed_changes(); /* XXX: not sure if necessary */ + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); /* XXX: not sure if necessary */ if (STM_PSEGMENT->transaction_state == TS_REGULAR) { dprintf(("become_inevitable: %s\n", msg)); @@ -804,7 +838,7 @@ } s_mutex_unlock(); - pull_committed_changes(); + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); } void stm_become_globally_unique_transaction(stm_thread_local_t *tl, @@ -815,5 +849,5 @@ s_mutex_lock(); synchronize_all_threads(STOP_OTHERS_AND_BECOME_GLOBALLY_UNIQUE); s_mutex_unlock(); - pull_committed_changes(); + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); } diff --git a/c7/stm/core.h b/c7/stm/core.h --- a/c7/stm/core.h +++ b/c7/stm/core.h @@ -262,7 +262,7 @@ static void copy_object_to_shared(object_t *obj, int source_segment_num); static void synchronize_object_now(object_t *obj, bool lazy_on_commit); -static void pull_committed_changes(); +static void pull_committed_changes(struct stm_priv_segment_info_s *pseg); static inline void acquire_privatization_lock(void) { diff --git a/c7/stm/forksupport.c b/c7/stm/forksupport.c --- a/c7/stm/forksupport.c +++ b/c7/stm/forksupport.c @@ -66,7 +66,7 @@ s_mutex_lock(); synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK); - pull_committed_changes(); /* XXX: unclear if necessary */ + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); /* XXX: unclear if necessary */ /* Make a new mmap at some other address, but of the same size 
as the standard mmap at stm_object_pages @@ -81,6 +81,8 @@ for (i = 1; i <= NB_SEGMENTS; i++) { char *src, *dst; struct stm_priv_segment_info_s *psrc = get_priv_segment(i); + pull_committed_changes(psrc); + dst = big_copy + (((char *)psrc) - stm_object_pages); *(struct stm_priv_segment_info_s *)dst = *psrc; diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c --- a/c7/stm/gcpage.c +++ b/c7/stm/gcpage.c @@ -153,7 +153,7 @@ } s_mutex_unlock(); - pull_committed_changes(); + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); } diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c --- a/c7/stm/nursery.c +++ b/c7/stm/nursery.c @@ -477,6 +477,11 @@ for (i = 1; i <= NB_SEGMENTS; i++) { struct stm_priv_segment_info_s *pseg = get_priv_segment(i); + + /* we pull committed changes here so that we are definitely + sure all segments are up-to-date */ + pull_committed_changes(pseg); + if (MINOR_NOTHING_TO_DO(pseg)) /*TS_NONE segments have NOTHING_TO_DO*/ continue; diff --git a/c7/stm/sync.c b/c7/stm/sync.c --- a/c7/stm/sync.c +++ b/c7/stm/sync.c @@ -255,7 +255,7 @@ STM_PSEGMENT->safe_point = SP_RUNNING; stm_safe_point(); - pull_committed_changes(); + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); } #endif @@ -440,5 +440,5 @@ s_mutex_lock(); enter_safe_point_if_requested(); s_mutex_unlock(); - pull_committed_changes(); + pull_committed_changes(get_priv_segment(STM_SEGMENT->segment_num)); } From noreply at buildbot.pypy.org Thu Jun 19 17:03:50 2014 From: noreply at buildbot.pypy.org (fijal) Date: Thu, 19 Jun 2014 17:03:50 +0200 (CEST) Subject: [pypy-commit] pypy default: two missing casts Message-ID: <20140619150350.0C9411C00B9@cobra.cs.uni-duesseldorf.de> Author: Maciej Fijalkowski Branch: Changeset: r72101:c6da7a6927b6 Date: 2014-06-19 17:03 +0200 http://bitbucket.org/pypy/pypy/changeset/c6da7a6927b6/ Log: two missing casts diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ 
-826,7 +826,9 @@ elif timeout == 0: raw_buf, gc_buf = rffi.alloc_buffer(buffersize) try: - read_bytes = _c.socketrecv(self.fd, raw_buf, buffersize, flags) + read_bytes = _c.socketrecv(self.fd, + rffi.cast(rffi.VOIDP, raw_buf), + buffersize, flags) if read_bytes >= 0: return rffi.str_from_buffer(raw_buf, gc_buf, buffersize, read_bytes) finally: @@ -1318,7 +1320,8 @@ try: dstbuf = mallocbuf(dstsize) try: - res = _c.inet_ntop(family, srcbuf, dstbuf, dstsize) + res = _c.inet_ntop(family, rffi.cast(rffi.VOIDP, srcbuf), + dstbuf, dstsize) if not res: raise last_error() return rffi.charp2str(res) From noreply at buildbot.pypy.org Thu Jun 19 18:17:26 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Thu, 19 Jun 2014 18:17:26 +0200 (CEST) Subject: [pypy-commit] pypy pypy3-release-2.3.x: merge py3k Message-ID: <20140619161726.9B7921C3619@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: pypy3-release-2.3.x Changeset: r72102:986752d005bb Date: 2014-06-19 09:16 -0700 http://bitbucket.org/pypy/pypy/changeset/986752d005bb/ Log: merge py3k diff --git a/lib_pypy/gdbm.py b/lib_pypy/_gdbm.py rename from lib_pypy/gdbm.py rename to lib_pypy/_gdbm.py --- a/lib_pypy/gdbm.py +++ b/lib_pypy/_gdbm.py @@ -1,4 +1,5 @@ import cffi, os +import sys ffi = cffi.FFI() ffi.cdef(''' @@ -46,12 +47,15 @@ # failure must be due to missing gdbm dev libs raise ImportError('%s: %s' %(e.__class__.__name__, e)) -class error(Exception): +class error(IOError): pass def _fromstr(key): - if not isinstance(key, str): - raise TypeError("gdbm mappings have string indices only") + if isinstance(key, str): + key = key.encode(sys.getdefaultencoding()) + elif not isinstance(key, bytes): + msg = "gdbm mappings have bytes or string indices only, not {!r}" + raise TypeError(msg.format(type(key).__name__)) return {'dptr': ffi.new("char[]", key), 'dsize': len(key)} class gdbm(object): @@ -98,21 +102,27 @@ return lib.gdbm_exists(self.ll_dbm, _fromstr(key)) has_key = __contains__ - def __getitem__(self, key): + 
def get(self, key, default=None): self._check_closed() drec = lib.gdbm_fetch(self.ll_dbm, _fromstr(key)) if not drec.dptr: - raise KeyError(key) - res = str(ffi.buffer(drec.dptr, drec.dsize)) + return default + res = bytes(ffi.buffer(drec.dptr, drec.dsize)) lib.free(drec.dptr) return res + def __getitem__(self, key): + value = self.get(key) + if value is None: + raise KeyError(key) + return value + def keys(self): self._check_closed() l = [] key = lib.gdbm_firstkey(self.ll_dbm) while key.dptr: - l.append(str(ffi.buffer(key.dptr, key.dsize))) + l.append(bytes(ffi.buffer(key.dptr, key.dsize))) nextkey = lib.gdbm_nextkey(self.ll_dbm, key) lib.free(key.dptr) key = nextkey @@ -122,7 +132,7 @@ self._check_closed() key = lib.gdbm_firstkey(self.ll_dbm) if key.dptr: - res = str(ffi.buffer(key.dptr, key.dsize)) + res = bytes(ffi.buffer(key.dptr, key.dsize)) lib.free(key.dptr) return res @@ -130,7 +140,7 @@ self._check_closed() key = lib.gdbm_nextkey(self.ll_dbm, _fromstr(key)) if key.dptr: - res = str(ffi.buffer(key.dptr, key.dsize)) + res = bytes(ffi.buffer(key.dptr, key.dsize)) lib.free(key.dptr) return res @@ -149,7 +159,18 @@ self._check_closed() lib.gdbm_sync(self.ll_dbm) + def setdefault(self, key, default=None): + value = self.get(key) + if value is not None: + return value + self[key] = default + return default + def open(filename, flags='r', mode=0o666): + if not isinstance(filename, str): + raise TypeError("must be str, not %s" % type(filename).__name__) + filename = filename.encode(sys.getdefaultencoding()) + if flags[0] == 'r': iflags = lib.GDBM_READER elif flags[0] == 'w': diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -283,8 +283,7 @@ missing += 1 continue name = signature.kwonlyargnames[i - co_argcount] - w_name = self.space.wrap(name) - w_def = self.space.finditem(w_kw_defs, w_name) + w_def = self.space.finditem_str(w_kw_defs, name) if w_def is not None: 
scope_w[i] = w_def else: diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -36,44 +36,11 @@ def setup(self, w_type, w_value=None): assert w_type is not None - from pypy.objspace.std.typeobject import W_TypeObject self.w_type = w_type self._w_value = w_value - # HACK: isinstance(w_type, W_TypeObject) won't translate under - # the fake objspace, but w_type.__class__ is W_TypeObject does - # and short circuits to a False constant there, causing the - # isinstance to be ignored =[ - if (w_type is not None and w_type.__class__ is W_TypeObject and - isinstance(w_type, W_TypeObject)): - self.setup_context(w_type.space) if not we_are_translated(): self.debug_excs = [] - def setup_context(self, space): - # Implicit exception chaining - last_operror = space.getexecutioncontext().sys_exc_info() - if (last_operror is None or - last_operror is get_cleared_operation_error(space)): - return - - # We must normalize the value right now to check for cycles - self.normalize_exception(space) - w_value = self.get_w_value(space) - w_last_value = last_operror.get_w_value(space) - if not space.is_w(w_value, w_last_value): - # Avoid reference cycles through the context chain. This is - # O(chain length) but context chains are usually very short. - w_obj = w_last_value - while True: - w_context = space.getattr(w_obj, space.wrap('__context__')) - if space.is_w(w_context, space.w_None): - break - if space.is_w(w_context, w_value): - space.setattr(w_obj, space.wrap('__context__'), space.w_None) - break - w_obj = w_context - space.setattr(w_value, space.wrap('__context__'), w_last_value) - def clear(self, space): # XXX remove this method. 
The point is that we cannot always # hack at 'self' to clear w_type and _w_value, because in some @@ -350,6 +317,53 @@ """ self._application_traceback = traceback + def record_context(self, space, frame): + """Record a __context__ for this exception from the current + frame if one exists. + + __context__ is otherwise lazily determined from the + traceback. However the current frame.last_exception must be + checked for a __context__ before this OperationError overwrites + it (making the previous last_exception unavailable later on). + """ + last_exception = frame.last_exception + if (last_exception is not None and not frame.hide() or + last_exception is get_cleared_operation_error(space)): + # normalize w_value so setup_context can check for cycles + self.normalize_exception(space) + w_value = self.get_w_value(space) + w_last = last_exception.get_w_value(space) + w_context = setup_context(space, w_value, w_last, lazy=True) + space.setattr(w_value, space.wrap('__context__'), w_context) + + +def setup_context(space, w_exc, w_last, lazy=False): + """Determine the __context__ for w_exc from w_last and break + reference cycles in the __context__ chain. + """ + from pypy.module.exceptions.interp_exceptions import W_BaseException + if space.is_w(w_exc, w_last): + w_last = space.w_None + # w_last may also be space.w_None if from ClearedOpErr + if not space.is_w(w_last, space.w_None): + # Avoid reference cycles through the context chain. This is + # O(chain length) but context chains are usually very short. 
+ w_obj = w_last + while True: + assert isinstance(w_obj, W_BaseException) + if lazy: + w_context = w_obj.w_context + else: + # triggers W_BaseException._setup_context + w_context = space.getattr(w_obj, space.wrap('__context__')) + if space.is_none(w_context): + break + if space.is_w(w_context, w_exc): + w_obj.w_context = space.w_None + break + w_obj = w_context + return w_last + class ClearedOpErr: def __init__(self, space): diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -202,18 +202,21 @@ self._trace(frame, 'exception', None, operationerr) #operationerr.print_detailed_traceback(self.space) + @staticmethod + def last_operr(space, frame): + while frame: + last = frame.last_exception + if (last is not None and + (not frame.hide() or + last is get_cleared_operation_error(space))): + return last + frame = frame.f_backref() + return None + def sys_exc_info(self): # attn: the result is not the wrapped sys.exc_info() !!! """Implements sys.exc_info(). 
Return an OperationError instance or None.""" - frame = self.gettopframe() - while frame: - if frame.last_exception is not None: - if (not frame.hide() or - frame.last_exception is - get_cleared_operation_error(self.space)): - return frame.last_exception - frame = frame.f_backref() - return None + return self.last_operr(self.space, self.gettopframe()) def set_sys_exc_info(self, operror): frame = self.gettopframe_nohidden() diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -32,7 +32,8 @@ 'w_func_globals?', 'closure?[*]', 'defs_w?[*]', - 'name?'] + 'name?', + 'w_kw_defs?'] def __init__(self, space, code, w_globals=None, defs_w=[], w_kw_defs=None, closure=None, w_ann=None, forcename=None): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -164,12 +164,9 @@ for i in range(nfreevars): self.cells[i + ncellvars] = outer_func.closure[i] - def is_generator(self): - return self.getcode().co_flags & pycode.CO_GENERATOR - def run(self): """Start this frame's execution.""" - if self.is_generator(): + if self.getcode().co_flags & pycode.CO_GENERATOR: if self.getcode().co_flags & pycode.CO_YIELD_INSIDE_TRY: from pypy.interpreter.generator import GeneratorIteratorWithDel return self.space.wrap(GeneratorIteratorWithDel(self)) @@ -514,10 +511,10 @@ for i in range(min(len(varnames), self.getcode().co_nlocals)): name = varnames[i] w_value = self.locals_stack_w[i] - w_name = self.space.wrap(name.decode('utf-8')) if w_value is not None: - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) else: + w_name = self.space.wrap(name.decode('utf-8')) try: self.space.delitem(self.w_locals, w_name) except OperationError as e: @@ -537,8 +534,7 @@ except ValueError: pass else: - w_name = self.space.wrap(name) - self.space.setitem(self.w_locals, w_name, 
w_value) + self.space.setitem_str(self.w_locals, name, w_value) @jit.unroll_safe @@ -551,13 +547,9 @@ new_fastlocals_w = [None] * numlocals for i in range(min(len(varnames), numlocals)): - w_name = self.space.wrap(varnames[i].decode('utf-8')) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + name = varnames[i] + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: new_fastlocals_w[i] = w_value self.setfastscope(new_fastlocals_w) @@ -566,13 +558,8 @@ for i in range(len(freevarnames)): name = freevarnames[i] cell = self.cells[i] - w_name = self.space.wrap(name) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: cell.set(w_value) @jit.unroll_safe diff --git a/pypy/interpreter/pytraceback.py b/pypy/interpreter/pytraceback.py --- a/pypy/interpreter/pytraceback.py +++ b/pypy/interpreter/pytraceback.py @@ -57,6 +57,7 @@ tb = operror.get_traceback() tb = PyTraceback(space, frame, last_instruction, tb) operror.set_traceback(tb) + operror.record_context(space, frame) def check_traceback(space, w_tb, msg): diff --git a/pypy/interpreter/test/test_raise.py b/pypy/interpreter/test/test_raise.py --- a/pypy/interpreter/test/test_raise.py +++ b/pypy/interpreter/test/test_raise.py @@ -369,6 +369,44 @@ else: fail("No exception raised") + def test_context_once_removed(self): + context = IndexError() + def func1(): + func2() + def func2(): + try: + 1/0 + except ZeroDivisionError as e: + assert e.__context__ is context + else: + fail('No exception raised') + try: + raise context + except: + func1() + + @py.test.mark.xfail(reason="A somewhat contrived case that may burden the " + "JIT to fully support") + def test_frame_spanning_cycle_broken(self): + context = 
IndexError() + def func(): + try: + 1/0 + except Exception as e1: + try: + raise context + except Exception as e2: + assert e2.__context__ is e1 + # XXX: + assert e1.__context__ is None + else: + fail('No exception raised') + try: + raise context + except: + func() + + class AppTestTraceback: def test_raise_with___traceback__(self): diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -6,7 +6,8 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter.gateway import ( + interp2app, interpindirect2app, unwrap_spec) from pypy.interpreter.typedef import TypeDef from rpython.rlib import jit from rpython.rlib.objectmodel import specialize @@ -307,15 +308,19 @@ class W_Range(W_Root): - def __init__(self, w_start, w_stop, w_step, w_length): + def __init__(self, w_start, w_stop, w_step, w_length, promote_step=False): self.w_start = w_start self.w_stop = w_stop self.w_step = w_step self.w_length = w_length + self.promote_step = promote_step - @unwrap_spec(w_step = WrappedDefault(1)) def descr_new(space, w_subtype, w_start, w_stop=None, w_step=None): w_start = space.index(w_start) + promote_step = False + if space.is_none(w_step): # no step argument provided + w_step = space.wrap(1) + promote_step = True if space.is_none(w_stop): # only 1 argument provided w_start, w_stop = space.newint(0), w_start else: @@ -331,7 +336,7 @@ "step argument must not be zero")) w_length = compute_range_length(space, w_start, w_stop, w_step) obj = space.allocate_instance(W_Range, w_subtype) - W_Range.__init__(obj, w_start, w_stop, w_step, w_length) + W_Range.__init__(obj, w_start, w_stop, w_step, w_length, promote_step) return space.wrap(obj) def descr_repr(self, space): @@ -386,8 +391,19 @@ return 
self._compute_item(space, w_index) def descr_iter(self, space): - return space.wrap(W_RangeIterator( - space, self.w_start, self.w_step, self.w_length)) + try: + start = space.int_w(self.w_start) + stop = space.int_w(self.w_stop) + step = space.int_w(self.w_step) + length = space.int_w(self.w_length) + except OperationError as e: + pass + else: + if self.promote_step: + return W_IntRangeStepOneIterator(space, start, stop) + return W_IntRangeIterator(space, start, length, step) + return W_LongRangeIterator(space, self.w_start, self.w_step, + self.w_length) def descr_reversed(self, space): # lastitem = self.start + (self.length-1) * self.step @@ -395,7 +411,7 @@ self.w_start, space.mul(space.sub(self.w_length, space.newint(1)), self.w_step)) - return space.wrap(W_RangeIterator( + return space.wrap(W_LongRangeIterator( space, w_lastitem, space.neg(self.w_step), self.w_length)) def descr_reduce(self, space): @@ -463,7 +479,22 @@ W_Range.typedef.acceptable_as_base_class = False -class W_RangeIterator(W_Root): +class W_AbstractRangeIterator(W_Root): + + def descr_iter(self, space): + return space.wrap(self) + + def descr_len(self, space): + raise NotImplementedError + + def descr_next(self, space): + raise NotImplementedError + + def descr_reduce(self, space): + raise NotImplementedError + + +class W_LongRangeIterator(W_AbstractRangeIterator): def __init__(self, space, w_start, w_step, w_len, w_index=None): self.w_start = w_start self.w_step = w_step @@ -472,9 +503,6 @@ w_index = space.newint(0) self.w_index = w_index - def descr_iter(self, space): - return space.wrap(self) - def descr_next(self, space): if space.is_true(space.lt(self.w_index, self.w_len)): w_index = space.add(self.w_index, space.newint(1)) @@ -489,23 +517,75 @@ def descr_reduce(self, space): from pypy.interpreter.mixedmodule import MixedModule + w_mod = space.getbuiltinmodule('_pickle_support') + mod = space.interp_w(MixedModule, w_mod) + w_args = space.newtuple([self.w_start, self.w_step, self.w_len, + 
self.w_index]) + return space.newtuple([mod.get('longrangeiter_new'), w_args]) + + +class W_IntRangeIterator(W_AbstractRangeIterator): + + def __init__(self, space, current, remaining, step): + self.current = current + self.remaining = remaining + self.step = step + + def descr_next(self, space): + return self.next(space) + + def next(self, space): + if self.remaining > 0: + item = self.current + self.current = item + self.step + self.remaining -= 1 + return space.wrap(item) + raise OperationError(space.w_StopIteration, space.w_None) + + def descr_len(self, space): + return self.get_remaining(space) + + def descr_reduce(self, space): + from pypy.interpreter.mixedmodule import MixedModule w_mod = space.getbuiltinmodule('_pickle_support') mod = space.interp_w(MixedModule, w_mod) + new_inst = mod.get('intrangeiter_new') + w = space.wrap + nt = space.newtuple - return space.newtuple( - [mod.get('rangeiter_new'), - space.newtuple([self.w_start, self.w_step, - self.w_len, self.w_index]), - ]) + tup = [w(self.current), self.get_remaining(space), w(self.step)] + return nt([new_inst, nt(tup)]) + def get_remaining(self, space): + return space.wrap(self.remaining) -W_RangeIterator.typedef = TypeDef("rangeiterator", - __iter__ = interp2app(W_RangeIterator.descr_iter), - __length_hint__ = interp2app(W_RangeIterator.descr_len), - __next__ = interp2app(W_RangeIterator.descr_next), - __reduce__ = interp2app(W_RangeIterator.descr_reduce), + +class W_IntRangeStepOneIterator(W_IntRangeIterator): + _immutable_fields_ = ['stop'] + + def __init__(self, space, start, stop): + self.current = start + self.stop = stop + self.step = 1 + + def next(self, space): + if self.current < self.stop: + item = self.current + self.current = item + 1 + return space.wrap(item) + raise OperationError(space.w_StopIteration, space.w_None) + + def get_remaining(self, space): + return space.wrap(self.stop - self.current) + + +W_AbstractRangeIterator.typedef = TypeDef("rangeiterator", + __iter__ = 
interp2app(W_AbstractRangeIterator.descr_iter), + __length_hint__ = interpindirect2app(W_AbstractRangeIterator.descr_len), + __next__ = interpindirect2app(W_AbstractRangeIterator.descr_next), + __reduce__ = interpindirect2app(W_AbstractRangeIterator.descr_reduce), ) -W_RangeIterator.typedef.acceptable_as_base_class = False +W_AbstractRangeIterator.typedef.acceptable_as_base_class = False class W_Map(W_Root): diff --git a/pypy/module/_pickle_support/__init__.py b/pypy/module/_pickle_support/__init__.py --- a/pypy/module/_pickle_support/__init__.py +++ b/pypy/module/_pickle_support/__init__.py @@ -19,7 +19,8 @@ 'frame_new' : 'maker.frame_new', 'traceback_new' : 'maker.traceback_new', 'generator_new' : 'maker.generator_new', - 'rangeiter_new': 'maker.rangeiter_new', + 'longrangeiter_new': 'maker.longrangeiter_new', + 'intrangeiter_new': 'maker.intrangeiter_new', 'builtin_code': 'maker.builtin_code', 'builtin_function' : 'maker.builtin_function', 'enumerate_new': 'maker.enumerate_new', diff --git a/pypy/module/_pickle_support/maker.py b/pypy/module/_pickle_support/maker.py --- a/pypy/module/_pickle_support/maker.py +++ b/pypy/module/_pickle_support/maker.py @@ -62,9 +62,15 @@ new_generator = instantiate(GeneratorIteratorWithDel) return space.wrap(new_generator) -def rangeiter_new(space, w_start, w_step, w_len, w_index): - from pypy.module.__builtin__.functional import W_RangeIterator - new_iter = W_RangeIterator(space, w_start, w_step, w_len, w_index) +def longrangeiter_new(space, w_start, w_step, w_len, w_index): + from pypy.module.__builtin__.functional import W_LongRangeIterator + new_iter = W_LongRangeIterator(space, w_start, w_step, w_len, w_index) + return space.wrap(new_iter) + + at unwrap_spec(current=int, remaining=int, step=int) +def intrangeiter_new(space, current, remaining, step): + from pypy.module.__builtin__.functional import W_IntRangeIterator + new_iter = W_IntRangeIterator(space, current, remaining, step) return space.wrap(new_iter) def 
operationerror_new(space): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -76,8 +76,8 @@ from pypy.interpreter.typedef import (TypeDef, GetSetProperty, descr_get_dict, descr_set_dict, descr_del_dict) from pypy.interpreter.gateway import interp2app -from pypy.interpreter.error import OperationError -from pypy.interpreter.pytraceback import check_traceback +from pypy.interpreter.error import OperationError, setup_context +from pypy.interpreter.pytraceback import PyTraceback, check_traceback from rpython.rlib import rwin32 @@ -156,7 +156,27 @@ self.w_cause = w_newcause def descr_getcontext(self, space): - return self.w_context + w_context = self.w_context + if w_context is None: + self.w_context = w_context = self._setup_context(space) + return w_context + + def _setup_context(self, space): + """Lazily determine __context__ from w_traceback""" + # XXX: w_traceback can be overwritten: it's not necessarily the + # authoratative traceback! + last_operr = None + w_traceback = self.w_traceback + if w_traceback is not None and isinstance(w_traceback, PyTraceback): + ec = space.getexecutioncontext() + # search for __context__ beginning in the previous frame. 
A + # __context__ from the top most frame would have already + # been handled by OperationError.record_context + last_operr = ec.last_operr(space, w_traceback.frame.f_backref()) + if last_operr is None: + # no __context__ + return space.w_None + return setup_context(space, self, last_operr.get_w_value(space)) def descr_setcontext(self, space, w_newcontext): if not (space.is_w(w_newcontext, space.w_None) or @@ -167,7 +187,6 @@ self.w_context = w_newcontext def descr_gettraceback(self, space): - from pypy.interpreter.pytraceback import PyTraceback tb = self.w_traceback if tb is not None and isinstance(tb, PyTraceback): # tb escapes to app level (see OperationError.get_traceback) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -24,7 +24,7 @@ class W_UnicodeObject(W_Root): import_from_mixin(StringMethods) - _immutable_fields_ = ['_value'] + _immutable_fields_ = ['_value', '_utf8?'] def __init__(w_self, unistr): assert isinstance(unistr, unicode) diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -133,7 +133,7 @@ modules = ['_sqlite3'] subprocess.check_call([str(pypy_c), '-c', 'import _sqlite3']) if not sys.platform == 'win32': - modules += ['_curses', 'syslog', 'gdbm', '_sqlite3'] + modules += ['_curses', 'syslog', '_gdbm', '_sqlite3'] if not options.no_tk: modules.append(('_tkinter')) for module in modules: @@ -402,10 +402,10 @@ ''' -gdbm_bit = '''gdbm +gdbm_bit = '''_gdbm ---- -The gdbm module includes code from gdbm.h, which is distributed under the terms +The _gdbm module includes code from gdbm.h, which is distributed under the terms of the GPL license version 2 or any later version. 
''' From noreply at buildbot.pypy.org Thu Jun 19 21:40:35 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 19 Jun 2014 21:40:35 +0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: update the values Message-ID: <20140619194035.8D6701C0299@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r509:19c72646af82 Date: 2014-06-19 21:40 +0200 http://bitbucket.org/pypy/pypy.org/changeset/19c72646af82/ Log: update the values diff --git a/don1.html b/don1.html --- a/don1.html +++ b/don1.html @@ -9,13 +9,13 @@ - $51043 of $105000 (48.6%) + $51688 of $105000 (49.2%)
diff --git a/don3.html b/don3.html --- a/don3.html +++ b/don3.html @@ -9,13 +9,13 @@ - $48130 of $60000 (80.2%) + $48322 of $60000 (80.5%)
diff --git a/don4.html b/don4.html --- a/don4.html +++ b/don4.html @@ -9,7 +9,7 @@ @@ -17,7 +17,7 @@ 2nd call: - $2097 of $80000 (2.6%) + $2829 of $80000 (3.5%)
From noreply at buildbot.pypy.org Fri Jun 20 07:43:00 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Fri, 20 Jun 2014 07:43:00 +0200 (CEST) Subject: [pypy-commit] pypy py3.3: merge py3k Message-ID: <20140620054300.A03DF1C0299@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3.3 Changeset: r72103:7c07509a944c Date: 2014-06-19 22:41 -0700 http://bitbucket.org/pypy/pypy/changeset/7c07509a944c/ Log: merge py3k diff --git a/lib_pypy/gdbm.py b/lib_pypy/_gdbm.py rename from lib_pypy/gdbm.py rename to lib_pypy/_gdbm.py --- a/lib_pypy/gdbm.py +++ b/lib_pypy/_gdbm.py @@ -1,4 +1,5 @@ import cffi, os +import sys ffi = cffi.FFI() ffi.cdef(''' @@ -46,12 +47,15 @@ # failure must be due to missing gdbm dev libs raise ImportError('%s: %s' %(e.__class__.__name__, e)) -class error(Exception): +class error(IOError): pass def _fromstr(key): - if not isinstance(key, str): - raise TypeError("gdbm mappings have string indices only") + if isinstance(key, str): + key = key.encode(sys.getdefaultencoding()) + elif not isinstance(key, bytes): + msg = "gdbm mappings have bytes or string indices only, not {!r}" + raise TypeError(msg.format(type(key).__name__)) return {'dptr': ffi.new("char[]", key), 'dsize': len(key)} class gdbm(object): @@ -98,21 +102,27 @@ return lib.gdbm_exists(self.ll_dbm, _fromstr(key)) has_key = __contains__ - def __getitem__(self, key): + def get(self, key, default=None): self._check_closed() drec = lib.gdbm_fetch(self.ll_dbm, _fromstr(key)) if not drec.dptr: - raise KeyError(key) - res = str(ffi.buffer(drec.dptr, drec.dsize)) + return default + res = bytes(ffi.buffer(drec.dptr, drec.dsize)) lib.free(drec.dptr) return res + def __getitem__(self, key): + value = self.get(key) + if value is None: + raise KeyError(key) + return value + def keys(self): self._check_closed() l = [] key = lib.gdbm_firstkey(self.ll_dbm) while key.dptr: - l.append(str(ffi.buffer(key.dptr, key.dsize))) + l.append(bytes(ffi.buffer(key.dptr, key.dsize))) nextkey = 
lib.gdbm_nextkey(self.ll_dbm, key) lib.free(key.dptr) key = nextkey @@ -122,7 +132,7 @@ self._check_closed() key = lib.gdbm_firstkey(self.ll_dbm) if key.dptr: - res = str(ffi.buffer(key.dptr, key.dsize)) + res = bytes(ffi.buffer(key.dptr, key.dsize)) lib.free(key.dptr) return res @@ -130,7 +140,7 @@ self._check_closed() key = lib.gdbm_nextkey(self.ll_dbm, _fromstr(key)) if key.dptr: - res = str(ffi.buffer(key.dptr, key.dsize)) + res = bytes(ffi.buffer(key.dptr, key.dsize)) lib.free(key.dptr) return res @@ -149,7 +159,18 @@ self._check_closed() lib.gdbm_sync(self.ll_dbm) + def setdefault(self, key, default=None): + value = self.get(key) + if value is not None: + return value + self[key] = default + return default + def open(filename, flags='r', mode=0o666): + if not isinstance(filename, str): + raise TypeError("must be str, not %s" % type(filename).__name__) + filename = filename.encode(sys.getdefaultencoding()) + if flags[0] == 'r': iflags = lib.GDBM_READER elif flags[0] == 'w': diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -283,8 +283,7 @@ missing += 1 continue name = signature.kwonlyargnames[i - co_argcount] - w_name = self.space.wrap(name) - w_def = self.space.finditem(w_kw_defs, w_name) + w_def = self.space.finditem_str(w_kw_defs, name) if w_def is not None: scope_w[i] = w_def else: diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -36,44 +36,11 @@ def setup(self, w_type, w_value=None): assert w_type is not None - from pypy.objspace.std.typeobject import W_TypeObject self.w_type = w_type self._w_value = w_value - # HACK: isinstance(w_type, W_TypeObject) won't translate under - # the fake objspace, but w_type.__class__ is W_TypeObject does - # and short circuits to a False constant there, causing the - # isinstance to be ignored =[ - if (w_type is not None and w_type.__class__ is 
W_TypeObject and - isinstance(w_type, W_TypeObject)): - self.setup_context(w_type.space) if not we_are_translated(): self.debug_excs = [] - def setup_context(self, space): - # Implicit exception chaining - last_operror = space.getexecutioncontext().sys_exc_info() - if (last_operror is None or - last_operror is get_cleared_operation_error(space)): - return - - # We must normalize the value right now to check for cycles - self.normalize_exception(space) - w_value = self.get_w_value(space) - w_last_value = last_operror.get_w_value(space) - if not space.is_w(w_value, w_last_value): - # Avoid reference cycles through the context chain. This is - # O(chain length) but context chains are usually very short. - w_obj = w_last_value - while True: - w_context = space.getattr(w_obj, space.wrap('__context__')) - if space.is_w(w_context, space.w_None): - break - if space.is_w(w_context, w_value): - space.setattr(w_obj, space.wrap('__context__'), space.w_None) - break - w_obj = w_context - space.setattr(w_value, space.wrap('__context__'), w_last_value) - def clear(self, space): # XXX remove this method. The point is that we cannot always # hack at 'self' to clear w_type and _w_value, because in some @@ -353,6 +320,53 @@ """ self._application_traceback = traceback + def record_context(self, space, frame): + """Record a __context__ for this exception from the current + frame if one exists. + + __context__ is otherwise lazily determined from the + traceback. However the current frame.last_exception must be + checked for a __context__ before this OperationError overwrites + it (making the previous last_exception unavailable later on). 
+ """ + last_exception = frame.last_exception + if (last_exception is not None and not frame.hide() or + last_exception is get_cleared_operation_error(space)): + # normalize w_value so setup_context can check for cycles + self.normalize_exception(space) + w_value = self.get_w_value(space) + w_last = last_exception.get_w_value(space) + w_context = setup_context(space, w_value, w_last, lazy=True) + space.setattr(w_value, space.wrap('__context__'), w_context) + + +def setup_context(space, w_exc, w_last, lazy=False): + """Determine the __context__ for w_exc from w_last and break + reference cycles in the __context__ chain. + """ + from pypy.module.exceptions.interp_exceptions import W_BaseException + if space.is_w(w_exc, w_last): + w_last = space.w_None + # w_last may also be space.w_None if from ClearedOpErr + if not space.is_w(w_last, space.w_None): + # Avoid reference cycles through the context chain. This is + # O(chain length) but context chains are usually very short. + w_obj = w_last + while True: + assert isinstance(w_obj, W_BaseException) + if lazy: + w_context = w_obj.w_context + else: + # triggers W_BaseException._setup_context + w_context = space.getattr(w_obj, space.wrap('__context__')) + if space.is_none(w_context): + break + if space.is_w(w_context, w_exc): + w_obj.w_context = space.w_None + break + w_obj = w_context + return w_last + class ClearedOpErr: def __init__(self, space): diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -202,18 +202,21 @@ self._trace(frame, 'exception', None, operationerr) #operationerr.print_detailed_traceback(self.space) + @staticmethod + def last_operr(space, frame): + while frame: + last = frame.last_exception + if (last is not None and + (not frame.hide() or + last is get_cleared_operation_error(space))): + return last + frame = frame.f_backref() + return None + def sys_exc_info(self): # attn: the 
result is not the wrapped sys.exc_info() !!! """Implements sys.exc_info(). Return an OperationError instance or None.""" - frame = self.gettopframe() - while frame: - if frame.last_exception is not None: - if (not frame.hide() or - frame.last_exception is - get_cleared_operation_error(self.space)): - return frame.last_exception - frame = frame.f_backref() - return None + return self.last_operr(self.space, self.gettopframe()) def set_sys_exc_info(self, operror): frame = self.gettopframe_nohidden() diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -32,7 +32,8 @@ 'w_func_globals?', 'closure?[*]', 'defs_w?[*]', - 'name?'] + 'name?', + 'w_kw_defs?'] def __init__(self, space, code, w_globals=None, defs_w=[], w_kw_defs=None, closure=None, w_ann=None, forcename=None): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -164,12 +164,9 @@ for i in range(nfreevars): self.cells[i + ncellvars] = outer_func.closure[i] - def is_generator(self): - return self.getcode().co_flags & pycode.CO_GENERATOR - def run(self): """Start this frame's execution.""" - if self.is_generator(): + if self.getcode().co_flags & pycode.CO_GENERATOR: if self.getcode().co_flags & pycode.CO_YIELD_INSIDE_TRY: from pypy.interpreter.generator import GeneratorIteratorWithDel return self.space.wrap(GeneratorIteratorWithDel(self)) @@ -514,10 +511,10 @@ for i in range(min(len(varnames), self.getcode().co_nlocals)): name = varnames[i] w_value = self.locals_stack_w[i] - w_name = self.space.wrap(name.decode('utf-8')) if w_value is not None: - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) else: + w_name = self.space.wrap(name.decode('utf-8')) try: self.space.delitem(self.w_locals, w_name) except OperationError as e: @@ -537,8 +534,7 @@ except ValueError: pass else: - w_name 
= self.space.wrap(name) - self.space.setitem(self.w_locals, w_name, w_value) + self.space.setitem_str(self.w_locals, name, w_value) @jit.unroll_safe @@ -551,13 +547,9 @@ new_fastlocals_w = [None] * numlocals for i in range(min(len(varnames), numlocals)): - w_name = self.space.wrap(varnames[i].decode('utf-8')) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + name = varnames[i] + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: new_fastlocals_w[i] = w_value self.setfastscope(new_fastlocals_w) @@ -566,13 +558,8 @@ for i in range(len(freevarnames)): name = freevarnames[i] cell = self.cells[i] - w_name = self.space.wrap(name) - try: - w_value = self.space.getitem(self.w_locals, w_name) - except OperationError, e: - if not e.match(self.space, self.space.w_KeyError): - raise - else: + w_value = self.space.finditem_str(self.w_locals, name) + if w_value is not None: cell.set(w_value) @jit.unroll_safe diff --git a/pypy/interpreter/pytraceback.py b/pypy/interpreter/pytraceback.py --- a/pypy/interpreter/pytraceback.py +++ b/pypy/interpreter/pytraceback.py @@ -57,6 +57,7 @@ tb = operror.get_traceback() tb = PyTraceback(space, frame, last_instruction, tb) operror.set_traceback(tb) + operror.record_context(space, frame) def check_traceback(space, w_tb, msg): diff --git a/pypy/interpreter/test/test_raise.py b/pypy/interpreter/test/test_raise.py --- a/pypy/interpreter/test/test_raise.py +++ b/pypy/interpreter/test/test_raise.py @@ -369,6 +369,44 @@ else: fail("No exception raised") + def test_context_once_removed(self): + context = IndexError() + def func1(): + func2() + def func2(): + try: + 1/0 + except ZeroDivisionError as e: + assert e.__context__ is context + else: + fail('No exception raised') + try: + raise context + except: + func1() + + @py.test.mark.xfail(reason="A somewhat contrived case that may burden the " + "JIT to fully 
support") + def test_frame_spanning_cycle_broken(self): + context = IndexError() + def func(): + try: + 1/0 + except Exception as e1: + try: + raise context + except Exception as e2: + assert e2.__context__ is e1 + # XXX: + assert e1.__context__ is None + else: + fail('No exception raised') + try: + raise context + except: + func() + + class AppTestTraceback: def test_raise_with___traceback__(self): diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -6,7 +6,8 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter.gateway import ( + interp2app, interpindirect2app, unwrap_spec) from pypy.interpreter.typedef import TypeDef, interp_attrproperty_w from rpython.rlib import jit from rpython.rlib.objectmodel import specialize @@ -307,15 +308,19 @@ class W_Range(W_Root): - def __init__(self, w_start, w_stop, w_step, w_length): + def __init__(self, w_start, w_stop, w_step, w_length, promote_step=False): self.w_start = w_start self.w_stop = w_stop self.w_step = w_step self.w_length = w_length + self.promote_step = promote_step - @unwrap_spec(w_step = WrappedDefault(1)) def descr_new(space, w_subtype, w_start, w_stop=None, w_step=None): w_start = space.index(w_start) + promote_step = False + if space.is_none(w_step): # no step argument provided + w_step = space.wrap(1) + promote_step = True if space.is_none(w_stop): # only 1 argument provided w_start, w_stop = space.newint(0), w_start else: @@ -331,7 +336,7 @@ "step argument must not be zero")) w_length = compute_range_length(space, w_start, w_stop, w_step) obj = space.allocate_instance(W_Range, w_subtype) - W_Range.__init__(obj, w_start, w_stop, w_step, w_length) + W_Range.__init__(obj, w_start, w_stop, w_step, w_length, promote_step) return 
space.wrap(obj) def descr_repr(self, space): @@ -386,8 +391,19 @@ return self._compute_item(space, w_index) def descr_iter(self, space): - return space.wrap(W_RangeIterator( - space, self.w_start, self.w_step, self.w_length)) + try: + start = space.int_w(self.w_start) + stop = space.int_w(self.w_stop) + step = space.int_w(self.w_step) + length = space.int_w(self.w_length) + except OperationError as e: + pass + else: + if self.promote_step: + return W_IntRangeStepOneIterator(space, start, stop) + return W_IntRangeIterator(space, start, length, step) + return W_LongRangeIterator(space, self.w_start, self.w_step, + self.w_length) def descr_reversed(self, space): # lastitem = self.start + (self.length-1) * self.step @@ -395,7 +411,7 @@ self.w_start, space.mul(space.sub(self.w_length, space.newint(1)), self.w_step)) - return space.wrap(W_RangeIterator( + return space.wrap(W_LongRangeIterator( space, w_lastitem, space.neg(self.w_step), self.w_length)) def descr_reduce(self, space): @@ -493,7 +509,22 @@ W_Range.typedef.acceptable_as_base_class = False -class W_RangeIterator(W_Root): +class W_AbstractRangeIterator(W_Root): + + def descr_iter(self, space): + return space.wrap(self) + + def descr_len(self, space): + raise NotImplementedError + + def descr_next(self, space): + raise NotImplementedError + + def descr_reduce(self, space): + raise NotImplementedError + + +class W_LongRangeIterator(W_AbstractRangeIterator): def __init__(self, space, w_start, w_step, w_len, w_index=None): self.w_start = w_start self.w_step = w_step @@ -502,9 +533,6 @@ w_index = space.newint(0) self.w_index = w_index - def descr_iter(self, space): - return space.wrap(self) - def descr_next(self, space): if space.is_true(space.lt(self.w_index, self.w_len)): w_index = space.add(self.w_index, space.newint(1)) @@ -519,23 +547,75 @@ def descr_reduce(self, space): from pypy.interpreter.mixedmodule import MixedModule + w_mod = space.getbuiltinmodule('_pickle_support') + mod = space.interp_w(MixedModule, 
w_mod) + w_args = space.newtuple([self.w_start, self.w_step, self.w_len, + self.w_index]) + return space.newtuple([mod.get('longrangeiter_new'), w_args]) + + +class W_IntRangeIterator(W_AbstractRangeIterator): + + def __init__(self, space, current, remaining, step): + self.current = current + self.remaining = remaining + self.step = step + + def descr_next(self, space): + return self.next(space) + + def next(self, space): + if self.remaining > 0: + item = self.current + self.current = item + self.step + self.remaining -= 1 + return space.wrap(item) + raise OperationError(space.w_StopIteration, space.w_None) + + def descr_len(self, space): + return self.get_remaining(space) + + def descr_reduce(self, space): + from pypy.interpreter.mixedmodule import MixedModule w_mod = space.getbuiltinmodule('_pickle_support') mod = space.interp_w(MixedModule, w_mod) + new_inst = mod.get('intrangeiter_new') + w = space.wrap + nt = space.newtuple - return space.newtuple( - [mod.get('rangeiter_new'), - space.newtuple([self.w_start, self.w_step, - self.w_len, self.w_index]), - ]) + tup = [w(self.current), self.get_remaining(space), w(self.step)] + return nt([new_inst, nt(tup)]) + def get_remaining(self, space): + return space.wrap(self.remaining) -W_RangeIterator.typedef = TypeDef("rangeiterator", - __iter__ = interp2app(W_RangeIterator.descr_iter), - __length_hint__ = interp2app(W_RangeIterator.descr_len), - __next__ = interp2app(W_RangeIterator.descr_next), - __reduce__ = interp2app(W_RangeIterator.descr_reduce), + +class W_IntRangeStepOneIterator(W_IntRangeIterator): + _immutable_fields_ = ['stop'] + + def __init__(self, space, start, stop): + self.current = start + self.stop = stop + self.step = 1 + + def next(self, space): + if self.current < self.stop: + item = self.current + self.current = item + 1 + return space.wrap(item) + raise OperationError(space.w_StopIteration, space.w_None) + + def get_remaining(self, space): + return space.wrap(self.stop - self.current) + + 
+W_AbstractRangeIterator.typedef = TypeDef("rangeiterator", + __iter__ = interp2app(W_AbstractRangeIterator.descr_iter), + __length_hint__ = interpindirect2app(W_AbstractRangeIterator.descr_len), + __next__ = interpindirect2app(W_AbstractRangeIterator.descr_next), + __reduce__ = interpindirect2app(W_AbstractRangeIterator.descr_reduce), ) -W_RangeIterator.typedef.acceptable_as_base_class = False +W_AbstractRangeIterator.typedef.acceptable_as_base_class = False class W_Map(W_Root): diff --git a/pypy/module/_pickle_support/__init__.py b/pypy/module/_pickle_support/__init__.py --- a/pypy/module/_pickle_support/__init__.py +++ b/pypy/module/_pickle_support/__init__.py @@ -19,7 +19,8 @@ 'frame_new' : 'maker.frame_new', 'traceback_new' : 'maker.traceback_new', 'generator_new' : 'maker.generator_new', - 'rangeiter_new': 'maker.rangeiter_new', + 'longrangeiter_new': 'maker.longrangeiter_new', + 'intrangeiter_new': 'maker.intrangeiter_new', 'builtin_code': 'maker.builtin_code', 'builtin_function' : 'maker.builtin_function', 'enumerate_new': 'maker.enumerate_new', diff --git a/pypy/module/_pickle_support/maker.py b/pypy/module/_pickle_support/maker.py --- a/pypy/module/_pickle_support/maker.py +++ b/pypy/module/_pickle_support/maker.py @@ -62,9 +62,15 @@ new_generator = instantiate(GeneratorIteratorWithDel) return space.wrap(new_generator) -def rangeiter_new(space, w_start, w_step, w_len, w_index): - from pypy.module.__builtin__.functional import W_RangeIterator - new_iter = W_RangeIterator(space, w_start, w_step, w_len, w_index) +def longrangeiter_new(space, w_start, w_step, w_len, w_index): + from pypy.module.__builtin__.functional import W_LongRangeIterator + new_iter = W_LongRangeIterator(space, w_start, w_step, w_len, w_index) + return space.wrap(new_iter) + + at unwrap_spec(current=int, remaining=int, step=int) +def intrangeiter_new(space, current, remaining, step): + from pypy.module.__builtin__.functional import W_IntRangeIterator + new_iter = 
W_IntRangeIterator(space, current, remaining, step) return space.wrap(new_iter) def operationerror_new(space): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -75,9 +75,9 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.typedef import (TypeDef, GetSetProperty, descr_get_dict, descr_set_dict, descr_del_dict) -from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.error import OperationError -from pypy.interpreter.pytraceback import check_traceback +from pypy.interpreter.gateway import interp2app +from pypy.interpreter.error import OperationError, setup_context +from pypy.interpreter.pytraceback import PyTraceback, check_traceback from rpython.rlib import rwin32 @@ -163,7 +163,27 @@ self.suppress_context = True def descr_getcontext(self, space): - return self.w_context + w_context = self.w_context + if w_context is None: + self.w_context = w_context = self._setup_context(space) + return w_context + + def _setup_context(self, space): + """Lazily determine __context__ from w_traceback""" + # XXX: w_traceback can be overwritten: it's not necessarily the + # authoratative traceback! + last_operr = None + w_traceback = self.w_traceback + if w_traceback is not None and isinstance(w_traceback, PyTraceback): + ec = space.getexecutioncontext() + # search for __context__ beginning in the previous frame. 
A + # __context__ from the top most frame would have already + # been handled by OperationError.record_context + last_operr = ec.last_operr(space, w_traceback.frame.f_backref()) + if last_operr is None: + # no __context__ + return space.w_None + return setup_context(space, self, last_operr.get_w_value(space)) def descr_setcontext(self, space, w_newcontext): if not (space.is_w(w_newcontext, space.w_None) or @@ -174,7 +194,6 @@ self.w_context = w_newcontext def descr_gettraceback(self, space): - from pypy.interpreter.pytraceback import PyTraceback tb = self.w_traceback if tb is not None and isinstance(tb, PyTraceback): # tb escapes to app level (see OperationError.get_traceback) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -24,7 +24,7 @@ class W_UnicodeObject(W_Root): import_from_mixin(StringMethods) - _immutable_fields_ = ['_value'] + _immutable_fields_ = ['_value', '_utf8?'] def __init__(w_self, unistr): assert isinstance(unistr, unicode) diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -133,7 +133,7 @@ modules = ['_sqlite3'] subprocess.check_call([str(pypy_c), '-c', 'import _sqlite3']) if not sys.platform == 'win32': - modules += ['_curses', 'syslog', 'gdbm', '_sqlite3'] + modules += ['_curses', 'syslog', '_gdbm', '_sqlite3'] if not options.no_tk: modules.append(('_tkinter')) for module in modules: @@ -402,10 +402,10 @@ ''' -gdbm_bit = '''gdbm +gdbm_bit = '''_gdbm ---- -The gdbm module includes code from gdbm.h, which is distributed under the terms +The _gdbm module includes code from gdbm.h, which is distributed under the terms of the GPL license version 2 or any later version. 
''' From noreply at buildbot.pypy.org Fri Jun 20 09:57:16 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Fri, 20 Jun 2014 09:57:16 +0200 (CEST) Subject: [pypy-commit] stmgc parallel-pulling: a bit of code dedup Message-ID: <20140620075716.55F6D1C0299@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: parallel-pulling Changeset: r1256:b4e04b28f446 Date: 2014-06-20 09:57 +0200 http://bitbucket.org/pypy/stmgc/changeset/b4e04b28f446/ Log: a bit of code dedup diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -301,61 +301,68 @@ return false; } -static void copy_object_to_shared(object_t *obj, int source_segment_num) +static void copy_obj_from_to_segment( + int from_segment_num, int to_segment_num, object_t *obj, + bool nodebug) { - /* Only used by major GC. XXX There is a lot of code duplication - with synchronize_object_now() but I don't completely see how to - improve... - */ - assert(!_is_young(obj)); + /* page-wise copy of an object from one segment to another */ - char *segment_base = get_segment_base(source_segment_num); + OPT_ASSERT(from_segment_num != to_segment_num); + + char *from_base = get_segment_base(from_segment_num); + char *to_base = get_segment_base(to_segment_num); + + char *realobj = REAL_ADDRESS(from_base, obj); + ssize_t size = stmcb_size_rounded_up((struct object_s *)realobj); + + /* XXX: copied from sync_object_now */ uintptr_t start = (uintptr_t)obj; uintptr_t first_page = start / 4096UL; - struct object_s *realobj = (struct object_s *) - REAL_ADDRESS(segment_base, obj); - if (realobj->stm_flags & GCFLAG_SMALL_UNIFORM) { + if (((struct object_s *)realobj)->stm_flags & GCFLAG_SMALL_UNIFORM) { abort();//XXX WRITE THE FAST CASE - } - else { - ssize_t obj_size = stmcb_size_rounded_up(realobj); - assert(obj_size >= 16); - uintptr_t end = start + obj_size; + } else { + uintptr_t end = start + size; uintptr_t last_page = (end - 1) / 4096UL; for (; first_page <= last_page; first_page++) { + uintptr_t 
copy_size; + if (first_page == last_page) { + /* this is the final fragment */ + copy_size = end - start; + } + else { + /* this is a non-final fragment, going up to the + page's end */ + copy_size = 4096 - (start & 4095); + } - /* Copy the object into the shared page, if needed */ - if (is_private_page(source_segment_num, first_page)) { - - uintptr_t copy_size; - if (first_page == last_page) { - /* this is the final fragment */ - copy_size = end - start; - } - else { - /* this is a non-final fragment, going up to the - page's end */ - copy_size = 4096 - (start & 4095); - } - /* double-check that the result fits in one page */ - assert(copy_size > 0); - assert(copy_size + (start & 4095) <= 4096); - - char *src = REAL_ADDRESS(segment_base, start); - char *dst = REAL_ADDRESS(stm_object_pages, start); + /* copy from shared page to private, if needed */ + char *dst = REAL_ADDRESS(to_base, start); + char *src = REAL_ADDRESS(from_base, start); + if ((from_segment_num == 0 || is_private_page(from_segment_num, first_page)) + && (to_segment_num == 0 || is_private_page(to_segment_num, first_page))) { + /* at least one of them is a private page, or both */ if (copy_size == 4096) pagecopy(dst, src); else memcpy(dst, src, copy_size); } + else if (!nodebug) { + /* nodebug=true only used for the trick in major_reshare_pages that + removes the privatization bit even if contents differ */ + assert(memcmp(dst, src, copy_size) == 0); /* same page */ + } start = (start + 4096) & ~4095; } } + + write_fence(); } + + static void synchronize_object_now(object_t *obj, bool lazy_on_commit) { /* Copy around the version of 'obj' that lives in our own segment. @@ -591,67 +598,7 @@ abort_with_mutex(); } -static void copy_objs_from_segment_0(int segment_num, struct list_s *lst) -{ - /* pull the list of objects from segment 0. This either resets - modifications or just updates the view of the current segment. 
- */ - char *local_base = get_segment_base(segment_num); - char *zero_base = get_segment_base(0); - LIST_FOREACH_R(lst, object_t * /*item*/, - ({ - /* memcpy in the opposite direction than - push_modified_to_other_segments() */ - char *realobj = REAL_ADDRESS(zero_base, item); - ssize_t size = stmcb_size_rounded_up((struct object_s *)realobj); - - /* XXX: copied from sync_object_now */ - uintptr_t start = (uintptr_t)item; - uintptr_t first_page = start / 4096UL; - - if (((struct object_s *)realobj)->stm_flags & GCFLAG_SMALL_UNIFORM) { - abort();//XXX WRITE THE FAST CASE - } - else { - uintptr_t end = start + size; - uintptr_t last_page = (end - 1) / 4096UL; - long myself = segment_num; - - for (; first_page <= last_page; first_page++) { - uintptr_t copy_size; - if (first_page == last_page) { - /* this is the final fragment */ - copy_size = end - start; - } - else { - /* this is a non-final fragment, going up to the - page's end */ - copy_size = 4096 - (start & 4095); - } - - /* copy from shared page to private, if needed */ - char *dst = REAL_ADDRESS(local_base, start); - char *src = REAL_ADDRESS(zero_base, start); - if (is_private_page(myself, first_page)) { - if (copy_size == 4096) - pagecopy(dst, src); - else - memcpy(dst, src, copy_size); - } - else { - assert(memcmp(dst, src, copy_size) == 0); /* same page */ - } - - start = (start + 4096) & ~4095; - } - } - - /* all objs in segment 0 should have the WB flag: */ - assert(((struct object_s *)realobj)->stm_flags & GCFLAG_WRITE_BARRIER); - })); - write_fence(); -} static void pull_committed_changes(struct stm_priv_segment_info_s *pseg) { @@ -659,7 +606,14 @@ if (list_count(lst)) { dprintf(("pulling %lu objects from shared segment\n", list_count(lst))); - copy_objs_from_segment_0(pseg->pub.segment_num, lst); + + LIST_FOREACH_R(lst, object_t * /*item*/, + ({ + /* memcpy in the opposite direction than + push_modified_to_other_segments() */ + copy_obj_from_to_segment(0, pseg->pub.segment_num, item, false); + })); + 
list_clear(lst); } } @@ -678,7 +632,6 @@ */ struct stm_priv_segment_info_s *pseg = get_priv_segment(segment_num); char *local_base = get_segment_base(segment_num); - char *remote_base = get_segment_base(0); LIST_FOREACH_R( pseg->modified_old_objects, @@ -686,16 +639,14 @@ ({ /* memcpy in the opposite direction than push_modified_to_other_segments() */ - char *src = REAL_ADDRESS(remote_base, item); - char *dst = REAL_ADDRESS(local_base, item); - ssize_t size = stmcb_size_rounded_up((struct object_s *)src); - memcpy(dst, src, size); + copy_obj_from_to_segment(0, segment_num, item, false); /* objects in 'modified_old_objects' usually have the WRITE_BARRIER flag, unless they have been modified recently. Ignore the old flag; after copying from the other segment, we should have the flag. */ - assert(((struct object_s *)dst)->stm_flags & GCFLAG_WRITE_BARRIER); + char *dst = REAL_ADDRESS(local_base, item); + OPT_ASSERT(((struct object_s *)dst)->stm_flags & GCFLAG_WRITE_BARRIER); /* write all changes to the object before we release the write lock below. This is needed because we need to @@ -704,7 +655,7 @@ write_fence() ensures in particular that 'src' has been fully read before we release the lock: reading it is necessary to write 'dst'. 
*/ - write_fence(); + //write_fence(); - done by copy_obj_from_to_segment() /* clear the write-lock */ uintptr_t lock_idx = (((uintptr_t)item) >> 4) - WRITELOCK_START; diff --git a/c7/stm/core.h b/c7/stm/core.h --- a/c7/stm/core.h +++ b/c7/stm/core.h @@ -260,7 +260,8 @@ asm("/* workaround for llvm bug */"); } -static void copy_object_to_shared(object_t *obj, int source_segment_num); +static void copy_obj_from_to_segment( + int from_segment_num, int to_segment_num, object_t *obj, bool nodebug); static void synchronize_object_now(object_t *obj, bool lazy_on_commit); static void pull_committed_changes(struct stm_priv_segment_info_s *pseg); diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c --- a/c7/stm/gcpage.c +++ b/c7/stm/gcpage.c @@ -281,7 +281,11 @@ */ struct list_s *lst = get_priv_segment(i)->large_overflow_objects; if (lst != NULL) { - LIST_FOREACH_R(lst, object_t *, copy_object_to_shared(item, i)); + LIST_FOREACH_R(lst, object_t *, + ({ + copy_obj_from_to_segment(i, 0, item, + /* nodebug */ true); + })); } } From noreply at buildbot.pypy.org Fri Jun 20 10:18:20 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Fri, 20 Jun 2014 10:18:20 +0200 (CEST) Subject: [pypy-commit] stmgc parallel-pulling: add timing info for parallel-pulling. As expected, raytrace spends only ~1% of Message-ID: <20140620081820.382371C0299@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: parallel-pulling Changeset: r1257:e8a0753c37ff Date: 2014-06-20 10:18 +0200 http://bitbucket.org/pypy/stmgc/changeset/e8a0753c37ff/ Log: add timing info for parallel-pulling. As expected, raytrace spends only ~1% of its time pulling, threadworms spends ~0.13s. Both have a huge "sync pause" time, so the biggest contribution to that is probably the time required for all threads to reach the safe points. 
diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -607,6 +607,10 @@ if (list_count(lst)) { dprintf(("pulling %lu objects from shared segment\n", list_count(lst))); + /* not completely accurate as it counts for the thread doing the + pulling (may be someone else during major collections) */ + enum stm_time_e old_state = change_timing_state(STM_TIME_PULL_OBJS); + LIST_FOREACH_R(lst, object_t * /*item*/, ({ /* memcpy in the opposite direction than @@ -615,6 +619,7 @@ })); list_clear(lst); + change_timing_state(old_state); } } diff --git a/c7/stm/timing.c b/c7/stm/timing.c --- a/c7/stm/timing.c +++ b/c7/stm/timing.c @@ -64,6 +64,7 @@ "minor gc", "major gc", "sync pause", + "pull objs" }; void stm_flush_timing(stm_thread_local_t *tl, int verbose) diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -71,6 +71,7 @@ STM_TIME_MINOR_GC, STM_TIME_MAJOR_GC, STM_TIME_SYNC_PAUSE, + STM_TIME_PULL_OBJS, _STM_TIME_N }; From noreply at buildbot.pypy.org Fri Jun 20 14:40:25 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 14:40:25 +0200 (CEST) Subject: [pypy-commit] pypy default: Also test for split(..., 'one-char'). Message-ID: <20140620124025.F07421C3619@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72104:731d6d55509a Date: 2014-06-20 14:39 +0200 http://bitbucket.org/pypy/pypy/changeset/731d6d55509a/ Log: Also test for split(..., 'one-char'). 
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -239,6 +239,7 @@ res = res and split('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] res = res and split(' a\ta\na b') == ['a', 'a', 'a', 'b'] res = res and split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d'] + res = res and split('abcd,efghi', ',') == ['abcd', 'efghi'] res = res and split(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd'] res = res and split(u'endcase test', u'test') == [u'endcase ', u''] res = res and rsplit('a|b|c|d', '|', 2) == ['a|b', 'c', 'd'] From noreply at buildbot.pypy.org Fri Jun 20 14:51:38 2014 From: noreply at buildbot.pypy.org (Raemi) Date: Fri, 20 Jun 2014 14:51:38 +0200 (CEST) Subject: [pypy-commit] stmgc incremental-conflict-detection: Some experiment to start conflict detection on commit right when the first Message-ID: <20140620125138.7BFCC1D257D@cobra.cs.uni-duesseldorf.de> Author: Remi Meier Branch: incremental-conflict-detection Changeset: r1258:563659443288 Date: 2014-06-20 14:52 +0200 http://bitbucket.org/pypy/stmgc/changeset/563659443288/ Log: Some experiment to start conflict detection on commit right when the first thread reaches its safe point. We don't need to wait for all threads first. Right now this is just a hack. It seems to improve the performance of raytrace and threadworms only by 3-5%. diff --git a/c7/stm/contention.c b/c7/stm/contention.c --- a/c7/stm/contention.c +++ b/c7/stm/contention.c @@ -119,8 +119,8 @@ /* Pick one contention management... could be made dynamically choosable */ #ifdef STM_TESTS cm_abort_the_younger(&contmgr); -#else - cm_pause_if_younger(&contmgr); +/* #else */ +/* cm_pause_if_younger(&contmgr); */ #endif /* Fix the choices that are found incorrect due to TS_INEVITABLE diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -493,6 +493,7 @@ /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! 
*/ } +static __thread bool commit_in_progress = false; void stm_commit_transaction(void) { assert(!_has_mutex()); @@ -508,16 +509,97 @@ push_overflow_objects_from_privatized_pages(); s_mutex_lock(); - restart: /* force all other threads to be paused. They will unpause automatically when we are done here, i.e. at mutex_unlock(). Important: we should not call cond_wait() in the meantime. */ - synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK); - /* detect conflicts */ - if (detect_write_read_conflicts()) - goto restart; + //synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK); + enter_safe_point_if_requested(); + + if (UNLIKELY(globally_unique_transaction)) { + assert(count_other_threads_sp_running() == 0); + /* detect conflicts */ + if (detect_write_read_conflicts()) + goto restart; + + } else { + signal_everybody_to_pause_running(); + commit_in_progress = true; + + /* keep track of who we already checked against: */ + bool conflict_checked[NB_SEGMENTS]; + int my_num = STM_SEGMENT->segment_num; + long i; + for (i = 1; i <= NB_SEGMENTS; i++) + conflict_checked[i-1] = (i == my_num); + + /* incrementally check for conflicts in all other threads + until we checked all of them (and all reached their safe point) */ + long to_check = NB_SEGMENTS - 1; /* without me */ + while (to_check > 0) { + /* check against all that are in a safe point: */ + for (i = 1; i <= NB_SEGMENTS; i++) { + struct stm_priv_segment_info_s *pseg = get_priv_segment(i); + + if (!conflict_checked[i-1] && pseg->safe_point != SP_RUNNING) { + OPT_ASSERT(i != my_num); + /* not us, in a safe point / not running, and not checked already */ + conflict_checked[i-1] = true; + to_check--; + + /* actual checking: */ + bool retry_required = false; + if (pseg->transaction_state == TS_NONE) + continue; /* no need to check */ + + if (is_aborting_now(i)) + continue; /* no need to check: is pending immediate abort */ + + char *remote_base = get_segment_base(i); + uint8_t remote_version = 
get_segment(i)->transaction_read_version; + + LIST_FOREACH_R( + STM_PSEGMENT->modified_old_objects, + object_t * /*item*/, + ({ + if (was_read_remote(remote_base, item, remote_version)) { + /* A write-read conflict! */ + if (write_read_contention_management(i, item)) { + /* If we reach this point, we didn't abort, but we + had to wait for the other thread to commit. If we + did, then we have to restart committing from our call + to synchronize_all_threads(). */ + retry_required = true; + break; + } + /* we aborted the other transaction without waiting, so + we can just continue */ + } + })); + + if (retry_required) { + remove_requests_for_safe_point(); + commit_in_progress = false; + goto restart; + } + } + } + + if (count_other_threads_sp_running()) { + STM_PSEGMENT->safe_point = SP_WAIT_FOR_C_AT_SAFE_POINT; + cond_wait(C_AT_SAFE_POINT); + STM_PSEGMENT->safe_point = SP_RUNNING; + + if (must_abort()) + abort_with_mutex(); + } + } + + assert(!count_other_threads_sp_running()); + remove_requests_for_safe_point(); + commit_in_progress = false; + } /* cannot abort any more from here */ dprintf(("commit_transaction\n")); @@ -671,6 +753,11 @@ assert(_has_mutex()); dprintf(("~~~ ABORT\n")); + if (commit_in_progress) { + remove_requests_for_safe_point(); + commit_in_progress = false; + } + assert(STM_PSEGMENT->running_pthread == pthread_self()); abort_data_structures_from_segment_num(STM_SEGMENT->segment_num); From noreply at buildbot.pypy.org Fri Jun 20 16:59:27 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 16:59:27 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder3-perf: Trying yet another way for JITing Message-ID: <20140620145927.5FB311D2371@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder3-perf Changeset: r72105:821e851f10e9 Date: 2014-06-20 11:34 +0200 http://bitbucket.org/pypy/pypy/changeset/821e851f10e9/ Log: Trying yet another way for JITing From noreply at buildbot.pypy.org Fri Jun 20 16:59:28 2014 From: noreply 
at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 16:59:28 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder3-perf: Rewrite yet another time rbuilder.py Message-ID: <20140620145928.8F1D61D2371@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder3-perf Changeset: r72106:4cc95bf7d1d6 Date: 2014-06-20 16:23 +0200 http://bitbucket.org/pypy/pypy/changeset/4cc95bf7d1d6/ Log: Rewrite yet another time rbuilder.py diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask from rpython.rlib.debug import ll_assert +from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.rptr import PtrRepr from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr @@ -34,62 +35,15 @@ # ------------------------------------------------------------ +def dont_inline(func): + func._dont_inline_ = True + return func + def always_inline(func): func._always_inline_ = True return func -def new_grow_funcs(name, mallocfn): - - @enforceargs(None, int) - def stringbuilder_grow(ll_builder, needed): - try: - needed = ovfcheck(needed + ll_builder.total_size) - needed = ovfcheck(needed + 63) & ~63 - total_size = ll_builder.total_size + needed - except OverflowError: - raise MemoryError - # - new_string = mallocfn(needed) - # - PIECE = lltype.typeOf(ll_builder.extra_pieces).TO - old_piece = lltype.malloc(PIECE) - old_piece.buf = ll_builder.current_buf - old_piece.prev_piece = ll_builder.extra_pieces - ll_assert(bool(old_piece.buf), "no buf??") - ll_builder.current_buf = new_string - ll_builder.current_pos = 0 - ll_builder.current_end = needed - ll_builder.total_size = total_size - ll_builder.extra_pieces = old_piece - - def 
stringbuilder_append_overflow(ll_builder, ll_str, size): - # First, the part that still fits in the current piece - part1 = ll_builder.current_end - ll_builder.current_pos - start = ll_builder.skip - ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, - start, ll_builder.current_pos, - part1) - ll_builder.skip += part1 - stringbuilder_grow(ll_builder, size - part1) - - def stringbuilder_append_overflow_2(ll_builder, char0): - # Overflow when writing two chars. There are two cases depending - # on whether one char still fits or not. - if ll_builder.current_pos < ll_builder.current_end: - ll_builder.current_buf.chars[ll_builder.current_pos] = char0 - ll_builder.skip = 1 - stringbuilder_grow(ll_builder, 2) - - return (func_with_new_name(stringbuilder_grow, '%s_grow' % name), - func_with_new_name(stringbuilder_append_overflow, - '%s_append_overflow' % name), - func_with_new_name(stringbuilder_append_overflow_2, - '%s_append_overflow_2' % name)) - -stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr) -unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode) - STRINGPIECE = lltype.GcStruct('stringpiece', ('buf', lltype.Ptr(STR)), ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) @@ -100,12 +54,8 @@ ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('skip', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), adtmeths={ - 'grow': staticAdtMethod(stringbuilder_grows[0]), - 'append_overflow': staticAdtMethod(stringbuilder_grows[1]), - 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), 'mallocfn': staticAdtMethod(rstr.mallocstr), @@ -122,18 +72,290 @@ ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('skip', lltype.Signed), ('extra_pieces', lltype.Ptr(UNICODEPIECE)), adtmeths={ - 
'grow': staticAdtMethod(unicodebuilder_grows[0]), - 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), - 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), 'mallocfn': staticAdtMethod(rstr.mallocunicode), } ) +# ------------------------------------------------------------ +# The generic piece of code to append a string (or a slice of it) +# to a builder; it is inlined inside various functions below + + at always_inline +def _ll_append(ll_builder, ll_str, start, size): + pos = ll_builder.current_pos + end = ll_builder.current_end + if (end - pos) < size: + ll_grow_and_append(ll_builder, ll_str, start, size) + else: + ll_builder.current_pos = pos + size + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, pos, size) + +# ------------------------------------------------------------ +# Logic to grow a builder (by adding a new string to it) + + at dont_inline + at enforceargs(None, int) +def ll_grow_by(ll_builder, needed): + try: + needed = ovfcheck(needed + ll_builder.total_size) + needed = ovfcheck(needed + 63) & ~63 + total_size = ll_builder.total_size + needed + except OverflowError: + raise MemoryError + # + new_string = ll_builder.mallocfn(needed) + # + PIECE = lltype.typeOf(ll_builder.extra_pieces).TO + old_piece = lltype.malloc(PIECE) + old_piece.buf = ll_builder.current_buf + old_piece.prev_piece = ll_builder.extra_pieces + ll_assert(bool(old_piece.buf), "no buf??") + ll_builder.current_buf = new_string + ll_builder.current_pos = 0 + ll_builder.current_end = needed + ll_builder.total_size = total_size + ll_builder.extra_pieces = old_piece + + at dont_inline +def ll_grow_and_append(ll_builder, ll_str, start, size): + # First, the part that still fits in the current piece + part1 = ll_builder.current_end - ll_builder.current_pos + ll_assert(part1 < size, "part1 >= size") + 
ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, ll_builder.current_pos, + part1) + start += part1 + size -= part1 + # Allocate the new piece + ll_grow_by(ll_builder, size) + ll_assert(ll_builder.current_pos == 0, "current_pos must be 0 after grow()") + # Finally, the second part of the string + ll_builder.current_pos = size + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, 0, size) + +# ------------------------------------------------------------ +# builder.append() + +@always_inline +def ll_append(ll_builder, ll_str): + if jit.we_are_jitted(): + ll_jit_append(ll_builder, ll_str) + else: + # no-jit case: inline the logic of _ll_append() in the caller + _ll_append(ll_builder, ll_str, 0, len(ll_str.chars)) + +@dont_inline +def ll_jit_append(ll_builder, ll_str): + # jit case: first try special cases for known small lengths + if ll_jit_try_append_slice(ll_builder, ll_str, 0, len(ll_str.chars)): + return + # fall-back to do a residual call to ll_append_res0 + ll_append_res0(ll_builder, ll_str) + +@jit.dont_look_inside +def ll_append_res0(ll_builder, ll_str): + _ll_append(ll_builder, ll_str, 0, len(ll_str.chars)) + +# ------------------------------------------------------------ +# builder.append_char() + +@always_inline +def ll_append_char(ll_builder, char): + jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, + ll_grow_by, ll_builder, 1) + pos = ll_builder.current_pos + ll_builder.current_pos = pos + 1 + ll_builder.current_buf.chars[pos] = char + +# ------------------------------------------------------------ +# builder.append_slice() + +@always_inline +def ll_append_slice(ll_builder, ll_str, start, end): + if jit.we_are_jitted(): + ll_jit_append_slice(ll_builder, ll_str, start, end) + else: + # no-jit case: inline the logic of _ll_append() in the caller + _ll_append(ll_builder, ll_str, start, end - start) + +@dont_inline +def ll_jit_append_slice(ll_builder, ll_str, start, end): + #
jit case: first try special cases for known small lengths + if ll_jit_try_append_slice(ll_builder, ll_str, start, end - start): + return + # fall-back to do a residual call to ll_append_res_slice + ll_append_res_slice(ll_builder, ll_str, start, end) + +@jit.dont_look_inside +def ll_append_res_slice(ll_builder, ll_str, start, end): + _ll_append(ll_builder, ll_str, start, end - start) + +# ------------------------------------------------------------ +# Special-casing for the JIT: appending strings (or slices) of +# a known length up to MAX_N. These functions all contain an +# inlined copy of _ll_append(), but with a known small N, gcc +# will compile the copy_string_contents() efficiently. + +MAX_N = 10 + +def make_func_for_size(N): + @jit.dont_look_inside + def ll_append_0(ll_builder, ll_str): + _ll_append(ll_builder, ll_str, 0, N) + ll_append_0 = func_with_new_name(ll_append_0, "ll_append_0_%d" % N) + # + @jit.dont_look_inside + def ll_append_start(ll_builder, ll_str, start): + _ll_append(ll_builder, ll_str, start, N) + ll_append_start = func_with_new_name(ll_append_start, + "ll_append_start_%d" % N) + return ll_append_0, ll_append_start, N + +unroll_func_for_size = unrolling_iterable([make_func_for_size(_n) + for _n in range(2, MAX_N + 1)]) + +def ll_jit_try_append_slice(ll_builder, ll_str, start, size): + if jit.isconstant(size): + if size == 0: + return True + if size == 1: + ll_append_char(ll_builder, ll_str.chars[start]) + return True + for func0, funcstart, for_size in unroll_func_for_size: + if size == for_size: + if jit.isconstant(start) and start == 0: + func0(ll_builder, ll_str) + else: + funcstart(ll_builder, ll_str, start) + return True + return False # use the fall-back path + +# ------------------------------------------------------------ +# builder.append_multiple_char() + +@always_inline +def ll_append_multiple_char(ll_builder, char, times): + if jit.we_are_jitted(): + if ll_jit_try_append_multiple_char(ll_builder, char, times): + return +
_ll_append_multiple_char(ll_builder, char, times) + +@jit.dont_look_inside +def _ll_append_multiple_char(ll_builder, char, times): + part1 = ll_builder.current_end - ll_builder.current_pos + if times > part1: + times -= part1 + buf = ll_builder.current_buf + for i in xrange(ll_builder.current_pos, ll_builder.current_end): + buf.chars[i] = char + ll_grow_by(ll_builder, times) + # + buf = ll_builder.current_buf + pos = ll_builder.current_pos + end = pos + times + ll_builder.current_pos = end + for i in xrange(pos, end): + buf.chars[i] = char + +def ll_jit_try_append_multiple_char(ll_builder, char, size): + if jit.isconstant(size): + if size == 0: + return True + if size == 1: + ll_append_char(ll_builder, char) + return True + return False # use the fall-back path + +# ------------------------------------------------------------ +# builder.append_charpsize() + +@jit.dont_look_inside +def ll_append_charpsize(ll_builder, charp, size): + part1 = ll_builder.current_end - ll_builder.current_pos + if size > part1: + # First, the part that still fits + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, + ll_builder.current_pos, part1) + charp = rffi.ptradd(charp, part1) + size -= part1 + ll_grow_by(ll_builder, size) + # + pos = ll_builder.current_pos + ll_builder.current_pos = pos + size + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) + +# ------------------------------------------------------------ +# builder.getlength() + +@always_inline +def ll_getlength(ll_builder): + num_chars_missing_from_last_piece = ( + ll_builder.current_end - ll_builder.current_pos) + return ll_builder.total_size - num_chars_missing_from_last_piece + +# ------------------------------------------------------------ +# builder.build() + +@jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) +def ll_build(ll_builder): + # NB.
usually the JIT doesn't look inside this function; it does + # so only in the simplest example where it could virtualize everything + if ll_builder.extra_pieces: + ll_fold_pieces(ll_builder) + elif ll_builder.current_pos != ll_builder.total_size: + ll_shrink_final(ll_builder) + return ll_builder.current_buf + +def ll_shrink_final(ll_builder): + final_size = ll_builder.current_pos + ll_assert(final_size <= ll_builder.total_size, + "final_size > ll_builder.total_size?") + buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) + ll_builder.current_buf = buf + ll_builder.current_end = final_size + ll_builder.total_size = final_size + +def ll_fold_pieces(ll_builder): + final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) + ll_assert(final_size >= 0, "negative final_size") + extra = ll_builder.extra_pieces + ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) + # + result = ll_builder.mallocfn(final_size) + piece = ll_builder.current_buf + piece_lgt = ll_builder.current_pos + ll_assert(ll_builder.current_end == len(piece.chars), + "bogus last piece_lgt") + ll_builder.total_size = final_size + ll_builder.current_buf = result + ll_builder.current_pos = final_size + ll_builder.current_end = final_size + + dst = final_size + while True: + dst -= piece_lgt + ll_assert(dst >= 0, "rbuilder build: overflow") + ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) + if not extra: + break + piece = extra.buf + piece_lgt = len(piece.chars) + extra = extra.prev_piece + ll_assert(dst == 0, "rbuilder build: underflow") + +# ------------------------------------------------------------ +# bool(builder) + +def ll_bool(ll_builder): + return ll_builder != nullptr(lltype.typeOf(ll_builder).TO) + +# ------------------------------------------------------------ class BaseStringBuilderRepr(AbstractStringBuilderRepr): def empty(self): @@ -145,211 +367,24 @@ # Negative values are mapped to 1280. 
init_size = intmask(min(r_uint(init_size), r_uint(1280))) ll_builder = lltype.malloc(cls.lowleveltype.TO) - ll_builder.current_buf = cls.mallocfn(init_size) + ll_builder.current_buf = ll_builder.mallocfn(init_size) ll_builder.current_pos = 0 ll_builder.current_end = init_size ll_builder.total_size = init_size return ll_builder - @staticmethod - @always_inline - def ll_append(ll_builder, ll_str): - BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str, - 0, len(ll_str.chars)) - - @staticmethod - @always_inline - def ll_append_char(ll_builder, char): - jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, - ll_builder.grow, ll_builder, 1) - pos = ll_builder.current_pos - ll_builder.current_pos = pos + 1 - ll_builder.current_buf.chars[pos] = char - - @staticmethod - def ll_append_char_2(ll_builder, char0, char1): - # this is only used by the JIT, when appending a small, known-length - # string. Unlike two consecutive ll_append_char(), it can do that - # with only one conditional_call. - ll_builder.skip = 2 - jit.conditional_call( - ll_builder.current_end - ll_builder.current_pos < 2, - ll_builder.append_overflow_2, ll_builder, char0) - pos = ll_builder.current_pos - buf = ll_builder.current_buf - buf.chars[pos] = char0 - pos += ll_builder.skip - ll_builder.current_pos = pos - buf.chars[pos - 1] = char1 - # NB. this usually writes into buf.chars[current_pos] and - # buf.chars[current_pos+1], except if we had an overflow right - # in the middle of the two chars. In that case, 'skip' is set to - # 1 and only one char is written: the 'char1' overrides the 'char0'. 
- - @staticmethod - @always_inline - def ll_append_slice(ll_builder, ll_str, start, end): - size = end - start - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_slice( - ll_builder, ll_str, start, size): - return - ll_builder.skip = start - jit.conditional_call( - size > ll_builder.current_end - ll_builder.current_pos, - ll_builder.append_overflow, ll_builder, ll_str, size) - start = ll_builder.skip - size = end - start - pos = ll_builder.current_pos - ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, - start, pos, size) - ll_builder.current_pos = pos + size - - @staticmethod - def _ll_jit_try_append_slice(ll_builder, ll_str, start, size): - if jit.isconstant(size): - if size == 0: - return True - if size == 1: - BaseStringBuilderRepr.ll_append_char(ll_builder, - ll_str.chars[start]) - return True - if size == 2: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, - ll_str.chars[start], - ll_str.chars[start + 1]) - return True - return False # use the fall-back path - - @staticmethod - @always_inline - def ll_append_multiple_char(ll_builder, char, times): - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_multiple_char( - ll_builder, char, times): - return - BaseStringBuilderRepr._ll_append_multiple_char(ll_builder, char, times) - - @staticmethod - @jit.dont_look_inside - def _ll_append_multiple_char(ll_builder, char, times): - part1 = ll_builder.current_end - ll_builder.current_pos - if times > part1: - times -= part1 - buf = ll_builder.current_buf - for i in xrange(ll_builder.current_pos, ll_builder.current_end): - buf.chars[i] = char - ll_builder.grow(ll_builder, times) - # - buf = ll_builder.current_buf - pos = ll_builder.current_pos - end = pos + times - ll_builder.current_pos = end - for i in xrange(pos, end): - buf.chars[i] = char - - @staticmethod - def _ll_jit_try_append_multiple_char(ll_builder, char, size): - if jit.isconstant(size): - if size == 0: - return True - if size == 1: - 
BaseStringBuilderRepr.ll_append_char(ll_builder, char) - return True - if size == 2: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - if size == 3: - BaseStringBuilderRepr.ll_append_char(ll_builder, char) - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - if size == 4: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - return False # use the fall-back path - - @staticmethod - @jit.dont_look_inside - def ll_append_charpsize(ll_builder, charp, size): - part1 = ll_builder.current_end - ll_builder.current_pos - if size > part1: - # First, the part that still fits - ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, - ll_builder.current_pos, part1) - charp = rffi.ptradd(charp, part1) - size -= part1 - ll_builder.grow(ll_builder, size) - # - pos = ll_builder.current_pos - ll_builder.current_pos = pos + size - ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) - - @staticmethod - @always_inline - def ll_getlength(ll_builder): - num_chars_missing_from_last_piece = ( - ll_builder.current_end - ll_builder.current_pos) - return ll_builder.total_size - num_chars_missing_from_last_piece - - @staticmethod - @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) - def ll_build(ll_builder): - # NB. 
usually the JIT doesn't look inside this function; it does - # so only in the simplest example where it could virtualize everything - if ll_builder.extra_pieces: - BaseStringBuilderRepr._ll_fold_pieces(ll_builder) - elif ll_builder.current_pos != ll_builder.total_size: - BaseStringBuilderRepr._ll_shrink_final(ll_builder) - return ll_builder.current_buf - - @staticmethod - def _ll_shrink_final(ll_builder): - final_size = ll_builder.current_pos - ll_assert(final_size <= ll_builder.total_size, - "final_size > ll_builder.total_size?") - buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) - ll_builder.current_buf = buf - ll_builder.current_end = final_size - ll_builder.total_size = final_size - - @staticmethod - def _ll_fold_pieces(ll_builder): - final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) - ll_assert(final_size >= 0, "negative final_size") - extra = ll_builder.extra_pieces - ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) - # - result = ll_builder.mallocfn(final_size) - piece = ll_builder.current_buf - piece_lgt = ll_builder.current_pos - ll_assert(ll_builder.current_end == len(piece.chars), - "bogus last piece_lgt") - ll_builder.total_size = final_size - ll_builder.current_buf = result - ll_builder.current_pos = final_size - ll_builder.current_end = final_size - - dst = final_size - while True: - dst -= piece_lgt - ll_assert(dst >= 0, "rbuilder build: overflow") - ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) - if not extra: - break - piece = extra.buf - piece_lgt = len(piece.chars) - extra = extra.prev_piece - ll_assert(dst == 0, "rbuilder build: underflow") - - @classmethod - def ll_bool(cls, ll_builder): - return ll_builder != nullptr(cls.lowleveltype.TO) + ll_append = staticmethod(ll_append) + ll_append_char = staticmethod(ll_append_char) + ll_append_slice = staticmethod(ll_append_slice) + ll_append_multiple_char = staticmethod(ll_append_multiple_char) + ll_append_charpsize = 
staticmethod(ll_append_charpsize) + ll_getlength = staticmethod(ll_getlength) + ll_build = staticmethod(ll_build) + ll_bool = staticmethod(ll_bool) class StringBuilderRepr(BaseStringBuilderRepr): lowleveltype = lltype.Ptr(STRINGBUILDER) basetp = STR - mallocfn = staticmethod(rstr.mallocstr) string_repr = string_repr char_repr = char_repr raw_ptr_repr = PtrRepr( @@ -359,7 +394,6 @@ class UnicodeBuilderRepr(BaseStringBuilderRepr): lowleveltype = lltype.Ptr(UNICODEBUILDER) basetp = UNICODE - mallocfn = staticmethod(rstr.mallocunicode) string_repr = unicode_repr char_repr = unichar_repr raw_ptr_repr = PtrRepr( diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py --- a/rpython/rtyper/test/test_rbuilder.py +++ b/rpython/rtyper/test/test_rbuilder.py @@ -28,9 +28,13 @@ def test_simple(self): sb = StringBuilderRepr.ll_new(3) + assert StringBuilderRepr.ll_getlength(sb) == 0 StringBuilderRepr.ll_append_char(sb, 'x') + assert StringBuilderRepr.ll_getlength(sb) == 1 StringBuilderRepr.ll_append(sb, llstr("abc")) + assert StringBuilderRepr.ll_getlength(sb) == 4 StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5) + assert StringBuilderRepr.ll_getlength(sb) == 7 StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3) assert StringBuilderRepr.ll_getlength(sb) == 10 s = StringBuilderRepr.ll_build(sb) From noreply at buildbot.pypy.org Fri Jun 20 16:59:29 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 16:59:29 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder3-perf: Add some special cases for the JIT Message-ID: <20140620145929.CA9A01D2371@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder3-perf Changeset: r72107:ba9a90a5473b Date: 2014-06-20 16:58 +0200 http://bitbucket.org/pypy/pypy/changeset/ba9a90a5473b/ Log: Add some special cases for the JIT diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py --- a/rpython/jit/metainterp/test/test_string.py 
+++ b/rpython/jit/metainterp/test/test_string.py @@ -688,7 +688,9 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + self.check_resops(call=6, # (ll_append_res0, ll_append_0_2, ll_build) + # * 2 unroll + cond_call=0) def test_stringbuilder_append_len2_2(self): jitdriver = JitDriver(reds=['n', 'str1'], greens=[]) @@ -708,7 +710,8 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + self.check_resops(call=4, # (ll_append_res0, ll_build) * 2 unroll + cond_call=0) def test_stringbuilder_append_slice_1(self): jitdriver = JitDriver(reds=['n'], greens=[]) @@ -724,8 +727,8 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2, # (ll_shrink_array) * 2 unroll - copyunicodecontent=4) + self.check_resops(call=6, cond_call=0, + copyunicodecontent=0) def test_stringbuilder_append_slice_2(self): jitdriver = JitDriver(reds=['n'], greens=[]) @@ -751,12 +754,14 @@ while n > 0: jitdriver.jit_merge_point(n=n) sb = UnicodeBuilder() - sb.append_multiple_char(u"x", 3) + sb.append_multiple_char(u"x", 5) s = sb.build() - if len(s) != 3: raise ValueError + if len(s) != 5: raise ValueError if s[0] != u"x": raise ValueError if s[1] != u"x": raise ValueError if s[2] != u"x": raise ValueError + if s[3] != u"x": raise ValueError + if s[4] != u"x": raise ValueError n -= 1 return n res = self.meta_interp(f, [10], backendopt=True) @@ -770,19 +775,17 @@ while n > 0: jitdriver.jit_merge_point(n=n) sb = UnicodeBuilder() - sb.append_multiple_char(u"x", 5) + sb.append_multiple_char(u"x", 35) s = sb.build() - if len(s) != 5: raise ValueError - if s[0] != u"x": raise ValueError - if s[1] != u"x": raise ValueError - if s[2] != u"x": raise ValueError - if s[3] != u"x": raise ValueError - if s[4] != u"x": raise ValueError + if len(s) != 35: raise ValueError + for c in s: + if c != u"x": + 
raise ValueError n -= 1 return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=4) # (append, build) * 2 unroll + self.check_resops(call=4) # (_ll_append_multiple_char, build) * 2 def test_stringbuilder_bug1(self): jitdriver = JitDriver(reds=['n', 's1'], greens=[]) diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -220,13 +220,36 @@ unroll_func_for_size = unrolling_iterable([make_func_for_size(_n) for _n in range(2, MAX_N + 1)]) +@jit.unroll_safe def ll_jit_try_append_slice(ll_builder, ll_str, start, size): if jit.isconstant(size): if size == 0: return True + # a special case: if the builder's pos and end are still contants + # (typically if the builder is still virtual), and if 'size' fits, + # then we don't need any reallocation and can just set the + # characters in the buffer, in a way that won't force anything. + if (jit.isconstant(ll_builder.current_pos) and + jit.isconstant(ll_builder.current_end) and + size <= (ll_builder.current_end - ll_builder.current_pos) and + size <= 16): + pos = ll_builder.current_pos + buf = ll_builder.current_buf + stop = pos + size + ll_builder.current_pos = stop + while pos < stop: + buf.chars[pos] = ll_str.chars[start] + pos += 1 + start += 1 + return True + # turn appends of length 1 into ll_append_char(). if size == 1: ll_append_char(ll_builder, ll_str.chars[start]) return True + # turn appends of length 2 to 10 into residual calls to + # specialized functions, for the lengths 2 to 10, where + # gcc will optimize the known-length copy_string_contents() + # as much as possible.
for func0, funcstart, for_size in unroll_func_for_size: if size == for_size: if jit.isconstant(start) and start == 0: @@ -263,10 +286,27 @@ for i in xrange(pos, end): buf.chars[i] = char +@jit.unroll_safe def ll_jit_try_append_multiple_char(ll_builder, char, size): if jit.isconstant(size): if size == 0: return True + # a special case: if the builder's pos and end are still contants + # (typically if the builder is still virtual), and if 'size' fits, + # then we don't need any reallocation and can just set the + # characters in the buffer, in a way that won't force anything. + if (jit.isconstant(ll_builder.current_pos) and + jit.isconstant(ll_builder.current_end) and + size <= (ll_builder.current_end - ll_builder.current_pos) and + size <= 16): + pos = ll_builder.current_pos + buf = ll_builder.current_buf + stop = pos + size + ll_builder.current_pos = stop + while pos < stop: + buf.chars[pos] = char + pos += 1 + return True if size == 1: ll_append_char(ll_builder, char) return True From noreply at buildbot.pypy.org Fri Jun 20 19:25:51 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 19:25:51 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder3-perf: Optimization for %d. Message-ID: <20140620172551.208211D2E25@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder3-perf Changeset: r72108:4284e494350c Date: 2014-06-20 19:22 +0200 http://bitbucket.org/pypy/pypy/changeset/4284e494350c/ Log: Optimization for %d. diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -379,6 +379,19 @@ std_wp._annspecialcase_ = 'specialize:argtype(1)' def std_wp_number(self, r, prefix=''): + result = self.result + if len(prefix) == 0 and len(r) >= self.width: + # this is strictly a fast path: no prefix, and no padding + # needed.
It is more efficient code both in the non-jit + # case (less testing stuff) and in the jit case (uses only + # result.append(), and no startswith() if not f_sign and + # not f_blank). + if self.f_sign and not r.startswith('-'): + result.append(const('+')) + elif self.f_blank and not r.startswith('-'): + result.append(const(' ')) + result.append(const(r)) + return # add a '+' or ' ' sign if necessary sign = r.startswith('-') if not sign: @@ -391,7 +404,6 @@ # do the padding requested by self.width and the flags, # without building yet another RPython string but directly # by pushing the pad character into self.result - result = self.result padding = self.width - len(r) - len(prefix) if padding <= 0: padding = 0 From noreply at buildbot.pypy.org Fri Jun 20 20:51:49 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 20:51:49 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder3-perf: Shorten the JIT code. Message-ID: <20140620185149.549161C0299@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder3-perf Changeset: r72109:3ee90a683997 Date: 2014-06-20 20:50 +0200 http://bitbucket.org/pypy/pypy/changeset/3ee90a683997/ Log: Shorten the JIT code. diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -101,64 +101,38 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) - # NB: since the stringbuilder2-perf branch we get more operations than - # before, but a lot less branches that might fail randomly. assert loop.match(""" - i100 = int_gt(i95, 0) - guard_true(i100, descr=...) + i79 = int_gt(i74, 0) + guard_true(i79, descr=...) guard_not_invalidated(descr=...) - p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=) + p80 = call(ConstClass(ll_int2dec__Signed), i74, descr=) guard_no_exception(descr=...) 
- i102 = strlen(p101) - i103 = int_is_true(i102) - guard_true(i103, descr=...) - i104 = strgetitem(p101, 0) - i105 = int_eq(i104, 45) - guard_false(i105, descr=...) - i106 = int_neg(i102) - i107 = int_gt(i102, 23) - p108 = new(descr=) - p110 = newstr(23) + i85 = strlen(p80) + p86 = new(descr=) + p88 = newstr(23) setfield_gc(..., descr=) setfield_gc(..., descr=) setfield_gc(..., descr=) - cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=) guard_no_exception(descr=...) - i111 = getfield_gc(p108, descr=) - i112 = int_sub(i102, i111) - i113 = getfield_gc(p108, descr=) - p114 = getfield_gc(p108, descr=) - copystrcontent(p101, p114, i111, i113, i112) - i115 = int_add(i113, i112) - i116 = getfield_gc(p108, descr=) - setfield_gc(p108, i115, descr=) - i117 = int_eq(i115, i116) - cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=) + i89 = getfield_gc(p86, descr=) + i90 = getfield_gc(p86, descr=) + i91 = int_eq(i89, i90) + cond_call(i91, ConstClass(ll_grow_by__stringbuilderPtr_Signed), p86, 1, descr=) guard_no_exception(descr=...) - i118 = getfield_gc(p108, descr=) - i119 = int_add(i118, 1) - p120 = getfield_gc(p108, descr=) - strsetitem(p120, i118, 32) - i121 = getfield_gc(p108, descr=) - i122 = int_sub(i121, i119) - setfield_gc(..., descr=) - setfield_gc(..., descr=) - i123 = int_gt(i102, i122) - cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + i92 = getfield_gc(p86, descr=) + i93 = int_add(i92, 1) + p94 = getfield_gc(p86, descr=) + strsetitem(p94, i92, 32) + setfield_gc(p86, i93, descr=) + call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=) guard_no_exception(descr=...) 
- i124 = getfield_gc(p108, descr=) - i125 = int_sub(i102, i124) - i126 = getfield_gc(p108, descr=) - p127 = getfield_gc(p108, descr=) - copystrcontent(p101, p127, i124, i126, i125) - i128 = int_add(i126, i125) - setfield_gc(p108, i128, descr=) - p135 = call(..., descr=) # ll_build guard_no_exception(descr=...) - i136 = strlen(p135) - i137 = int_add_ovf(i92, i136) + i96 = strlen(p95) + i97 = int_add_ovf(i71, i96) guard_no_overflow(descr=...) - i138 = int_sub(i95, 1) + i98 = int_sub(i74, 1) --TICK-- jump(..., descr=...) """) From noreply at buildbot.pypy.org Fri Jun 20 20:51:50 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 20:51:50 +0200 (CEST) Subject: [pypy-commit] pypy stringbuilder3-perf: Ready for merge Message-ID: <20140620185150.8FFE71C0299@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stringbuilder3-perf Changeset: r72110:1e05ea95e5ff Date: 2014-06-20 20:50 +0200 http://bitbucket.org/pypy/pypy/changeset/1e05ea95e5ff/ Log: Ready for merge From noreply at buildbot.pypy.org Fri Jun 20 20:51:51 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 20 Jun 2014 20:51:51 +0200 (CEST) Subject: [pypy-commit] pypy default: hg merge stringbuilder3-perf Message-ID: <20140620185151.D9EB81C0299@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72111:c094b7451dbd Date: 2014-06-20 20:51 +0200 http://bitbucket.org/pypy/pypy/changeset/c094b7451dbd/ Log: hg merge stringbuilder3-perf Yet another attempt: this should simplify the JIT code. diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -101,64 +101,38 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) - # NB: since the stringbuilder2-perf branch we get more operations than - # before, but a lot less branches that might fail randomly. 
assert loop.match(""" - i100 = int_gt(i95, 0) - guard_true(i100, descr=...) + i79 = int_gt(i74, 0) + guard_true(i79, descr=...) guard_not_invalidated(descr=...) - p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=) + p80 = call(ConstClass(ll_int2dec__Signed), i74, descr=) guard_no_exception(descr=...) - i102 = strlen(p101) - i103 = int_is_true(i102) - guard_true(i103, descr=...) - i104 = strgetitem(p101, 0) - i105 = int_eq(i104, 45) - guard_false(i105, descr=...) - i106 = int_neg(i102) - i107 = int_gt(i102, 23) - p108 = new(descr=) - p110 = newstr(23) + i85 = strlen(p80) + p86 = new(descr=) + p88 = newstr(23) setfield_gc(..., descr=) setfield_gc(..., descr=) setfield_gc(..., descr=) - cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=) guard_no_exception(descr=...) - i111 = getfield_gc(p108, descr=) - i112 = int_sub(i102, i111) - i113 = getfield_gc(p108, descr=) - p114 = getfield_gc(p108, descr=) - copystrcontent(p101, p114, i111, i113, i112) - i115 = int_add(i113, i112) - i116 = getfield_gc(p108, descr=) - setfield_gc(p108, i115, descr=) - i117 = int_eq(i115, i116) - cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=) + i89 = getfield_gc(p86, descr=) + i90 = getfield_gc(p86, descr=) + i91 = int_eq(i89, i90) + cond_call(i91, ConstClass(ll_grow_by__stringbuilderPtr_Signed), p86, 1, descr=) guard_no_exception(descr=...) 
- i118 = getfield_gc(p108, descr=) - i119 = int_add(i118, 1) - p120 = getfield_gc(p108, descr=) - strsetitem(p120, i118, 32) - i121 = getfield_gc(p108, descr=) - i122 = int_sub(i121, i119) - setfield_gc(..., descr=) - setfield_gc(..., descr=) - i123 = int_gt(i102, i122) - cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + i92 = getfield_gc(p86, descr=) + i93 = int_add(i92, 1) + p94 = getfield_gc(p86, descr=) + strsetitem(p94, i92, 32) + setfield_gc(p86, i93, descr=) + call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=) guard_no_exception(descr=...) - i124 = getfield_gc(p108, descr=) - i125 = int_sub(i102, i124) - i126 = getfield_gc(p108, descr=) - p127 = getfield_gc(p108, descr=) - copystrcontent(p101, p127, i124, i126, i125) - i128 = int_add(i126, i125) - setfield_gc(p108, i128, descr=) - p135 = call(..., descr=) # ll_build guard_no_exception(descr=...) - i136 = strlen(p135) - i137 = int_add_ovf(i92, i136) + i96 = strlen(p95) + i97 = int_add_ovf(i71, i96) guard_no_overflow(descr=...) - i138 = int_sub(i95, 1) + i98 = int_sub(i74, 1) --TICK-- jump(..., descr=...) """) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -379,6 +379,19 @@ std_wp._annspecialcase_ = 'specialize:argtype(1)' def std_wp_number(self, r, prefix=''): + result = self.result + if len(prefix) == 0 and len(r) >= self.width: + # this is strictly a fast path: no prefix, and no padding + # needed. It is more efficient code both in the non-jit + # case (less testing stuff) and in the jit case (uses only + # result.append(), and no startswith() if not f_sign and + # not f_blank). 
+ if self.f_sign and not r.startswith('-'): + result.append(const('+')) + elif self.f_blank and not r.startswith('-'): + result.append(const(' ')) + result.append(const(r)) + return # add a '+' or ' ' sign if necessary sign = r.startswith('-') if not sign: @@ -391,7 +404,6 @@ # do the padding requested by self.width and the flags, # without building yet another RPython string but directly # by pushing the pad character into self.result - result = self.result padding = self.width - len(r) - len(prefix) if padding <= 0: padding = 0 diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py --- a/rpython/jit/metainterp/test/test_string.py +++ b/rpython/jit/metainterp/test/test_string.py @@ -688,7 +688,9 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + self.check_resops(call=6, # (ll_append_res0, ll_append_0_2, ll_build) + # * 2 unroll + cond_call=0) def test_stringbuilder_append_len2_2(self): jitdriver = JitDriver(reds=['n', 'str1'], greens=[]) @@ -708,7 +710,8 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + self.check_resops(call=4, # (ll_append_res0, ll_build) * 2 unroll + cond_call=0) def test_stringbuilder_append_slice_1(self): jitdriver = JitDriver(reds=['n'], greens=[]) @@ -724,8 +727,8 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2, # (ll_shrink_array) * 2 unroll - copyunicodecontent=4) + self.check_resops(call=6, cond_call=0, + copyunicodecontent=0) def test_stringbuilder_append_slice_2(self): jitdriver = JitDriver(reds=['n'], greens=[]) @@ -751,12 +754,14 @@ while n > 0: jitdriver.jit_merge_point(n=n) sb = UnicodeBuilder() - sb.append_multiple_char(u"x", 3) + sb.append_multiple_char(u"x", 5) s = sb.build() - if len(s) != 3: raise ValueError + if len(s) != 5: raise ValueError if s[0] != 
u"x": raise ValueError if s[1] != u"x": raise ValueError if s[2] != u"x": raise ValueError + if s[3] != u"x": raise ValueError + if s[4] != u"x": raise ValueError n -= 1 return n res = self.meta_interp(f, [10], backendopt=True) @@ -770,19 +775,17 @@ while n > 0: jitdriver.jit_merge_point(n=n) sb = UnicodeBuilder() - sb.append_multiple_char(u"x", 5) + sb.append_multiple_char(u"x", 35) s = sb.build() - if len(s) != 5: raise ValueError - if s[0] != u"x": raise ValueError - if s[1] != u"x": raise ValueError - if s[2] != u"x": raise ValueError - if s[3] != u"x": raise ValueError - if s[4] != u"x": raise ValueError + if len(s) != 35: raise ValueError + for c in s: + if c != u"x": + raise ValueError n -= 1 return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=4) # (append, build) * 2 unroll + self.check_resops(call=4) # (_ll_append_multiple_char, build) * 2 def test_stringbuilder_bug1(self): jitdriver = JitDriver(reds=['n', 's1'], greens=[]) diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask from rpython.rlib.debug import ll_assert +from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.rptr import PtrRepr from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr @@ -34,62 +35,15 @@ # ------------------------------------------------------------ +def dont_inline(func): + func._dont_inline_ = True + return func + def always_inline(func): func._always_inline_ = True return func -def new_grow_funcs(name, mallocfn): - - @enforceargs(None, int) - def stringbuilder_grow(ll_builder, needed): - try: - needed = ovfcheck(needed + ll_builder.total_size) - needed = ovfcheck(needed + 63) & ~63 - 
total_size = ll_builder.total_size + needed - except OverflowError: - raise MemoryError - # - new_string = mallocfn(needed) - # - PIECE = lltype.typeOf(ll_builder.extra_pieces).TO - old_piece = lltype.malloc(PIECE) - old_piece.buf = ll_builder.current_buf - old_piece.prev_piece = ll_builder.extra_pieces - ll_assert(bool(old_piece.buf), "no buf??") - ll_builder.current_buf = new_string - ll_builder.current_pos = 0 - ll_builder.current_end = needed - ll_builder.total_size = total_size - ll_builder.extra_pieces = old_piece - - def stringbuilder_append_overflow(ll_builder, ll_str, size): - # First, the part that still fits in the current piece - part1 = ll_builder.current_end - ll_builder.current_pos - start = ll_builder.skip - ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, - start, ll_builder.current_pos, - part1) - ll_builder.skip += part1 - stringbuilder_grow(ll_builder, size - part1) - - def stringbuilder_append_overflow_2(ll_builder, char0): - # Overflow when writing two chars. There are two cases depending - # on whether one char still fits or not. 
- if ll_builder.current_pos < ll_builder.current_end: - ll_builder.current_buf.chars[ll_builder.current_pos] = char0 - ll_builder.skip = 1 - stringbuilder_grow(ll_builder, 2) - - return (func_with_new_name(stringbuilder_grow, '%s_grow' % name), - func_with_new_name(stringbuilder_append_overflow, - '%s_append_overflow' % name), - func_with_new_name(stringbuilder_append_overflow_2, - '%s_append_overflow_2' % name)) - -stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr) -unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode) - STRINGPIECE = lltype.GcStruct('stringpiece', ('buf', lltype.Ptr(STR)), ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) @@ -100,12 +54,8 @@ ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('skip', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), adtmeths={ - 'grow': staticAdtMethod(stringbuilder_grows[0]), - 'append_overflow': staticAdtMethod(stringbuilder_grows[1]), - 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), 'mallocfn': staticAdtMethod(rstr.mallocstr), @@ -122,18 +72,330 @@ ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('skip', lltype.Signed), ('extra_pieces', lltype.Ptr(UNICODEPIECE)), adtmeths={ - 'grow': staticAdtMethod(unicodebuilder_grows[0]), - 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), - 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), 'mallocfn': staticAdtMethod(rstr.mallocunicode), } ) +# ------------------------------------------------------------ +# The generic piece of code to append a string (or a slice of it) +# to a builder; it is inlined inside various functions 
below + + at always_inline +def _ll_append(ll_builder, ll_str, start, size): + pos = ll_builder.current_pos + end = ll_builder.current_end + if (end - pos) < size: + ll_grow_and_append(ll_builder, ll_str, start, size) + else: + ll_builder.current_pos = pos + size + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, pos, size) + +# ------------------------------------------------------------ +# Logic to grow a builder (by adding a new string to it) + + at dont_inline + at enforceargs(None, int) +def ll_grow_by(ll_builder, needed): + try: + needed = ovfcheck(needed + ll_builder.total_size) + needed = ovfcheck(needed + 63) & ~63 + total_size = ll_builder.total_size + needed + except OverflowError: + raise MemoryError + # + new_string = ll_builder.mallocfn(needed) + # + PIECE = lltype.typeOf(ll_builder.extra_pieces).TO + old_piece = lltype.malloc(PIECE) + old_piece.buf = ll_builder.current_buf + old_piece.prev_piece = ll_builder.extra_pieces + ll_assert(bool(old_piece.buf), "no buf??") + ll_builder.current_buf = new_string + ll_builder.current_pos = 0 + ll_builder.current_end = needed + ll_builder.total_size = total_size + ll_builder.extra_pieces = old_piece + + at dont_inline +def ll_grow_and_append(ll_builder, ll_str, start, size): + # First, the part that still fits in the current piece + part1 = ll_builder.current_end - ll_builder.current_pos + ll_assert(part1 < size, "part1 >= size") + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, ll_builder.current_pos, + part1) + start += part1 + size -= part1 + # Allocate the new piece + ll_grow_by(ll_builder, size) + ll_assert(ll_builder.current_pos == 0, "current_pos must be 0 after grow()") + # Finally, the second part of the string + ll_builder.current_pos = size + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, 0, size) + +# ------------------------------------------------------------ +# builder.append() + + at always_inline +def 
ll_append(ll_builder, ll_str): + if jit.we_are_jitted(): + ll_jit_append(ll_builder, ll_str) + else: + # no-jit case: inline the logic of _ll_append() in the caller + _ll_append(ll_builder, ll_str, 0, len(ll_str.chars)) + + at dont_inline +def ll_jit_append(ll_builder, ll_str): + # jit case: first try special cases for known small lengths + if ll_jit_try_append_slice(ll_builder, ll_str, 0, len(ll_str.chars)): + return + # fall-back to do a residual call to ll_append_res0 + ll_append_res0(ll_builder, ll_str) + + at jit.dont_look_inside +def ll_append_res0(ll_builder, ll_str): + _ll_append(ll_builder, ll_str, 0, len(ll_str.chars)) + +# ------------------------------------------------------------ +# builder.append_char() + + at always_inline +def ll_append_char(ll_builder, char): + jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, + ll_grow_by, ll_builder, 1) + pos = ll_builder.current_pos + ll_builder.current_pos = pos + 1 + ll_builder.current_buf.chars[pos] = char + +# ------------------------------------------------------------ +# builder.append_slice() + + at always_inline +def ll_append_slice(ll_builder, ll_str, start, end): + if jit.we_are_jitted(): + ll_jit_append_slice(ll_builder, ll_str, start, end) + else: + # no-jit case: inline the logic of _ll_append() in the caller + _ll_append(ll_builder, ll_str, start, end - start) + + at dont_inline +def ll_jit_append_slice(ll_builder, ll_str, start, end): + # jit case: first try special cases for known small lengths + if ll_jit_try_append_slice(ll_builder, ll_str, start, end - start): + return + # fall-back to do a residual call to ll_append_res_slice + ll_append_res_slice(ll_builder, ll_str, start, end) + + at jit.dont_look_inside +def ll_append_res_slice(ll_builder, ll_str, start, end): + _ll_append(ll_builder, ll_str, start, end - start) + +# ------------------------------------------------------------ +# Special-casing for the JIT: appending strings (or slices) of +# a known length up to 
MAX_N. These functions all contain an +# inlined copy of _ll_append(), but with a known small N, gcc +# will compile the copy_string_contents() efficiently. + +MAX_N = 10 + +def make_func_for_size(N): + @jit.dont_look_inside + def ll_append_0(ll_builder, ll_str): + _ll_append(ll_builder, ll_str, 0, N) + ll_append_0 = func_with_new_name(ll_append_0, "ll_append_0_%d" % N) + # + @jit.dont_look_inside + def ll_append_start(ll_builder, ll_str, start): + _ll_append(ll_builder, ll_str, start, N) + ll_append_start = func_with_new_name(ll_append_start, + "ll_append_start_%d" % N) + return ll_append_0, ll_append_start, N + +unroll_func_for_size = unrolling_iterable([make_func_for_size(_n) + for _n in range(2, MAX_N + 1)]) + + at jit.unroll_safe +def ll_jit_try_append_slice(ll_builder, ll_str, start, size): + if jit.isconstant(size): + if size == 0: + return True + # a special case: if the builder's pos and end are still constants + # (typically if the builder is still virtual), and if 'size' fits, + # then we don't need any reallocation and can just set the + # characters in the buffer, in a way that won't force anything. + if (jit.isconstant(ll_builder.current_pos) and + jit.isconstant(ll_builder.current_end) and + size <= (ll_builder.current_end - ll_builder.current_pos) and + size <= 16): + pos = ll_builder.current_pos + buf = ll_builder.current_buf + stop = pos + size + ll_builder.current_pos = stop + while pos < stop: + buf.chars[pos] = ll_str.chars[start] + pos += 1 + start += 1 + return True + # turn appends of length 1 into ll_append_char(). + if size == 1: + ll_append_char(ll_builder, ll_str.chars[start]) + return True + # turn appends of length 2 to 10 into residual calls to + # specialized functions, for the lengths 2 to 10, where + # gcc will optimize the known-length copy_string_contents() + # as much as possible. 
+ for func0, funcstart, for_size in unroll_func_for_size: + if size == for_size: + if jit.isconstant(start) and start == 0: + func0(ll_builder, ll_str) + else: + funcstart(ll_builder, ll_str, start) + return True + return False # use the fall-back path + +# ------------------------------------------------------------ +# builder.append_multiple_char() + + at always_inline +def ll_append_multiple_char(ll_builder, char, times): + if jit.we_are_jitted(): + if ll_jit_try_append_multiple_char(ll_builder, char, times): + return + _ll_append_multiple_char(ll_builder, char, times) + + at jit.dont_look_inside +def _ll_append_multiple_char(ll_builder, char, times): + part1 = ll_builder.current_end - ll_builder.current_pos + if times > part1: + times -= part1 + buf = ll_builder.current_buf + for i in xrange(ll_builder.current_pos, ll_builder.current_end): + buf.chars[i] = char + ll_grow_by(ll_builder, times) + # + buf = ll_builder.current_buf + pos = ll_builder.current_pos + end = pos + times + ll_builder.current_pos = end + for i in xrange(pos, end): + buf.chars[i] = char + + at jit.unroll_safe +def ll_jit_try_append_multiple_char(ll_builder, char, size): + if jit.isconstant(size): + if size == 0: + return True + # a special case: if the builder's pos and end are still constants + # (typically if the builder is still virtual), and if 'size' fits, + # then we don't need any reallocation and can just set the + # characters in the buffer, in a way that won't force anything. 
+ if (jit.isconstant(ll_builder.current_pos) and + jit.isconstant(ll_builder.current_end) and + size <= (ll_builder.current_end - ll_builder.current_pos) and + size <= 16): + pos = ll_builder.current_pos + buf = ll_builder.current_buf + stop = pos + size + ll_builder.current_pos = stop + while pos < stop: + buf.chars[pos] = char + pos += 1 + return True + if size == 1: + ll_append_char(ll_builder, char) + return True + return False # use the fall-back path + +# ------------------------------------------------------------ +# builder.append_charpsize() + + at jit.dont_look_inside +def ll_append_charpsize(ll_builder, charp, size): + part1 = ll_builder.current_end - ll_builder.current_pos + if size > part1: + # First, the part that still fits + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, + ll_builder.current_pos, part1) + charp = rffi.ptradd(charp, part1) + size -= part1 + ll_grow_by(ll_builder, size) + # + pos = ll_builder.current_pos + ll_builder.current_pos = pos + size + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) + +# ------------------------------------------------------------ +# builder.getlength() + + at always_inline +def ll_getlength(ll_builder): + num_chars_missing_from_last_piece = ( + ll_builder.current_end - ll_builder.current_pos) + return ll_builder.total_size - num_chars_missing_from_last_piece + +# ------------------------------------------------------------ +# builder.build() + + at jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) +def ll_build(ll_builder): + # NB. 
usually the JIT doesn't look inside this function; it does + # so only in the simplest example where it could virtualize everything + if ll_builder.extra_pieces: + ll_fold_pieces(ll_builder) + elif ll_builder.current_pos != ll_builder.total_size: + ll_shrink_final(ll_builder) + return ll_builder.current_buf + +def ll_shrink_final(ll_builder): + final_size = ll_builder.current_pos + ll_assert(final_size <= ll_builder.total_size, + "final_size > ll_builder.total_size?") + buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) + ll_builder.current_buf = buf + ll_builder.current_end = final_size + ll_builder.total_size = final_size + +def ll_fold_pieces(ll_builder): + final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) + ll_assert(final_size >= 0, "negative final_size") + extra = ll_builder.extra_pieces + ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) + # + result = ll_builder.mallocfn(final_size) + piece = ll_builder.current_buf + piece_lgt = ll_builder.current_pos + ll_assert(ll_builder.current_end == len(piece.chars), + "bogus last piece_lgt") + ll_builder.total_size = final_size + ll_builder.current_buf = result + ll_builder.current_pos = final_size + ll_builder.current_end = final_size + + dst = final_size + while True: + dst -= piece_lgt + ll_assert(dst >= 0, "rbuilder build: overflow") + ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) + if not extra: + break + piece = extra.buf + piece_lgt = len(piece.chars) + extra = extra.prev_piece + ll_assert(dst == 0, "rbuilder build: underflow") + +# ------------------------------------------------------------ +# bool(builder) + +def ll_bool(ll_builder): + return ll_builder != nullptr(lltype.typeOf(ll_builder).TO) + +# ------------------------------------------------------------ class BaseStringBuilderRepr(AbstractStringBuilderRepr): def empty(self): @@ -145,211 +407,24 @@ # Negative values are mapped to 1280. 
init_size = intmask(min(r_uint(init_size), r_uint(1280))) ll_builder = lltype.malloc(cls.lowleveltype.TO) - ll_builder.current_buf = cls.mallocfn(init_size) + ll_builder.current_buf = ll_builder.mallocfn(init_size) ll_builder.current_pos = 0 ll_builder.current_end = init_size ll_builder.total_size = init_size return ll_builder - @staticmethod - @always_inline - def ll_append(ll_builder, ll_str): - BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str, - 0, len(ll_str.chars)) - - @staticmethod - @always_inline - def ll_append_char(ll_builder, char): - jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, - ll_builder.grow, ll_builder, 1) - pos = ll_builder.current_pos - ll_builder.current_pos = pos + 1 - ll_builder.current_buf.chars[pos] = char - - @staticmethod - def ll_append_char_2(ll_builder, char0, char1): - # this is only used by the JIT, when appending a small, known-length - # string. Unlike two consecutive ll_append_char(), it can do that - # with only one conditional_call. - ll_builder.skip = 2 - jit.conditional_call( - ll_builder.current_end - ll_builder.current_pos < 2, - ll_builder.append_overflow_2, ll_builder, char0) - pos = ll_builder.current_pos - buf = ll_builder.current_buf - buf.chars[pos] = char0 - pos += ll_builder.skip - ll_builder.current_pos = pos - buf.chars[pos - 1] = char1 - # NB. this usually writes into buf.chars[current_pos] and - # buf.chars[current_pos+1], except if we had an overflow right - # in the middle of the two chars. In that case, 'skip' is set to - # 1 and only one char is written: the 'char1' overrides the 'char0'. 
- - @staticmethod - @always_inline - def ll_append_slice(ll_builder, ll_str, start, end): - size = end - start - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_slice( - ll_builder, ll_str, start, size): - return - ll_builder.skip = start - jit.conditional_call( - size > ll_builder.current_end - ll_builder.current_pos, - ll_builder.append_overflow, ll_builder, ll_str, size) - start = ll_builder.skip - size = end - start - pos = ll_builder.current_pos - ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, - start, pos, size) - ll_builder.current_pos = pos + size - - @staticmethod - def _ll_jit_try_append_slice(ll_builder, ll_str, start, size): - if jit.isconstant(size): - if size == 0: - return True - if size == 1: - BaseStringBuilderRepr.ll_append_char(ll_builder, - ll_str.chars[start]) - return True - if size == 2: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, - ll_str.chars[start], - ll_str.chars[start + 1]) - return True - return False # use the fall-back path - - @staticmethod - @always_inline - def ll_append_multiple_char(ll_builder, char, times): - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_multiple_char( - ll_builder, char, times): - return - BaseStringBuilderRepr._ll_append_multiple_char(ll_builder, char, times) - - @staticmethod - @jit.dont_look_inside - def _ll_append_multiple_char(ll_builder, char, times): - part1 = ll_builder.current_end - ll_builder.current_pos - if times > part1: - times -= part1 - buf = ll_builder.current_buf - for i in xrange(ll_builder.current_pos, ll_builder.current_end): - buf.chars[i] = char - ll_builder.grow(ll_builder, times) - # - buf = ll_builder.current_buf - pos = ll_builder.current_pos - end = pos + times - ll_builder.current_pos = end - for i in xrange(pos, end): - buf.chars[i] = char - - @staticmethod - def _ll_jit_try_append_multiple_char(ll_builder, char, size): - if jit.isconstant(size): - if size == 0: - return True - if size == 1: - 
BaseStringBuilderRepr.ll_append_char(ll_builder, char) - return True - if size == 2: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - if size == 3: - BaseStringBuilderRepr.ll_append_char(ll_builder, char) - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - if size == 4: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - return False # use the fall-back path - - @staticmethod - @jit.dont_look_inside - def ll_append_charpsize(ll_builder, charp, size): - part1 = ll_builder.current_end - ll_builder.current_pos - if size > part1: - # First, the part that still fits - ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, - ll_builder.current_pos, part1) - charp = rffi.ptradd(charp, part1) - size -= part1 - ll_builder.grow(ll_builder, size) - # - pos = ll_builder.current_pos - ll_builder.current_pos = pos + size - ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) - - @staticmethod - @always_inline - def ll_getlength(ll_builder): - num_chars_missing_from_last_piece = ( - ll_builder.current_end - ll_builder.current_pos) - return ll_builder.total_size - num_chars_missing_from_last_piece - - @staticmethod - @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) - def ll_build(ll_builder): - # NB. 
usually the JIT doesn't look inside this function; it does - # so only in the simplest example where it could virtualize everything - if ll_builder.extra_pieces: - BaseStringBuilderRepr._ll_fold_pieces(ll_builder) - elif ll_builder.current_pos != ll_builder.total_size: - BaseStringBuilderRepr._ll_shrink_final(ll_builder) - return ll_builder.current_buf - - @staticmethod - def _ll_shrink_final(ll_builder): - final_size = ll_builder.current_pos - ll_assert(final_size <= ll_builder.total_size, - "final_size > ll_builder.total_size?") - buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) - ll_builder.current_buf = buf - ll_builder.current_end = final_size - ll_builder.total_size = final_size - - @staticmethod - def _ll_fold_pieces(ll_builder): - final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) - ll_assert(final_size >= 0, "negative final_size") - extra = ll_builder.extra_pieces - ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) - # - result = ll_builder.mallocfn(final_size) - piece = ll_builder.current_buf - piece_lgt = ll_builder.current_pos - ll_assert(ll_builder.current_end == len(piece.chars), - "bogus last piece_lgt") - ll_builder.total_size = final_size - ll_builder.current_buf = result - ll_builder.current_pos = final_size - ll_builder.current_end = final_size - - dst = final_size - while True: - dst -= piece_lgt - ll_assert(dst >= 0, "rbuilder build: overflow") - ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) - if not extra: - break - piece = extra.buf - piece_lgt = len(piece.chars) - extra = extra.prev_piece - ll_assert(dst == 0, "rbuilder build: underflow") - - @classmethod - def ll_bool(cls, ll_builder): - return ll_builder != nullptr(cls.lowleveltype.TO) + ll_append = staticmethod(ll_append) + ll_append_char = staticmethod(ll_append_char) + ll_append_slice = staticmethod(ll_append_slice) + ll_append_multiple_char = staticmethod(ll_append_multiple_char) + ll_append_charpsize = 
staticmethod(ll_append_charpsize) + ll_getlength = staticmethod(ll_getlength) + ll_build = staticmethod(ll_build) + ll_bool = staticmethod(ll_bool) class StringBuilderRepr(BaseStringBuilderRepr): lowleveltype = lltype.Ptr(STRINGBUILDER) basetp = STR - mallocfn = staticmethod(rstr.mallocstr) string_repr = string_repr char_repr = char_repr raw_ptr_repr = PtrRepr( @@ -359,7 +434,6 @@ class UnicodeBuilderRepr(BaseStringBuilderRepr): lowleveltype = lltype.Ptr(UNICODEBUILDER) basetp = UNICODE - mallocfn = staticmethod(rstr.mallocunicode) string_repr = unicode_repr char_repr = unichar_repr raw_ptr_repr = PtrRepr( diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py --- a/rpython/rtyper/test/test_rbuilder.py +++ b/rpython/rtyper/test/test_rbuilder.py @@ -28,9 +28,13 @@ def test_simple(self): sb = StringBuilderRepr.ll_new(3) + assert StringBuilderRepr.ll_getlength(sb) == 0 StringBuilderRepr.ll_append_char(sb, 'x') + assert StringBuilderRepr.ll_getlength(sb) == 1 StringBuilderRepr.ll_append(sb, llstr("abc")) + assert StringBuilderRepr.ll_getlength(sb) == 4 StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5) + assert StringBuilderRepr.ll_getlength(sb) == 7 StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3) assert StringBuilderRepr.ll_getlength(sb) == 10 s = StringBuilderRepr.ll_build(sb) From noreply at buildbot.pypy.org Fri Jun 20 22:09:35 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Fri, 20 Jun 2014 22:09:35 +0200 (CEST) Subject: [pypy-commit] pypy pypy3-release-2.3.x: Added tag pypy3-release-2.3.1 for changeset 986752d005bb Message-ID: <20140620200935.502C41C326C@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: pypy3-release-2.3.x Changeset: r72112:8c66e412b1fc Date: 2014-06-20 13:08 -0700 http://bitbucket.org/pypy/pypy/changeset/8c66e412b1fc/ Log: Added tag pypy3-release-2.3.1 for changeset 986752d005bb diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -10,3 +10,4 @@ 
20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 0000000000000000000000000000000000000000 release-2.3.0 394146e9bb673514c61f0150ab2013ccf78e8de7 release-2.3 +986752d005bb6c65ce418113e4c3cd115f61a9b4 pypy3-release-2.3.1 From noreply at buildbot.pypy.org Fri Jun 20 22:09:36 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Fri, 20 Jun 2014 22:09:36 +0200 (CEST) Subject: [pypy-commit] pypy pypy3-release-2.3.x: additions Message-ID: <20140620200936.9D0701C326C@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: pypy3-release-2.3.x Changeset: r72113:49a5282155a1 Date: 2014-06-20 13:08 -0700 http://bitbucket.org/pypy/pypy/changeset/49a5282155a1/ Log: additions diff --git a/pypy/doc/release-pypy3-2.3.1.rst b/pypy/doc/release-pypy3-2.3.1.rst --- a/pypy/doc/release-pypy3-2.3.1.rst +++ b/pypy/doc/release-pypy3-2.3.1.rst @@ -21,12 +21,18 @@ * Additional support for the u'unicode' syntax (`PEP 414`_) from Python 3.3 -* Fixed the previously disabled collection (list/dict/set) strategies which - resolves a notable performance regression from PyPy 2. - * Updates from the default branch, such as incremental GC and various JIT improvements +* Resolved some notable JIT performance regressions from PyPy2: + + - Re-enabled the previously disabled collection (list/dict/set) strategies + + - Resolved performance of iteration over range objects + + - Resolved handling of Python 3's exception __context__ unnecessarily forcing + frame object overhead + .. _`PEP 414`: http://legacy.python.org/dev/peps/pep-0414/ What is PyPy? 
From noreply at buildbot.pypy.org Fri Jun 20 22:09:44 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Fri, 20 Jun 2014 22:09:44 +0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: update for PyPy3 2.3.1 release Message-ID: <20140620200944.3AD231C326C@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: extradoc Changeset: r510:c3dd915045d1 Date: 2014-06-20 13:05 -0700 http://bitbucket.org/pypy/pypy.org/changeset/c3dd915045d1/ Log: update for PyPy3 2.3.1 release diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -51,9 +51,9 @@ (but not the release) are slightly miscompiled due to buildslave being old. Contributions are welcomed.

Here are the binaries for the current release — PyPy 2.3.1 — -(what's new in PyPy 2.3.1? )

-

for x86 and ARM Linux, Mac OS/X, Windows and the older release — PyPy3 2.1 beta1 — -(what's new in PyPy3 2.1 beta1?).

+(what's new in PyPy 2.3.1? ) for x86 and ARM Linux, Mac OS/X, Windows +and — PyPy3 2.3.1 — +(what's new in PyPy3 2.3.1?).

  • Download -
    -

    PyPy3 2.1 Beta 1

    +
    +

    PyPy3 2.3.1

    Note that Linux binaries are dynamically linked, as is usual, and thus might not be usable due to the sad story of linux binary compatibility. This means that Linux binaries are only usable on the distributions written next to them unless you're ready to hack your system by adding symlinks to the libraries it tries to open.

    @@ -273,15 +273,15 @@ 39b8c4c7ae8ff9f7dd8b854834b530ef pypy-2.3.1-osx64.tar.bz2 aa71e065f89849a9f641cc3ca8d80ebc pypy-2.3.1-win32.zip 5dab108869a039b754da5c07046fb17c pypy-2.3.1-linux-armel.tar.bz2 -f6adca4d26f34bef9903cc5347c7d688 pypy3-2.1-beta1-linux64.tar.bz2 -d57d0d0d3c49c7cce75440924d8f66b7 pypy3-2.1-beta1-linux-armel.tar.bz2 -55b82b199ccf537c7ea5e2f31df78dfe pypy3-2.1-beta1-linux-armhf-raring.tar.bz2 -a43e64557fe27b979a8546a89c05652f pypy3-2.1-beta1-linux-armhf-raspbian.tar.bz2 -9ae9d0b67279c557fb4b7f57c3072d0f pypy3-2.1-beta1-linux.tar.bz2 -36634bf17f0e9852fda7cc6745f2df28 pypy3-2.1-beta1-osx64.tar.bz2 -026c8be208bf2ef742ddfa3f8ca41a05 pypy3-2.1-beta1-win32.zip -bc2013d1927dc1c0c91228e566abd8da pypy3-2.1-beta1-src.tar.bz2 -cba4bdcfaed94185b20637379cb236b9 pypy3-2.1-beta1-src.zip +a86da5688dfd84e0485239f8991af308 pypy3-2.3.1-linux64.tar.bz2 +b0d6a0491e9c9be39d3c314c0823a039 pypy3-2.3.1-linux-armel.tar.bz2 +f79f7b5588d2b5a68d2781908bc8f9af pypy3-2.3.1-linux-armhf-raring.tar.bz2 +ac2ffd447db10a05d145a703fb586138 pypy3-2.3.1-linux-armhf-raspbian.tar.bz2 +d9003db983452e06bb2a028c9e7b6e2b pypy3-2.3.1-linux.tar.bz2 +a122a2c6414b9d724b46a45f89202578 pypy3-2.3.1-osx64.tar.bz2 +4d8d4d1326f40aa98c56ef77439a54aa pypy3-2.3.1-win32.zip +83731c1cf0b5ecd5acf39210d7710816 pypy3-2.3.1-src.tar.bz2 +88f33c7a25996fe4abf342765570a8fe pypy3-2.3.1-src.zip 2c9f0054f3b93a6473f10be35277825a pypy-1.8-sandbox-linux64.tar.bz2 009c970b5fa75754ae4c32a5d108a8d4 pypy-1.8-sandbox-linux.tar.bz2 8ef9b71a5fd66bdb598f178602ea195f7c1b0f66 pypy-2.3.1-linux64.tar.bz2 @@ -291,15 +291,15 @@ 4d9cdf801e4c8fb432b17be0edf76eb3d9360f40 pypy-2.3.1-osx64.tar.bz2 08639771f26188739a82408454188582c6e25ce9 pypy-2.3.1-win32.zip ad8ebf67c5ccf354513a9cdb0586080b5964a5bd pypy-2.3.1-linux-armel.tar.bz2 -6aa8377a09f79f1ce145537865d80716e40378de pypy3-2.1-beta1-linux64.tar.bz2 -c948aa751500e20df0678695524c6fc5088da39c pypy3-2.1-beta1-linux-armel.tar.bz2 -b316e04cd99abccfcfe7007df7ce78e56feb8889 
pypy3-2.1-beta1-linux-armhf-raring.tar.bz2 -292851e698dcf2f1f9875198d1617a9f3afde635 pypy3-2.1-beta1-linux-armhf-raspbian.tar.bz2 -dff08887a9746b9cbb5cb88e29243f7d1aa7639c pypy3-2.1-beta1-linux.tar.bz2 -4455121f59214332b77d7c93e1d1849d0507d4cb pypy3-2.1-beta1-osx64.tar.bz2 -02749917c4c6898d8c616abf16061cf926d8b3e5 pypy3-2.1-beta1-win32.zip -ea56727de793b71f15f741e600dffd0291b77964 pypy3-2.1-beta1-src.tar.bz2 -255243e214f1f941598cfafcf753d99ceca18547 pypy3-2.1-beta1-src.zip +7276a9e97229e754f66863a0793c59066120ec51 pypy3-2.3.1-linux64.tar.bz2 +fb52a30be0fd4c7d8686c98e03595a8b48b11e82 pypy3-2.3.1-linux-armel.tar.bz2 +0239677fe28a4c22a70853242368456b98ac665a pypy3-2.3.1-linux-armhf-raring.tar.bz2 +c5b8194a63fedcad1441580b183993076e06c06d pypy3-2.3.1-linux-armhf-raspbian.tar.bz2 +90134c8f96f63993a890edbabd905ffb4044829d pypy3-2.3.1-linux.tar.bz2 +263be31beb243aa56e9878b421079e3282617e87 pypy3-2.3.1-osx64.tar.bz2 +5f64f8a5d27e185d51c92e5e4717ef977ce12734 pypy3-2.3.1-win32.zip +b9a0d9759f6f383e5c9edab4a21c3b8768f28dbd pypy3-2.3.1-src.tar.bz2 +3c9761c5e7970daf1aaf18feac929c4a0ff0da99 pypy3-2.3.1-src.zip 895aaf7bba5787dd30adda5cc0e0e7fc297c0ca7 pypy-1.8-sandbox-linux64.tar.bz2 be94460bed8b2682880495435c309b6611ae2c31 pypy-1.8-sandbox-linux.tar.bz2 diff --git a/index.html b/index.html --- a/index.html +++ b/index.html @@ -46,7 +46,7 @@

    Welcome to PyPy

    PyPy is a fast, compliant alternative implementation of the Python -language (2.7.6 and 3.2.3). It has several advantages and distinct features:

    +language (2.7.6 and 3.2.5). It has several advantages and distinct features:

    • Speed: thanks to its Just-in-Time compiler, Python programs @@ -63,7 +63,7 @@
    • As well as other features.
    -

    Download and try out the PyPy release 2.3.1 or the PyPy3 2.1 beta1!

    +

    Download and try out the PyPy or PyPy3 release 2.3.1!

    Want to know more? A good place to start is our detailed speed and compatibility reports!

    diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -15,13 +15,12 @@ miscompiled due to buildslave being old. Contributions are welcomed**. Here are the binaries for the current release — **PyPy 2.3.1** — -(`what's new in PyPy 2.3.1?`_ ) +(`what's new in PyPy 2.3.1?`_ ) for x86 and ARM Linux, Mac OS/X, Windows +and — **PyPy3 2.3.1** — +(`what's new in PyPy3 2.3.1?`_). -for x86 and ARM Linux, Mac OS/X, Windows and the older release — **PyPy3 2.1 beta1** — -(`what's new in PyPy3 2.1 beta1?`_). - -.. _what's new in PyPy3 2.1 beta1?: http://doc.pypy.org/en/latest/release-pypy3-2.1.0-beta1.html .. _what's new in PyPy 2.3.1?: http://doc.pypy.org/en/latest/release-2.3.1.html +.. _what's new in PyPy3 2.3.1?: http://doc.pypy.org/en/latest/release-pypy3-2.3.1.html .. class:: download_menu @@ -99,8 +98,8 @@ .. __: https://bitbucket.org/pypy/pypy/downloads .. _mirror: http://cobra.cs.uni-duesseldorf.de/~buildmaster/mirror/ -PyPy3 2.1 Beta 1 ----------------- +PyPy3 2.3.1 +----------- Note that Linux binaries are dynamically linked, as is usual, and thus might not be usable due to the sad story of linux binary compatibility. This means @@ -121,16 +120,16 @@ * `All our downloads,`__ including previous versions. We also have a mirror_, but please use only if you have troubles accessing the links above -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-linux.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-linux64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-linux-armhf-raspbian.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-linux-armhf-raring.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-linux-armel.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-osx64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-win32.zip +.. 
__: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-linux.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-linux64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-linux-armhf-raspbian.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-linux-armhf-raring.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-linux-armel.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-osx64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-win32.zip .. _`VS 2008 runtime library installer vcredist_x86.exe`: http://www.microsoft.com/en-us/download/details.aspx?id=5582 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-src.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.1-beta1-src.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-src.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-2.3.1-src.zip .. __: https://bitbucket.org/pypy/pypy/downloads If your CPU is really old, it may not have SSE2. 
In this case, you need @@ -317,15 +316,15 @@ 39b8c4c7ae8ff9f7dd8b854834b530ef pypy-2.3.1-osx64.tar.bz2 aa71e065f89849a9f641cc3ca8d80ebc pypy-2.3.1-win32.zip 5dab108869a039b754da5c07046fb17c pypy-2.3.1-linux-armel.tar.bz2 - f6adca4d26f34bef9903cc5347c7d688 pypy3-2.1-beta1-linux64.tar.bz2 - d57d0d0d3c49c7cce75440924d8f66b7 pypy3-2.1-beta1-linux-armel.tar.bz2 - 55b82b199ccf537c7ea5e2f31df78dfe pypy3-2.1-beta1-linux-armhf-raring.tar.bz2 - a43e64557fe27b979a8546a89c05652f pypy3-2.1-beta1-linux-armhf-raspbian.tar.bz2 - 9ae9d0b67279c557fb4b7f57c3072d0f pypy3-2.1-beta1-linux.tar.bz2 - 36634bf17f0e9852fda7cc6745f2df28 pypy3-2.1-beta1-osx64.tar.bz2 - 026c8be208bf2ef742ddfa3f8ca41a05 pypy3-2.1-beta1-win32.zip - bc2013d1927dc1c0c91228e566abd8da pypy3-2.1-beta1-src.tar.bz2 - cba4bdcfaed94185b20637379cb236b9 pypy3-2.1-beta1-src.zip + a86da5688dfd84e0485239f8991af308 pypy3-2.3.1-linux64.tar.bz2 + b0d6a0491e9c9be39d3c314c0823a039 pypy3-2.3.1-linux-armel.tar.bz2 + f79f7b5588d2b5a68d2781908bc8f9af pypy3-2.3.1-linux-armhf-raring.tar.bz2 + ac2ffd447db10a05d145a703fb586138 pypy3-2.3.1-linux-armhf-raspbian.tar.bz2 + d9003db983452e06bb2a028c9e7b6e2b pypy3-2.3.1-linux.tar.bz2 + a122a2c6414b9d724b46a45f89202578 pypy3-2.3.1-osx64.tar.bz2 + 4d8d4d1326f40aa98c56ef77439a54aa pypy3-2.3.1-win32.zip + 83731c1cf0b5ecd5acf39210d7710816 pypy3-2.3.1-src.tar.bz2 + 88f33c7a25996fe4abf342765570a8fe pypy3-2.3.1-src.zip 2c9f0054f3b93a6473f10be35277825a pypy-1.8-sandbox-linux64.tar.bz2 009c970b5fa75754ae4c32a5d108a8d4 pypy-1.8-sandbox-linux.tar.bz2 @@ -336,14 +335,14 @@ 4d9cdf801e4c8fb432b17be0edf76eb3d9360f40 pypy-2.3.1-osx64.tar.bz2 08639771f26188739a82408454188582c6e25ce9 pypy-2.3.1-win32.zip ad8ebf67c5ccf354513a9cdb0586080b5964a5bd pypy-2.3.1-linux-armel.tar.bz2 - 6aa8377a09f79f1ce145537865d80716e40378de pypy3-2.1-beta1-linux64.tar.bz2 - c948aa751500e20df0678695524c6fc5088da39c pypy3-2.1-beta1-linux-armel.tar.bz2 - b316e04cd99abccfcfe7007df7ce78e56feb8889 pypy3-2.1-beta1-linux-armhf-raring.tar.bz2 - 
292851e698dcf2f1f9875198d1617a9f3afde635 pypy3-2.1-beta1-linux-armhf-raspbian.tar.bz2 - dff08887a9746b9cbb5cb88e29243f7d1aa7639c pypy3-2.1-beta1-linux.tar.bz2 - 4455121f59214332b77d7c93e1d1849d0507d4cb pypy3-2.1-beta1-osx64.tar.bz2 - 02749917c4c6898d8c616abf16061cf926d8b3e5 pypy3-2.1-beta1-win32.zip - ea56727de793b71f15f741e600dffd0291b77964 pypy3-2.1-beta1-src.tar.bz2 - 255243e214f1f941598cfafcf753d99ceca18547 pypy3-2.1-beta1-src.zip + 7276a9e97229e754f66863a0793c59066120ec51 pypy3-2.3.1-linux64.tar.bz2 + fb52a30be0fd4c7d8686c98e03595a8b48b11e82 pypy3-2.3.1-linux-armel.tar.bz2 + 0239677fe28a4c22a70853242368456b98ac665a pypy3-2.3.1-linux-armhf-raring.tar.bz2 + c5b8194a63fedcad1441580b183993076e06c06d pypy3-2.3.1-linux-armhf-raspbian.tar.bz2 + 90134c8f96f63993a890edbabd905ffb4044829d pypy3-2.3.1-linux.tar.bz2 + 263be31beb243aa56e9878b421079e3282617e87 pypy3-2.3.1-osx64.tar.bz2 + 5f64f8a5d27e185d51c92e5e4717ef977ce12734 pypy3-2.3.1-win32.zip + b9a0d9759f6f383e5c9edab4a21c3b8768f28dbd pypy3-2.3.1-src.tar.bz2 + 3c9761c5e7970daf1aaf18feac929c4a0ff0da99 pypy3-2.3.1-src.zip 895aaf7bba5787dd30adda5cc0e0e7fc297c0ca7 pypy-1.8-sandbox-linux64.tar.bz2 be94460bed8b2682880495435c309b6611ae2c31 pypy-1.8-sandbox-linux.tar.bz2 diff --git a/source/index.txt b/source/index.txt --- a/source/index.txt +++ b/source/index.txt @@ -4,7 +4,7 @@ --- PyPy is a `fast`_, `compliant`_ alternative implementation of the `Python`_ -language (2.7.6 and 3.2.3). It has several advantages and distinct features: +language (2.7.6 and 3.2.5). It has several advantages and distinct features: * **Speed:** thanks to its Just-in-Time compiler, Python programs often run `faster`_ on PyPy. `(What is a JIT compiler?)`_ @@ -26,7 +26,7 @@ .. class:: download -`Download and try out the PyPy release 2.3.1 or the PyPy3 2.1 beta1!`__ +`Download and try out the PyPy or PyPy3 release 2.3.1!`__ .. 
__: download.html From noreply at buildbot.pypy.org Fri Jun 20 22:14:42 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Fri, 20 Jun 2014 22:14:42 +0200 (CEST) Subject: [pypy-commit] pypy default: copy these over for docs.pypy.org Message-ID: <20140620201442.F2A8D1C3619@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: Changeset: r72114:69637cc08858 Date: 2014-06-20 13:14 -0700 http://bitbucket.org/pypy/pypy/changeset/69637cc08858/ Log: copy these over for docs.pypy.org diff --git a/pypy/doc/release-pypy3-2.3.1.rst b/pypy/doc/release-pypy3-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-pypy3-2.3.1.rst @@ -0,0 +1,69 @@ +===================== +PyPy3 2.3.1 - Fulcrum +===================== + +We're pleased to announce the first stable release of PyPy3. PyPy3 +targets Python 3 (3.2.5) compatibility. + +We would like to thank all of the people who donated_ to the `py3k proposal`_ +for supporting the work that went into this. + +You can download the PyPy3 2.3.1 release here: + + http://pypy.org/download.html#pypy3-2-3-1 + +Highlights +========== + +* The first stable release of PyPy3: support for Python 3! + +* The stdlib has been updated to Python 3.2.5 + +* Additional support for the u'unicode' syntax (`PEP 414`_) from Python 3.3 + +* Updates from the default branch, such as incremental GC and various JIT + improvements + +* Resolved some notable JIT performance regressions from PyPy2: + + - Re-enabled the previously disabled collection (list/dict/set) strategies + + - Resolved performance of iteration over range objects + + - Resolved handling of Python 3's exception __context__ unnecessarily forcing + frame object overhead + +.. _`PEP 414`: http://legacy.python.org/dev/peps/pep-0414/ + +What is PyPy? +============== + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7.6 or 3.2.5. It's fast due to its integrated tracing JIT compiler. 
+ +This release supports x86 machines running Linux 32/64, Mac OS X 64, Windows, +and OpenBSD, +as well as newer ARM hardware (ARMv6 or ARMv7, with VFPv3) running Linux. + +While we support 32 bit python on Windows, work on the native Windows 64 +bit python is still stalling, we would welcome a volunteer +to `handle that`_. + +.. _`handle that`: http://doc.pypy.org/en/latest/windows.html#what-is-missing-for-a-full-64-bit-translation + +How to use PyPy? +================= + +We suggest using PyPy from a `virtualenv`_. Once you have a virtualenv +installed, you can follow instructions from `pypy documentation`_ on how +to proceed. This document also covers other `installation schemes`_. + +.. _donated: http://morepypy.blogspot.com/2012/01/py3k-and-numpy-first-stage-thanks-to.html +.. _`py3k proposal`: http://pypy.org/py3donate.html +.. _`pypy documentation`: http://doc.pypy.org/en/latest/getting-started.html#installing-using-virtualenv +.. _`virtualenv`: http://www.virtualenv.org/en/latest/ +.. _`installation schemes`: http://doc.pypy.org/en/latest/getting-started.html#installing-pypy + + +Cheers, +the PyPy team diff --git a/pypy/doc/whatsnew-pypy3-2.3.1.rst b/pypy/doc/whatsnew-pypy3-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-pypy3-2.3.1.rst @@ -0,0 +1,6 @@ +========================= +What's new in PyPy3 2.3.1 +========================= + +.. this is a revision shortly after pypy3-release-2.3.x +.. 
startrev: 0137d8e6657d From noreply at buildbot.pypy.org Sat Jun 21 08:53:22 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 21 Jun 2014 08:53:22 +0200 (CEST) Subject: [pypy-commit] pypy default: Add this paper to the list Message-ID: <20140621065322.C8AA01C33B3@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72115:f36ff0b3852b Date: 2014-06-21 08:52 +0200 http://bitbucket.org/pypy/pypy/changeset/f36ff0b3852b/ Log: Add this paper to the list diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst --- a/pypy/doc/extradoc.rst +++ b/pypy/doc/extradoc.rst @@ -8,6 +8,9 @@ *Articles about PyPy published so far, most recent first:* (bibtex_ file) +* `A Way Forward in Parallelising Dynamic Languages`_, + R. Meier, A. Rigo + * `Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`_, C.F. Bolz, A. Cuni, M. Fijalkowski, M. Leuschel, S. Pedroni, A. Rigo @@ -71,6 +74,7 @@ .. _bibtex: https://bitbucket.org/pypy/extradoc/raw/tip/talk/bibtex.bib +.. _`A Way Forward in Parallelising Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2014/position-paper.pdf .. _`Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2011/jit-hints.pdf .. _`Allocation Removal by Partial Evaluation in a Tracing JIT`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/pepm2011/bolz-allocation-removal.pdf .. _`Towards a Jitting VM for Prolog Execution`: http://www.stups.uni-duesseldorf.de/mediawiki/images/a/a7/Pub-BoLeSch2010.pdf From noreply at buildbot.pypy.org Sat Jun 21 09:11:54 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 21 Jun 2014 09:11:54 +0200 (CEST) Subject: [pypy-commit] pypy default: Link to the general extradoc/talk directory. 
Message-ID: <20140621071154.ABE531C114A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72116:9dd89ee7e5b2 Date: 2014-06-21 09:11 +0200 http://bitbucket.org/pypy/pypy/changeset/9dd89ee7e5b2/ Log: Link to the general extradoc/talk directory. diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst --- a/pypy/doc/extradoc.rst +++ b/pypy/doc/extradoc.rst @@ -97,6 +97,11 @@ Talks and Presentations ---------------------------------- +*This part is no longer updated.* The complete list is here__ (in +alphabetical order). + +.. __: https://bitbucket.org/pypy/extradoc/src/extradoc/talk/ + Talks in 2010 +++++++++++++ From noreply at buildbot.pypy.org Sat Jun 21 09:23:44 2014 From: noreply at buildbot.pypy.org (vext01) Date: Sat, 21 Jun 2014 09:23:44 +0200 (CEST) Subject: [pypy-commit] pypy default: Release the GIL when calling out to fclose() and pclose() Message-ID: <20140621072344.0AC0E1C33B3@cobra.cs.uni-duesseldorf.de> Author: Edd Barrett Branch: Changeset: r72117:c684bf704d1f Date: 2014-06-20 17:32 +0100 http://bitbucket.org/pypy/pypy/changeset/c684bf704d1f/ Log: Release the GIL when calling out to fclose() and pclose() Bug exposed by pypy bridge. 
Armin explains it best: 11:04 < arigato> ebarrett: that's because rpython.rlib.rfile, used only by php, fails to translate when there are threads involved (used only by pypy) 11:07 < arigato> it's a limitation of rfile.py 11:07 < arigato> if it relies on RPython __del__s to close the files, then the files cannot be closed with the GIL released diff --git a/rpython/rlib/rfile.py b/rpython/rlib/rfile.py --- a/rpython/rlib/rfile.py +++ b/rpython/rlib/rfile.py @@ -35,7 +35,7 @@ FILE = lltype.Struct('FILE') # opaque type maybe c_open = llexternal('fopen', [rffi.CCHARP, rffi.CCHARP], lltype.Ptr(FILE)) -c_close = llexternal('fclose', [lltype.Ptr(FILE)], rffi.INT) +c_close = llexternal('fclose', [lltype.Ptr(FILE)], rffi.INT, releasegil=False) c_fwrite = llexternal('fwrite', [rffi.CCHARP, rffi.SIZE_T, rffi.SIZE_T, lltype.Ptr(FILE)], rffi.SIZE_T) c_fread = llexternal('fread', [rffi.CCHARP, rffi.SIZE_T, rffi.SIZE_T, @@ -57,7 +57,7 @@ rffi.CCHARP) c_popen = llexternal('popen', [rffi.CCHARP, rffi.CCHARP], lltype.Ptr(FILE)) -c_pclose = llexternal('pclose', [lltype.Ptr(FILE)], rffi.INT) +c_pclose = llexternal('pclose', [lltype.Ptr(FILE)], rffi.INT, releasegil=False) BASE_BUF_SIZE = 4096 BASE_LINE_SIZE = 100 From noreply at buildbot.pypy.org Sat Jun 21 20:44:07 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 21 Jun 2014 20:44:07 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix for issue #1787 Message-ID: <20140621184407.7A12D1C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72118:d1b0787cfc90 Date: 2014-06-21 10:56 +0200 http://bitbucket.org/pypy/pypy/changeset/d1b0787cfc90/ Log: Fix for issue #1787 diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -508,7 +508,10 @@ argshapes = unpack_argshapes(space, w_args) resshape = unpack_resshape(space, w_res) ffi_args = [shape.get_basic_ffi_type() for shape in argshapes] - 
ffi_res = resshape.get_basic_ffi_type() + if resshape is not None: + ffi_res = resshape.get_basic_ffi_type() + else: + ffi_res = ffi_type_void try: ptr = RawFuncPtr('???', ffi_args, ffi_res, rffi.cast(rffi.VOIDP, addr), flags) diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py --- a/pypy/module/_rawffi/test/test__rawffi.py +++ b/pypy/module/_rawffi/test/test__rawffi.py @@ -353,6 +353,11 @@ assert ptr[0] == rawcall.buffer ptr.free() + def test_raw_callable_returning_void(self): + import _rawffi + _rawffi.FuncPtr(0, [], None) + # assert did not crash + def test_short_addition(self): import _rawffi lib = _rawffi.CDLL(self.lib_name) From noreply at buildbot.pypy.org Sat Jun 21 21:33:32 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 21 Jun 2014 21:33:32 +0200 (CEST) Subject: [pypy-commit] cffi default: Merged in techtonik/cffi (pull request #40) Message-ID: <20140621193332.7040E1C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1518:bada488956f8 Date: 2014-06-21 21:33 +0200 http://bitbucket.org/cffi/cffi/changeset/bada488956f8/ Log: Merged in techtonik/cffi (pull request #40) Attempt to improve intro text for people not familiar with problem diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,31 +1,32 @@ CFFI documentation ================================ -Foreign Function Interface for Python calling C code. The aim of this project -is to provide a convenient and reliable way of calling C code from Python. -The interface is based on `LuaJIT's FFI`_ and follows a few principles: +C Foreign Function Interface for Python. The goal is to provide a +convenient and reliable way to call compiled C code from Python using +interface declarations written in C. -* The goal is to call C code from Python. 
You should be able to do so - without learning a 3rd language: every alternative requires you to learn - their own language (Cython_, SWIG_) or API (ctypes_). So we tried to - assume that you know Python and C and minimize the extra bits of API that - you need to learn. +The interface is based on `LuaJIT's FFI`_, and follows a few principles: + +* The goal is to call C code from Python without learning a 3rd language: + existing alternatives require users to learn domain specific language + (Cython_, SWIG_) or API (ctypes_). CFFI design requires users to know + only C and Python, minimizing extra bits of API that need to be learned. * Keep all the Python-related logic in Python so that you don't need to write much C code (unlike `CPython native C extensions`_). -* Work either at the level of the ABI (Application Binary Interface) - or the API (Application Programming Interface). Usually, C - libraries have a specified C API but often not an ABI (e.g. they may +* Support level of the ABI (Application Binary Interface) calling system + functions directly (the way ctypes_ works) and level of the API + (Application Programming Interface) using compiler to validate and link + C language constructs. Usually, C libraries have a specified C API, + but often not an ABI (e.g. they may document a "struct" as having at least these fields, but maybe more). - (ctypes_ works at the ABI level, whereas Cython_ and `native C extensions`_ - work at the API level.) -* We try to be complete. For now some C99 constructs are not supported, +* Try to be complete. For now some C99 constructs are not supported, but all C89 should be, including macros (and including macro "abuses", which you can `manually wrap`_ in saner-looking C functions). -* We attempt to support both PyPy and CPython, with a reasonable path +* Attempt to support both PyPy and CPython, with a reasonable path for other Python implementations like IronPython and Jython. 
* Note that this project is **not** about embedding executable C code in From noreply at buildbot.pypy.org Sat Jun 21 21:33:34 2014 From: noreply at buildbot.pypy.org (techtonik) Date: Sat, 21 Jun 2014 21:33:34 +0200 (CEST) Subject: [pypy-commit] cffi default: Attempt to improve intro text for people not familiar with problem Message-ID: <20140621193334.8369D1C0CA6@cobra.cs.uni-duesseldorf.de> Author: anatoly techtonik Branch: Changeset: r1515:8d721cd75852 Date: 2014-05-30 14:10 +0300 http://bitbucket.org/cffi/cffi/changeset/8d721cd75852/ Log: Attempt to improve intro text for people not familiar with problem diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,31 +1,32 @@ CFFI documentation ================================ -Foreign Function Interface for Python calling C code. The aim of this project -is to provide a convenient and reliable way of calling C code from Python. -The interface is based on `LuaJIT's FFI`_ and follows a few principles: +C Foreign Function Interface for Python. The goal is to provide a +convenient and reliable way to call compiled C code from Python using +interface declarations written in C. -* The goal is to call C code from Python. You should be able to do so - without learning a 3rd language: every alternative requires you to learn - their own language (Cython_, SWIG_) or API (ctypes_). So we tried to - assume that you know Python and C and minimize the extra bits of API that - you need to learn. +The interface is based on `LuaJIT's FFI`_, and follows a few principles: + +* The goal is to call C code from Python without learning a 3rd language: + existing alternatives require to learn domain specific language + (Cython_, SWIG_) or API (ctypes_). CFFI design requires users to know + only C and Python, minimizing extra bits of API that need to be learned. 
* Keep all the Python-related logic in Python so that you don't need to write much C code (unlike `CPython native C extensions`_). -* Work either at the level of the ABI (Application Binary Interface) - or the API (Application Programming Interface). Usually, C - libraries have a specified C API but often not an ABI (e.g. they may +* Support level of the ABI (Application Binary Interface) calling system + functions directly (the way ctypes_ works) and level of the API + (Application Programming Interface) using compiler to validate and link + C language constructs. Usually, C libraries have a specified C API, + bt often not an ABI (e.g. they may document a "struct" as having at least these fields, but maybe more). - (ctypes_ works at the ABI level, whereas Cython_ and `native C extensions`_ - work at the API level.) -* We try to be complete. For now some C99 constructs are not supported, +* Try to be complete. For now some C99 constructs are not supported, but all C89 should be, including macros (and including macro "abuses", which you can `manually wrap`_ in saner-looking C functions). -* We attempt to support both PyPy and CPython, with a reasonable path +* Attempt to support both PyPy and CPython, with a reasonable path for other Python implementations like IronPython and Jython. 
* Note that this project is **not** about embedding executable C code in From noreply at buildbot.pypy.org Sat Jun 21 21:33:35 2014 From: noreply at buildbot.pypy.org (techtonik) Date: Sat, 21 Jun 2014 21:33:35 +0200 (CEST) Subject: [pypy-commit] cffi default: docs: English improvement, thank to Daniel Holth Message-ID: <20140621193335.AA3EC1C0CA6@cobra.cs.uni-duesseldorf.de> Author: anatoly techtonik Branch: Changeset: r1516:02cede1d62b2 Date: 2014-05-30 16:54 +0300 http://bitbucket.org/cffi/cffi/changeset/02cede1d62b2/ Log: docs: English improvement, thank to Daniel Holth diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -8,7 +8,7 @@ The interface is based on `LuaJIT's FFI`_, and follows a few principles: * The goal is to call C code from Python without learning a 3rd language: - existing alternatives require to learn domain specific language + existing alternatives require users to learn domain specific language (Cython_, SWIG_) or API (ctypes_). CFFI design requires users to know only C and Python, minimizing extra bits of API that need to be learned. From noreply at buildbot.pypy.org Sat Jun 21 21:33:36 2014 From: noreply at buildbot.pypy.org (techtonik) Date: Sat, 21 Jun 2014 21:33:36 +0200 (CEST) Subject: [pypy-commit] cffi default: index.rst: typo, edited online with Bitbucket Message-ID: <20140621193336.CA9F11C0CA6@cobra.cs.uni-duesseldorf.de> Author: anatoly techtonik Branch: Changeset: r1517:75a1c583a999 Date: 2014-06-16 08:35 +0000 http://bitbucket.org/cffi/cffi/changeset/75a1c583a999/ Log: index.rst: typo, edited online with Bitbucket diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -19,7 +19,7 @@ functions directly (the way ctypes_ works) and level of the API (Application Programming Interface) using compiler to validate and link C language constructs. Usually, C libraries have a specified C API, - bt often not an ABI (e.g. 
they may + but often not an ABI (e.g. they may document a "struct" as having at least these fields, but maybe more). * Try to be complete. For now some C99 constructs are not supported, From noreply at buildbot.pypy.org Sat Jun 21 21:41:50 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 21 Jun 2014 21:41:50 +0200 (CEST) Subject: [pypy-commit] cffi default: Rephrase again a paragraph Message-ID: <20140621194150.4254F1C114A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1519:1ab01de49a4b Date: 2014-06-21 21:41 +0200 http://bitbucket.org/cffi/cffi/changeset/1ab01de49a4b/ Log: Rephrase again a paragraph diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -9,17 +9,19 @@ * The goal is to call C code from Python without learning a 3rd language: existing alternatives require users to learn domain specific language - (Cython_, SWIG_) or API (ctypes_). CFFI design requires users to know - only C and Python, minimizing extra bits of API that need to be learned. + (Cython_, SWIG_) or API (ctypes_). The CFFI design requires users to know + only C and Python, minimizing the extra bits of API that need to be learned. * Keep all the Python-related logic in Python so that you don't need to write much C code (unlike `CPython native C extensions`_). -* Support level of the ABI (Application Binary Interface) calling system - functions directly (the way ctypes_ works) and level of the API - (Application Programming Interface) using compiler to validate and link - C language constructs. Usually, C libraries have a specified C API, - but often not an ABI (e.g. they may +* The preferred way is to work at the level of the API (Application + Programming Interface): the C compiler is called from the declarations + you write to validate and link to the C language constructs. + Alternatively, it is also possible to work at the ABI level + (Application Binary Interface), the way ctypes_ work. 
+ However, on non-Windows platforms, C libraries typically + have a specified C API but not an ABI (e.g. they may document a "struct" as having at least these fields, but maybe more). * Try to be complete. For now some C99 constructs are not supported, From noreply at buildbot.pypy.org Sat Jun 21 21:48:36 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sat, 21 Jun 2014 21:48:36 +0200 (CEST) Subject: [pypy-commit] pypy default: fix use of deprecated _ctypes.call_function in win32 only ctypes -A test, since it is not yet removed. Message-ID: <20140621194836.680FF1C114A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72119:765614ef397c Date: 2014-06-20 14:41 +0300 http://bitbucket.org/pypy/pypy/changeset/765614ef397c/ Log: fix use of deprecated _ctypes.call_function in win32 only ctypes -A test, since it is not yet removed. diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -219,6 +219,8 @@ if restype is None: import ctypes restype = ctypes.c_int + if self._argtypes_ is None: + self._argtypes_ = [] self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype) self._check_argtypes_for_fastpath() return From noreply at buildbot.pypy.org Sat Jun 21 21:48:37 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sat, 21 Jun 2014 21:48:37 +0200 (CEST) Subject: [pypy-commit] pypy default: directly import RawCDLL instead of relying on "import *" Message-ID: <20140621194837.ADF3C1C114A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72120:792c3453abff Date: 2014-06-20 16:55 +0300 http://bitbucket.org/pypy/pypy/changeset/792c3453abff/ Log: directly import RawCDLL instead of relying on "import *" diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -244,7 +244,8 @@ handle = space.fromcache(State).get_pythonapi_handle() # Make a dll object with it - from 
pypy.module._rawffi.interp_rawffi import W_CDLL, RawCDLL + from pypy.module._rawffi.interp_rawffi import W_CDLL + from rpython.rlib.clibffi import RawCDLL cdll = RawCDLL(handle) return space.wrap(W_CDLL(space, "python api", cdll)) From noreply at buildbot.pypy.org Sat Jun 21 21:48:38 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sat, 21 Jun 2014 21:48:38 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: document failed functionality of pythonapi Message-ID: <20140621194838.D51061C114A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72121:8830f5d80d16 Date: 2014-06-21 21:09 +0300 http://bitbucket.org/pypy/pypy/changeset/8830f5d80d16/ Log: document failed functionality of pythonapi diff --git a/pypy/doc/ctypes-implementation.rst b/pypy/doc/ctypes-implementation.rst --- a/pypy/doc/ctypes-implementation.rst +++ b/pypy/doc/ctypes-implementation.rst @@ -72,13 +72,17 @@ Here is a list of the limitations and missing features of the current implementation: -* ``ctypes.pythonapi`` lets you access the CPython C API emulation layer - of PyPy, at your own risks and without doing anything sensible about - the GIL. Since PyPy 2.3, these functions are also named with an extra - "Py", for example ``PyPyInt_FromLong()``. Basically, don't use this, - but it might more or less work in simple cases if you do. (Obviously, - assuming the PyObject pointers you get have any particular fields in - any particular order is just going to crash.) +* ``ctypes.pythonapi`` (Windows only) lets you access the CPython C API + emulation layer. It does not work on PyPy at the moment, we are missing a + ``getfunc`` method for CDLL. Work was begun + to refactor the rpython implementation of _rawffi (in + pypy/modules/_rawffi/alt) but that project has stalled. + + Note that even if it worked, our implementation would not do anything + sensible about the GIL and the functions will be named with an extra + "Py", for example ``PyPyInt_FromLong()``. 
Basically, don't use this. + Assuming the PyObject pointers you get have any particular fields in + any particular order is just going to crash. * We copy Python strings instead of having pointers to raw buffers diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -206,9 +206,9 @@ if sys.platform != "win32" or sys.version_info < (2, 6): skip("Windows Python >= 2.6 only") assert sys.dllhandle - assert sys.dllhandle.getaddressindll('PyPyErr_NewException') - import ctypes # slow - PyUnicode_GetDefaultEncoding = ctypes.pythonapi.PyPyUnicode_GetDefaultEncoding + assert sys.dllhandle.getaddressindll('cpyexttestErr_NewException') + import ctypes + PyUnicode_GetDefaultEncoding = ctypes.pythonapi.cpyexttestUnicode_GetDefaultEncoding PyUnicode_GetDefaultEncoding.restype = ctypes.c_char_p assert PyUnicode_GetDefaultEncoding() == 'ascii' From noreply at buildbot.pypy.org Sat Jun 21 21:48:40 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sat, 21 Jun 2014 21:48:40 +0200 (CEST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <20140621194840.1C4141C114A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72122:b4af9ad3c7d8 Date: 2014-06-21 22:20 +0300 http://bitbucket.org/pypy/pypy/changeset/b4af9ad3c7d8/ Log: merge heads diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -219,6 +219,8 @@ if restype is None: import ctypes restype = ctypes.c_int + if self._argtypes_ is None: + self._argtypes_ = [] self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype) self._check_argtypes_for_fastpath() return diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -244,7 +244,8 @@ handle = space.fromcache(State).get_pythonapi_handle() # Make a dll object with it - from 
pypy.module._rawffi.interp_rawffi import W_CDLL, RawCDLL + from pypy.module._rawffi.interp_rawffi import W_CDLL + from rpython.rlib.clibffi import RawCDLL cdll = RawCDLL(handle) return space.wrap(W_CDLL(space, "python api", cdll)) From noreply at buildbot.pypy.org Sat Jun 21 21:48:41 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sat, 21 Jun 2014 21:48:41 +0200 (CEST) Subject: [pypy-commit] pypy default: fix test of rzipfile on win32 (arigato) Message-ID: <20140621194841.556571C114A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72123:4e8d7e49de5b Date: 2014-06-21 22:48 +0300 http://bitbucket.org/pypy/pypy/changeset/4e8d7e49de5b/ Log: fix test of rzipfile on win32 (arigato) diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py --- a/rpython/rlib/streamio.py +++ b/rpython/rlib/streamio.py @@ -37,7 +37,7 @@ import os, sys, errno from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_longlong, intmask -from rpython.rlib import rposix +from rpython.rlib import rposix, nonconst from rpython.rlib.rstring import StringBuilder from os import O_RDONLY, O_WRONLY, O_RDWR, O_CREAT, O_TRUNC, O_APPEND @@ -159,6 +159,8 @@ stream = TextInputFilter(stream) elif not binary and os.linesep == '\r\n': stream = TextCRLFFilter(stream) + if nonconst.NonConstant(False): + stream.flush_buffers() # annotation workaround for untranslated tests return stream From noreply at buildbot.pypy.org Sun Jun 22 00:11:18 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Sun, 22 Jun 2014 00:11:18 +0200 (CEST) Subject: [pypy-commit] pypy py3k: switch to unicodedata 6.2.0 Message-ID: <20140621221118.6C5BE1C114A@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: py3k Changeset: r72124:62fc05632b25 Date: 2014-06-20 22:12 +0200 http://bitbucket.org/pypy/pypy/changeset/62fc05632b25/ Log: switch to unicodedata 6.2.0 diff --git a/pypy/module/unicodedata/__init__.py b/pypy/module/unicodedata/__init__.py 
--- a/pypy/module/unicodedata/__init__.py +++ b/pypy/module/unicodedata/__init__.py @@ -3,7 +3,7 @@ # This is the default unicodedb used in various places: # - the unicode type # - the regular expression engine -from rpython.rlib.unicodedata import unicodedb_6_0_0 as unicodedb +from rpython.rlib.unicodedata import unicodedb_6_2_0 as unicodedb # to get information about individual unicode chars look at: # http://www.fileformat.info/info/unicode/char/search.htm @@ -14,7 +14,7 @@ interpleveldefs = { 'unidata_version' : 'space.wrap(interp_ucd.ucd.version)', 'ucd_3_2_0' : 'space.wrap(interp_ucd.ucd_3_2_0)', - 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_0_0)', + 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_2_0)', 'ucd' : 'space.wrap(interp_ucd.ucd)', '__doc__' : "space.wrap('unicode character database')", } diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -9,7 +9,7 @@ from rpython.rlib.rarithmetic import r_longlong from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.runicode import MAXUNICODE -from rpython.rlib.unicodedata import unicodedb_6_0_0, unicodedb_3_2_0 +from rpython.rlib.unicodedata import unicodedb_6_2_0, unicodedb_3_2_0 from rpython.rlib.runicode import code_to_unichr, ord_accepts_surrogate import sys @@ -324,5 +324,5 @@ **methods) ucd_3_2_0 = UCD(unicodedb_3_2_0) -ucd_6_0_0 = UCD(unicodedb_6_0_0) -ucd = ucd_6_0_0 +ucd_6_2_0 = UCD(unicodedb_6_2_0) +ucd = ucd_6_2_0 From noreply at buildbot.pypy.org Sun Jun 22 00:11:19 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Sun, 22 Jun 2014 00:11:19 +0200 (CEST) Subject: [pypy-commit] pypy py3k: Backed out changeset 62fc05632b25: wrong branch Message-ID: <20140621221119.B53F91C114A@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: py3k Changeset: r72125:43e81986f9b2 Date: 2014-06-20 22:23 +0200 
http://bitbucket.org/pypy/pypy/changeset/43e81986f9b2/ Log: Backed out changeset 62fc05632b25: wrong branch diff --git a/pypy/module/unicodedata/__init__.py b/pypy/module/unicodedata/__init__.py --- a/pypy/module/unicodedata/__init__.py +++ b/pypy/module/unicodedata/__init__.py @@ -3,7 +3,7 @@ # This is the default unicodedb used in various places: # - the unicode type # - the regular expression engine -from rpython.rlib.unicodedata import unicodedb_6_2_0 as unicodedb +from rpython.rlib.unicodedata import unicodedb_6_0_0 as unicodedb # to get information about individual unicode chars look at: # http://www.fileformat.info/info/unicode/char/search.htm @@ -14,7 +14,7 @@ interpleveldefs = { 'unidata_version' : 'space.wrap(interp_ucd.ucd.version)', 'ucd_3_2_0' : 'space.wrap(interp_ucd.ucd_3_2_0)', - 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_2_0)', + 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_0_0)', 'ucd' : 'space.wrap(interp_ucd.ucd)', '__doc__' : "space.wrap('unicode character database')", } diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -9,7 +9,7 @@ from rpython.rlib.rarithmetic import r_longlong from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.runicode import MAXUNICODE -from rpython.rlib.unicodedata import unicodedb_6_2_0, unicodedb_3_2_0 +from rpython.rlib.unicodedata import unicodedb_6_0_0, unicodedb_3_2_0 from rpython.rlib.runicode import code_to_unichr, ord_accepts_surrogate import sys @@ -324,5 +324,5 @@ **methods) ucd_3_2_0 = UCD(unicodedb_3_2_0) -ucd_6_2_0 = UCD(unicodedb_6_2_0) -ucd = ucd_6_2_0 +ucd_6_0_0 = UCD(unicodedb_6_0_0) +ucd = ucd_6_0_0 From noreply at buildbot.pypy.org Sun Jun 22 00:11:21 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Sun, 22 Jun 2014 00:11:21 +0200 (CEST) Subject: [pypy-commit] pypy py3.3: hg merge py3.3 Message-ID: 
<20140621221121.181EC1C114A@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: py3.3 Changeset: r72126:9e03d88e5f4c Date: 2014-06-20 22:23 +0200 http://bitbucket.org/pypy/pypy/changeset/9e03d88e5f4c/ Log: hg merge py3.3 From noreply at buildbot.pypy.org Sun Jun 22 00:11:22 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Sun, 22 Jun 2014 00:11:22 +0200 (CEST) Subject: [pypy-commit] pypy py3.3: Redo 62fc05632b25: unicodedata 6.2.0 for Python 3.3 Message-ID: <20140621221122.735E01C114A@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: py3.3 Changeset: r72127:3400f9076823 Date: 2014-06-20 22:27 +0200 http://bitbucket.org/pypy/pypy/changeset/3400f9076823/ Log: Redo 62fc05632b25: unicodedata 6.2.0 for Python 3.3 diff --git a/pypy/module/unicodedata/__init__.py b/pypy/module/unicodedata/__init__.py --- a/pypy/module/unicodedata/__init__.py +++ b/pypy/module/unicodedata/__init__.py @@ -3,7 +3,7 @@ # This is the default unicodedb used in various places: # - the unicode type # - the regular expression engine -from rpython.rlib.unicodedata import unicodedb_6_0_0 as unicodedb +from rpython.rlib.unicodedata import unicodedb_6_2_0 as unicodedb # to get information about individual unicode chars look at: # http://www.fileformat.info/info/unicode/char/search.htm @@ -14,7 +14,7 @@ interpleveldefs = { 'unidata_version' : 'space.wrap(interp_ucd.ucd.version)', 'ucd_3_2_0' : 'space.wrap(interp_ucd.ucd_3_2_0)', - 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_0_0)', + 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_2_0)', 'ucd' : 'space.wrap(interp_ucd.ucd)', '__doc__' : "space.wrap('unicode character database')", } diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -9,7 +9,7 @@ from rpython.rlib.rarithmetic import r_longlong from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.runicode import MAXUNICODE 
-from rpython.rlib.unicodedata import unicodedb_6_0_0, unicodedb_3_2_0 +from rpython.rlib.unicodedata import unicodedb_6_2_0, unicodedb_3_2_0 from rpython.rlib.runicode import code_to_unichr, ord_accepts_surrogate import sys @@ -324,5 +324,5 @@ **methods) ucd_3_2_0 = UCD(unicodedb_3_2_0) -ucd_6_0_0 = UCD(unicodedb_6_0_0) -ucd = ucd_6_0_0 +ucd_6_2_0 = UCD(unicodedb_6_2_0) +ucd = ucd_6_2_0 From noreply at buildbot.pypy.org Sun Jun 22 00:11:23 2014 From: noreply at buildbot.pypy.org (amauryfa) Date: Sun, 22 Jun 2014 00:11:23 +0200 (CEST) Subject: [pypy-commit] pypy py3.3: Fix exception handling in _ssl: it now use distinct Exception classes. Message-ID: <20140621221123.E85E81C114A@cobra.cs.uni-duesseldorf.de> Author: Amaury Forgeot d'Arc Branch: py3.3 Changeset: r72128:18dd3240202b Date: 2014-06-22 00:07 +0200 http://bitbucket.org/pypy/pypy/changeset/18dd3240202b/ Log: Fix exception handling in _ssl: it now use distinct Exception classes. + Small hacks until "import ssl" succeeds. diff --git a/pypy/module/_ssl/__init__.py b/pypy/module/_ssl/__init__.py --- a/pypy/module/_ssl/__init__.py +++ b/pypy/module/_ssl/__init__.py @@ -5,7 +5,12 @@ See the socket module for documentation.""" interpleveldefs = { - 'SSLError': 'interp_ssl.get_error(space)', + 'SSLError': 'interp_ssl.get_error(space).w_error', + 'SSLZeroReturnError': 'interp_ssl.get_error(space).w_ZeroReturnError', + 'SSLWantReadError': 'interp_ssl.get_error(space).w_WantReadError', + 'SSLWantWriteError': 'interp_ssl.get_error(space).w_WantWriteError', + 'SSLEOFError': 'interp_ssl.get_error(space).w_EOFError', + 'SSLSyscallError': 'interp_ssl.get_error(space).w_SyscallError', '_SSLSocket': 'interp_ssl.SSLSocket', '_SSLContext': 'interp_ssl.SSLContext', '_test_decode_cert': 'interp_ssl._test_decode_cert', @@ -24,6 +29,8 @@ if HAVE_OPENSSL_RAND: Module.interpleveldefs['RAND_add'] = "interp_ssl.RAND_add" + Module.interpleveldefs['RAND_bytes'] = "space.w_None" # so far + Module.interpleveldefs['RAND_pseudo_bytes'] = 
"space.w_None" # so far Module.interpleveldefs['RAND_status'] = "interp_ssl.RAND_status" Module.interpleveldefs['RAND_egd'] = "interp_ssl.RAND_egd" diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py --- a/pypy/module/_ssl/interp_ssl.py +++ b/pypy/module/_ssl/interp_ssl.py @@ -11,6 +11,7 @@ from rpython.rlib.rposix import get_errno, set_errno from pypy.module._socket import interp_socket +from pypy.module.exceptions import interp_exceptions import weakref @@ -62,10 +63,15 @@ # protocol options constants["OP_ALL"] = SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS +constants["OP_CIPHER_SERVER_PREFERENCE"] = SSL_OP_CIPHER_SERVER_PREFERENCE +constants["OP_SINGLE_DH_USE"] = SSL_OP_SINGLE_DH_USE constants["OP_NO_SSLv2"] = SSL_OP_NO_SSLv2 constants["OP_NO_SSLv3"] = SSL_OP_NO_SSLv3 constants["OP_NO_TLSv1"] = SSL_OP_NO_TLSv1 constants["HAS_SNI"] = HAS_SNI +constants["HAS_ECDH"] = True # To break the test suite +constants["HAS_NPN"] = True # To break the test suite +constants["HAS_TLS_UNIQUE"] = True # To break the test suite # OpenSSL version def _parse_version(ver): @@ -83,11 +89,12 @@ constants["OPENSSL_VERSION"] = SSLEAY_VERSION constants["_OPENSSL_API_VERSION"] = _parse_version(libver) -def ssl_error(space, msg, errno=0): - w_exception_class = get_error(space) - w_exception = space.call_function(w_exception_class, +def ssl_error(space, msg, errno=0, w_errtype=None): + if w_errtype is None: + w_errtype = get_error(space).w_error + w_exception = space.call_function(w_errtype, space.wrap(errno), space.wrap(msg)) - return OperationError(w_exception_class, w_exception) + return OperationError(w_errtype, w_exception) class SSLContext(W_Root): @@ -958,16 +965,20 @@ err = libssl_SSL_get_error(ss.ssl, ret) else: err = SSL_ERROR_SSL + w_errtype = None errstr = "" errval = 0 if err == SSL_ERROR_ZERO_RETURN: + w_errtype = get_error(space).w_ZeroReturnError errstr = "TLS/SSL connection has been closed" errval = PY_SSL_ERROR_ZERO_RETURN elif err == 
SSL_ERROR_WANT_READ: + w_errtype = get_error(space).w_WantReadError errstr = "The operation did not complete (read)" errval = PY_SSL_ERROR_WANT_READ elif err == SSL_ERROR_WANT_WRITE: + w_errtype = get_error(space).w_WantWriteError errstr = "The operation did not complete (write)" errval = PY_SSL_ERROR_WANT_WRITE elif err == SSL_ERROR_WANT_X509_LOOKUP: @@ -980,6 +991,7 @@ e = libssl_ERR_get_error() if e == 0: if ret == 0 or ss.w_socket() is None: + w_errtype = get_error(space).w_EOFError errstr = "EOF occurred in violation of protocol" errval = PY_SSL_ERROR_EOF elif ret == -1: @@ -987,6 +999,7 @@ error = rsocket.last_error() return interp_socket.converted_error(space, error) else: + w_errtype = get_error(space).w_SyscallError errstr = "Some I/O error occurred" errval = PY_SSL_ERROR_SYSCALL else: @@ -1003,17 +1016,44 @@ errstr = "Invalid error code" errval = PY_SSL_ERROR_INVALID_ERROR_CODE - return ssl_error(space, errstr, errval) + return ssl_error(space, errstr, errval, w_errtype=w_errtype) -class Cache: +class W_Error(interp_exceptions.W_OSError): + "An error occurred in the SSL implementation." 
+ + def descr_str(self, space): + if space.isinstance_w(self.w_strerror, space.w_unicode): + return self.w_strerror + else: + return space.str(space.newtuple(self.args_w)) + +W_Error.typedef = TypeDef( + "ssl.SSLError", + interp_exceptions.W_OSError.typedef, + __new__ = interp_exceptions._new(W_Error), + __doc__ = W_Error.__doc__, + __str__ = interp2app(W_Error.descr_str), + ) + + +class ErrorCache: def __init__(self, space): - w_socketerror = interp_socket.get_error(space, "error") - self.w_error = space.new_exception_class( - "_ssl.SSLError", w_socketerror) + self.w_error = space.gettypefor(W_Error) + self.w_ZeroReturnError = space.new_exception_class( + "ssl.SSLZeroReturnError", self.w_error) + self.w_WantReadError = space.new_exception_class( + "ssl.SSLWantReadError", self.w_error) + self.w_WantWriteError = space.new_exception_class( + "ssl.SSLWantWriteError", self.w_error) + self.w_EOFError = space.new_exception_class( + "ssl.SSLEOFError", self.w_error) + self.w_SyscallError = space.new_exception_class( + "ssl.SSLSyscallError", self.w_error) def get_error(space): - return space.fromcache(Cache).w_error + return space.fromcache(ErrorCache) + @unwrap_spec(filename=str, verbose=bool) def _test_decode_cert(space, filename, verbose=True): diff --git a/pypy/module/_ssl/test/test_ssl.py b/pypy/module/_ssl/test/test_ssl.py --- a/pypy/module/_ssl/test/test_ssl.py +++ b/pypy/module/_ssl/test/test_ssl.py @@ -241,6 +241,37 @@ ctx = _ssl._SSLContext(_ssl.PROTOCOL_TLSv1) assert _ssl.OP_ALL | _ssl.OP_NO_SSLv2 == ctx.options +class AppTestSSLError: + spaceconfig = dict(usemodules=('_ssl', '_socket', 'binascii', 'thread')) + + def test_str(self): + import _ssl + # The str() of a SSLError doesn't include the errno + e = _ssl.SSLError(1, "foo") + assert str(e) == "foo" + assert e.errno == 1 + # Same for a subclass + e = _ssl.SSLZeroReturnError(1, "foo") + assert str(e) == "foo" + assert e.errno == 1 + + def test_subclass(self): + import ssl + import socket + # Check that the 
appropriate SSLError subclass is raised + # (this only tests one of them) + ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + s.listen(5) + c = socket.socket() + c.connect(s.getsockname()) + c.setblocking(False) + with ctx.wrap_socket(c, False, do_handshake_on_connect=False) as c: + exc = raises(ssl.SSLWantReadError, c.do_handshake) + assert str(exc.value).startswith("The operation did not complete (read)"), s + # For compatibility + assert exc.value.errno == ssl.SSL_ERROR_WANT_READ SSL_CERTIFICATE = """ diff --git a/rpython/rlib/ropenssl.py b/rpython/rlib/ropenssl.py --- a/rpython/rlib/ropenssl.py +++ b/rpython/rlib/ropenssl.py @@ -82,6 +82,10 @@ SSL_OP_NO_TLSv1 = rffi_platform.ConstantInteger("SSL_OP_NO_TLSv1") SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS = rffi_platform.ConstantInteger( "SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS") + SSL_OP_CIPHER_SERVER_PREFERENCE = rffi_platform.ConstantInteger( + "SSL_OP_CIPHER_SERVER_PREFERENCE") + SSL_OP_SINGLE_DH_USE = rffi_platform.ConstantInteger( + "SSL_OP_SINGLE_DH_USE") HAS_SNI = rffi_platform.Defined("SSL_CTRL_SET_TLSEXT_HOSTNAME") SSL_VERIFY_NONE = rffi_platform.ConstantInteger("SSL_VERIFY_NONE") SSL_VERIFY_PEER = rffi_platform.ConstantInteger("SSL_VERIFY_PEER") From noreply at buildbot.pypy.org Sun Jun 22 10:48:22 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 10:48:22 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: pythonapi is not windows specific Message-ID: <20140622084822.C54FD1C1041@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72129:5cc32885d631 Date: 2014-06-22 10:21 +0300 http://bitbucket.org/pypy/pypy/changeset/5cc32885d631/ Log: pythonapi is not windows specific diff --git a/pypy/doc/ctypes-implementation.rst b/pypy/doc/ctypes-implementation.rst --- a/pypy/doc/ctypes-implementation.rst +++ b/pypy/doc/ctypes-implementation.rst @@ -72,7 +72,7 @@ Here is a list of the limitations and missing features of 
the current implementation: -* ``ctypes.pythonapi`` (Windows only) lets you access the CPython C API +* ``ctypes.pythonapi`` lets you access the CPython C API emulation layer. It does not work on PyPy at the moment, we are missing a ``getfunc`` method for CDLL. Work was begun to refactor the rpython implementation of _rawffi (in From noreply at buildbot.pypy.org Sun Jun 22 10:48:24 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 10:48:24 +0200 (CEST) Subject: [pypy-commit] pypy default: putenv is _putenv on windows Message-ID: <20140622084824.064E11C1041@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72130:308e64d3c884 Date: 2014-06-22 10:27 +0300 http://bitbucket.org/pypy/pypy/changeset/308e64d3c884/ Log: putenv is _putenv on windows diff --git a/rpython/rtyper/module/ll_os_environ.py b/rpython/rtyper/module/ll_os_environ.py --- a/rpython/rtyper/module/ll_os_environ.py +++ b/rpython/rtyper/module/ll_os_environ.py @@ -60,7 +60,7 @@ # ____________________________________________________________ # Access to the 'environ' external variable - +prefix = '' if sys.platform.startswith('darwin'): CCHARPPP = rffi.CArrayPtr(rffi.CCHARPP) _os_NSGetEnviron = rffi.llexternal( @@ -77,6 +77,7 @@ rffi.CCHARPP, '_environ', eci) get__wenviron, _set__wenviron = rffi.CExternVariable( CWCHARPP, '_wenviron', eci, c_type='wchar_t **') + prefix = '_' else: os_get_environ, _os_set_environ = rffi.CExternVariable( rffi.CCHARPP, 'environ', ExternalCompilationInfo()) @@ -117,7 +118,7 @@ os_getenv = rffi.llexternal('getenv', [rffi.CCHARP], rffi.CCHARP, releasegil=False) -os_putenv = rffi.llexternal('putenv', [rffi.CCHARP], rffi.INT) +os_putenv = rffi.llexternal(prefix + 'putenv', [rffi.CCHARP], rffi.INT) if _WIN32: _wgetenv = rffi.llexternal('_wgetenv', [rffi.CWCHARP], rffi.CWCHARP, compilation_info=eci, releasegil=False) From noreply at buildbot.pypy.org Sun Jun 22 17:00:40 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 17:00:40 
+0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: repack source for 2.3.1 Message-ID: <20140622150040.7117C1D2B39@cobra.cs.uni-duesseldorf.de> Author: Matti Picus Branch: extradoc Changeset: r511:21c4f4cdc21e Date: 2014-06-22 18:01 +0300 http://bitbucket.org/pypy/pypy.org/changeset/21c4f4cdc21e/ Log: repack source for 2.3.1 diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -97,8 +97,8 @@
  • Mac OS/X binary (64bit)
  • Windows binary (32bit) (you might need the VS 2008 runtime library installer vcredist_x86.exe.)
  • -
  • Source (tar.bz2)
  • -
  • Source (zip)
  • +
  • Source (tar.bz2)
  • +
  • Source (zip)
  • All our downloads, including previous versions. We also have a mirror, but please use only if you have troubles accessing the links above
@@ -196,8 +196,8 @@
  • Get the source code. The following packages contain the source at the same revision as the above binaries:

    Or you can checkout the current trunk using Mercurial (the trunk usually works and is of course more up-to-date):

    @@ -273,6 +273,8 @@ 39b8c4c7ae8ff9f7dd8b854834b530ef pypy-2.3.1-osx64.tar.bz2 aa71e065f89849a9f641cc3ca8d80ebc pypy-2.3.1-win32.zip 5dab108869a039b754da5c07046fb17c pypy-2.3.1-linux-armel.tar.bz2 +2b9aeccef1587a42fb5a4cc304b5d881 pypy-2.3.1-src.tar.bz2 +15c068c357d60719086b23e0bf9d0a5b pypy-2.3.1-src.zip a86da5688dfd84e0485239f8991af308 pypy3-2.3.1-linux64.tar.bz2 b0d6a0491e9c9be39d3c314c0823a039 pypy3-2.3.1-linux-armel.tar.bz2 f79f7b5588d2b5a68d2781908bc8f9af pypy3-2.3.1-linux-armhf-raring.tar.bz2 @@ -291,6 +293,8 @@ 4d9cdf801e4c8fb432b17be0edf76eb3d9360f40 pypy-2.3.1-osx64.tar.bz2 08639771f26188739a82408454188582c6e25ce9 pypy-2.3.1-win32.zip ad8ebf67c5ccf354513a9cdb0586080b5964a5bd pypy-2.3.1-linux-armel.tar.bz2 +833b33042456fe381cae4481b2eb536c5787d6c7 pypy-2.3.1-src.tar.bz2 +0d3f750fc28713eca77a91388c5a63843406d631 pypy-2.3.1-src.zip 7276a9e97229e754f66863a0793c59066120ec51 pypy3-2.3.1-linux64.tar.bz2 fb52a30be0fd4c7d8686c98e03595a8b48b11e82 pypy3-2.3.1-linux-armel.tar.bz2 0239677fe28a4c22a70853242368456b98ac665a pypy3-2.3.1-linux-armhf-raring.tar.bz2 diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -93,8 +93,8 @@ .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-osx64.tar.bz2 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-win32.zip .. _`VS 2008 runtime library installer vcredist_x86.exe`: http://www.microsoft.com/en-us/download/details.aspx?id=5582 -.. __: https://bitbucket.org/pypy/pypy/get/release-2.3.1.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/get/release-2.3.1.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.zip .. __: https://bitbucket.org/pypy/pypy/downloads .. _mirror: http://cobra.cs.uni-duesseldorf.de/~buildmaster/mirror/ @@ -230,8 +230,8 @@ * `pypy-2.3.1-src.tar.bz2`__ (sources, Unix line endings) * `pypy-2.3.1-src.zip`__ (sources, Unix line endings too) - .. 
__: https://bitbucket.org/pypy/pypy/get/release-2.3.1.tar.bz2 - .. __: https://bitbucket.org/pypy/pypy/get/release-2.3.1.zip + .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.tar.bz2 + .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.zip Or you can checkout the current trunk using Mercurial_ (the trunk usually works and is of course more up-to-date):: @@ -316,6 +316,8 @@ 39b8c4c7ae8ff9f7dd8b854834b530ef pypy-2.3.1-osx64.tar.bz2 aa71e065f89849a9f641cc3ca8d80ebc pypy-2.3.1-win32.zip 5dab108869a039b754da5c07046fb17c pypy-2.3.1-linux-armel.tar.bz2 + 2b9aeccef1587a42fb5a4cc304b5d881 pypy-2.3.1-src.tar.bz2 + 15c068c357d60719086b23e0bf9d0a5b pypy-2.3.1-src.zip a86da5688dfd84e0485239f8991af308 pypy3-2.3.1-linux64.tar.bz2 b0d6a0491e9c9be39d3c314c0823a039 pypy3-2.3.1-linux-armel.tar.bz2 f79f7b5588d2b5a68d2781908bc8f9af pypy3-2.3.1-linux-armhf-raring.tar.bz2 @@ -335,6 +337,8 @@ 4d9cdf801e4c8fb432b17be0edf76eb3d9360f40 pypy-2.3.1-osx64.tar.bz2 08639771f26188739a82408454188582c6e25ce9 pypy-2.3.1-win32.zip ad8ebf67c5ccf354513a9cdb0586080b5964a5bd pypy-2.3.1-linux-armel.tar.bz2 + 833b33042456fe381cae4481b2eb536c5787d6c7 pypy-2.3.1-src.tar.bz2 + 0d3f750fc28713eca77a91388c5a63843406d631 pypy-2.3.1-src.zip 7276a9e97229e754f66863a0793c59066120ec51 pypy3-2.3.1-linux64.tar.bz2 fb52a30be0fd4c7d8686c98e03595a8b48b11e82 pypy3-2.3.1-linux-armel.tar.bz2 0239677fe28a4c22a70853242368456b98ac665a pypy3-2.3.1-linux-armhf-raring.tar.bz2 From noreply at buildbot.pypy.org Sun Jun 22 17:16:31 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 17:16:31 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: A branch to make 'ec' a thread-local, with support in the JIT Message-ID: <20140622151631.7642D1C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72131:06e5fa85d805 Date: 2014-06-22 17:00 +0200 http://bitbucket.org/pypy/pypy/changeset/06e5fa85d805/ Log: A branch to make 'ec' a thread-local, with support in the JIT 
From noreply at buildbot.pypy.org Sun Jun 22 17:16:32 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 17:16:32 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: in-progress Message-ID: <20140622151632.A51F41C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72132:22aa4ef3e414 Date: 2014-06-22 17:06 +0200 http://bitbucket.org/pypy/pypy/changeset/22aa4ef3e414/ Log: in-progress diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -395,6 +395,7 @@ def startup(self): # To be called before using the space + self.threadlocals.enter_thread(self) # Initialize already imported builtin modules from pypy.interpreter.module import Module @@ -639,30 +640,33 @@ """NOT_RPYTHON: Abstract method that should put some minimal content into the w_builtins.""" - @jit.loop_invariant def getexecutioncontext(self): "Return what we consider to be the active execution context." # Important: the annotator must not see a prebuilt ExecutionContext: # you should not see frames while you translate # so we make sure that the threadlocals never *have* an # ExecutionContext during translation. 
- if self.config.translating and not we_are_translated(): - assert self.threadlocals.getvalue() is None, ( - "threadlocals got an ExecutionContext during translation!") - try: - return self._ec_during_translation - except AttributeError: - ec = self.createexecutioncontext() - self._ec_during_translation = ec + if not we_are_translated(): + if self.config.translating: + assert self.threadlocals.get_ec() is None, ( + "threadlocals got an ExecutionContext during translation!") + try: + return self._ec_during_translation + except AttributeError: + ec = self.createexecutioncontext() + self._ec_during_translation = ec + return ec + else: + ec = self.threadlocals.get_ec() + if ec is None: + self.threadlocals.enter_thread(self) + ec = self.threadlocals.get_ec() return ec - # normal case follows. The 'thread' module installs a real - # thread-local object in self.threadlocals, so this builds - # and caches a new ec in each thread. - ec = self.threadlocals.getvalue() - if ec is None: - ec = self.createexecutioncontext() - self.threadlocals.setvalue(ec) - return ec + else: + # translated case follows. self.threadlocals is either from + # 'pypy.interpreter.miscutils' or 'pypy.module.thread.threadlocals'. + # the result is assumed to be non-null: enter_thread() was called. 
+ return self.threadlocals.get_ec() def _freeze_(self): return True diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -11,11 +11,11 @@ """ _value = None - def getvalue(self): + def get_ec(self): return self._value - def setvalue(self, value): - self._value = value + def enter_thread(self, space): + self._value = space.createexecutioncontext() def signals_enabled(self): return True diff --git a/pypy/module/thread/__init__.py b/pypy/module/thread/__init__.py --- a/pypy/module/thread/__init__.py +++ b/pypy/module/thread/__init__.py @@ -26,10 +26,11 @@ "NOT_RPYTHON: patches space.threadlocals to use real threadlocals" from pypy.module.thread import gil MixedModule.__init__(self, space, *args) - prev = space.threadlocals.getvalue() + prev_ec = space.threadlocals.get_ec() space.threadlocals = gil.GILThreadLocals() space.threadlocals.initialize(space) - space.threadlocals.setvalue(prev) + if prev_ec is not None: + space.threadlocals._set_ec(prev_ec) from pypy.module.posix.interp_posix import add_fork_hook from pypy.module.thread.os_thread import reinit_threads diff --git a/pypy/module/thread/os_thread.py b/pypy/module/thread/os_thread.py --- a/pypy/module/thread/os_thread.py +++ b/pypy/module/thread/os_thread.py @@ -126,6 +126,8 @@ release = staticmethod(release) def run(space, w_callable, args): + # add the ExecutionContext to space.threadlocals + space.threadlocals.enter_thread(space) try: space.call_args(w_callable, args) except OperationError, e: diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -1,10 +1,13 @@ from rpython.rlib import rthread +from rpython.rlib.objectmodel import we_are_translated from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import ExecutionContext ExecutionContext._signals_enabled = 0 # 
default value +raw_thread_local = rthread.ThreadLocalReference(ExecutionContext) + class OSThreadLocals: """Thread-local storage for OS-level threads. @@ -19,47 +22,54 @@ def _cleanup_(self): self._valuedict.clear() self._mainthreadident = 0 - self._mostrecentkey = 0 # fast minicaching for the common case - self._mostrecentvalue = None # fast minicaching for the common case - def getvalue(self): + def enter_thread(self, space): + "Notification that the current thread is about to start running." + self._set_ec(space.createexecutioncontext()) + + def _set_ec(self, ec): ident = rthread.get_ident() - if ident == self._mostrecentkey: - result = self._mostrecentvalue - else: - value = self._valuedict.get(ident, None) - # slow path: update the minicache - self._mostrecentkey = ident - self._mostrecentvalue = value - result = value - return result + if self._mainthreadident == 0 or self._mainthreadident == ident: + ec._signals_enabled = 1 # the main thread is enabled + self._mainthreadident = ident + self._valuedict[ident] = ec + # This logic relies on hacks and _make_sure_does_not_move(). + # It only works because we keep the 'ec' alive in '_valuedict' too. + raw_thread_local.set(ec) - def setvalue(self, value): - ident = rthread.get_ident() - if value is not None: - if self._mainthreadident == 0: - value._signals_enabled = 1 # the main thread is enabled - self._mainthreadident = ident - self._valuedict[ident] = value - else: + def leave_thread(self, space): + "Notification that the current thread is about to stop." 
+ from pypy.module.thread.os_local import thread_is_stopping + ec = self.get_ec() + if ec is not None: try: - del self._valuedict[ident] - except KeyError: - pass - # update the minicache to prevent it from containing an outdated value - self._mostrecentkey = ident - self._mostrecentvalue = value + thread_is_stopping(ec) + finally: + raw_thread_local.set(None) + ident = rthread.get_ident() + try: + del self._valuedict[ident] + except KeyError: + pass + + def get_ec(self): + ec = raw_thread_local.get() + if not we_are_translated(): + assert ec is self._valuedict.get(rthread.get_ident(), None) + return ec def signals_enabled(self): - ec = self.getvalue() + ec = self.get_ec() return ec is not None and ec._signals_enabled def enable_signals(self, space): - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None ec._signals_enabled += 1 def disable_signals(self, space): - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None new = ec._signals_enabled - 1 if new < 0: raise wrap_thread_error(space, @@ -69,22 +79,13 @@ def getallvalues(self): return self._valuedict - def leave_thread(self, space): - "Notification that the current thread is about to stop." 
- from pypy.module.thread.os_local import thread_is_stopping - ec = self.getvalue() - if ec is not None: - try: - thread_is_stopping(ec) - finally: - self.setvalue(None) - def reinit_threads(self, space): "Called in the child process after a fork()" ident = rthread.get_ident() - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None if ident != self._mainthreadident: ec._signals_enabled += 1 self._cleanup_() self._mainthreadident = ident - self.setvalue(ec) + self._set_ec(ec) diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -272,3 +272,43 @@ llop.gc_thread_after_fork(lltype.Void, result_of_fork, opaqueaddr) else: assert opaqueaddr == llmemory.NULL + +# ____________________________________________________________ +# +# Thread-locals. Only for references that are not changed often. +# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR! +# We use _make_sure_does_not_move() to make sure the pointer will not move. 
+ +class ThreadLocalReference(object): + _COUNT = 1 + + def __init__(self, Cls): + "NOT_RPYTHON: must be prebuilt" + import thread + self.Cls = Cls + self.local = thread._local() # <- NOT_RPYTHON + self.unique_id = ThreadLocalReference._COUNT + ThreadLocalReference._COUNT += 1 + + def _freeze_(self): + return True + + @specialize.arg(0) + def get(self): + if we_are_translated(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance + ptr = llop.threadlocalref_get(rclass.OBJECTPTR, self.unique_id) + return cast_base_ptr_to_instance(self.Cls, ptr) + else: + return getattr(self.local, 'value', None) + + @specialize.arg(0) + def set(self, value): + assert isinstance(value, self.Cls) or value is None + if we_are_translated(): + from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr + ptr = cast_instance_to_base_ptr(value) + llop.threadlocalref_set(lltype.Void, self.unique_id, ptr) + else: + self.local.value = value From noreply at buildbot.pypy.org Sun Jun 22 17:16:33 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 17:16:33 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Copy a test from rstm. Add another (failing) translation test. Message-ID: <20140622151633.CE1741C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72133:a34d95b12b36 Date: 2014-06-22 17:15 +0200 http://bitbucket.org/pypy/pypy/changeset/a34d95b12b36/ Log: Copy a test from rstm. Add another (failing) translation test. 
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -304,6 +304,7 @@ return getattr(self.local, 'value', None) @specialize.arg(0) + @jit.dont_look_inside def set(self, value): assert isinstance(value, self.Cls) or value is None if we_are_translated(): diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py --- a/rpython/rlib/test/test_rthread.py +++ b/rpython/rlib/test/test_rthread.py @@ -1,4 +1,4 @@ -import gc +import gc, time from rpython.rlib.rthread import * from rpython.translator.c.test.test_boehm import AbstractGCTestClass from rpython.rtyper.lltypesystem import lltype, rffi @@ -29,6 +29,23 @@ else: py.test.fail("Did not raise") +def test_tlref_untranslated(): + class FooBar(object): + pass + t = ThreadLocalReference(FooBar) + results = [] + def subthread(): + x = FooBar() + results.append(t.get() is None) + t.set(x) + results.append(t.get() is x) + time.sleep(0.2) + results.append(t.get() is x) + for i in range(5): + start_new_thread(subthread, ()) + time.sleep(0.5) + assert results == [True] * 15 + class AbstractThreadTests(AbstractGCTestClass): use_threads = True @@ -198,6 +215,19 @@ res = fn() assert res >= 0.95 + def test_tlref(self): + class FooBar(object): + pass + t = ThreadLocalReference(FooBar) + def f(): + x1 = FooBar() + t.set(x1) + assert t.get() is x1 + return 42 + fn = self.getcompiled(f, []) + res = fn() + assert res == 42 + #class TestRunDirectly(AbstractThreadTests): # def getcompiled(self, f, argtypes): # return f diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -541,6 +541,9 @@ 'getslice': LLOp(canraise=(Exception,)), 'check_and_clear_exc': LLOp(), + 'threadlocalref_get': LLOp(sideeffects=False), + 'threadlocalref_set': LLOp(), + # __________ debugging __________ 'debug_view': LLOp(), 
'debug_print': LLOp(canrun=True), From noreply at buildbot.pypy.org Sun Jun 22 17:36:02 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 17:36:02 +0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: typo Message-ID: <20140622153602.833B41D2CA9@cobra.cs.uni-duesseldorf.de> Author: Matti Picus Branch: extradoc Changeset: r512:2aa919823db1 Date: 2014-06-22 18:36 +0300 http://bitbucket.org/pypy/pypy.org/changeset/2aa919823db1/ Log: typo diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -97,8 +97,8 @@
  • Mac OS/X binary (64bit)
  • Windows binary (32bit) (you might need the VS 2008 runtime library installer vcredist_x86.exe.)
  • -
  • Source (tar.bz2)
  • -
  • Source (zip)
  • +
  • Source (tar.bz2)
  • +
  • Source (zip)
  • All our downloads, including previous versions. We also have a mirror, but please use only if you have troubles accessing the links above
  • @@ -196,8 +196,8 @@
  • Get the source code. The following packages contain the source at the same revision as the above binaries:

    Or you can checkout the current trunk using Mercurial (the trunk usually works and is of course more up-to-date):

    diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -93,8 +93,8 @@ .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-osx64.tar.bz2 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-win32.zip .. _`VS 2008 runtime library installer vcredist_x86.exe`: http://www.microsoft.com/en-us/download/details.aspx?id=5582 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-src.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-src.zip .. __: https://bitbucket.org/pypy/pypy/downloads .. _mirror: http://cobra.cs.uni-duesseldorf.de/~buildmaster/mirror/ @@ -230,8 +230,8 @@ * `pypy-2.3.1-src.tar.bz2`__ (sources, Unix line endings) * `pypy-2.3.1-src.zip`__ (sources, Unix line endings too) - .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.tar.bz2 - .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1.zip + .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-src.tar.bz2 + .. 
__: https://bitbucket.org/pypy/pypy/downloads/pypy-2.3.1-src.zip Or you can checkout the current trunk using Mercurial_ (the trunk usually works and is of course more up-to-date):: From noreply at buildbot.pypy.org Sun Jun 22 17:47:58 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 17:47:58 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Hack around until ThreadLocalReference translates Message-ID: <20140622154758.A2B421C114A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72134:3f04bb5349f6 Date: 2014-06-22 17:47 +0200 http://bitbucket.org/pypy/pypy/changeset/3f04bb5349f6/ Log: Hack around until ThreadLocalReference translates diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -281,14 +281,18 @@ class ThreadLocalReference(object): _COUNT = 1 + OPAQUEID = lltype.OpaqueType("ThreadLocalRef", + hints={"threadlocalref": True}) def __init__(self, Cls): "NOT_RPYTHON: must be prebuilt" import thread self.Cls = Cls self.local = thread._local() # <- NOT_RPYTHON - self.unique_id = ThreadLocalReference._COUNT + unique_id = ThreadLocalReference._COUNT ThreadLocalReference._COUNT += 1 + self.opaque_id = lltype.opaqueptr(ThreadLocalReference.OPAQUEID, + 'tlref%d' % unique_id) def _freeze_(self): return True @@ -298,7 +302,7 @@ if we_are_translated(): from rpython.rtyper.lltypesystem import rclass from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance - ptr = llop.threadlocalref_get(rclass.OBJECTPTR, self.unique_id) + ptr = llop.threadlocalref_get(rclass.OBJECTPTR, self.opaque_id) return cast_base_ptr_to_instance(self.Cls, ptr) else: return getattr(self.local, 'value', None) @@ -310,6 +314,6 @@ if we_are_translated(): from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr ptr = cast_instance_to_base_ptr(value) - llop.threadlocalref_set(lltype.Void, self.unique_id, ptr) + llop.threadlocalref_set(lltype.Void, 
self.opaque_id, ptr) else: self.local.value = value diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py --- a/rpython/translator/c/node.py +++ b/rpython/translator/c/node.py @@ -959,12 +959,30 @@ args.append('0') yield 'RPyOpaque_SETUP_%s(%s);' % (T.tag, ', '.join(args)) +class ThreadLocalRefOpaqueNode(ContainerNode): + nodekind = 'tlrefopaque' + + def basename(self): + return self.obj._name + + def enum_dependencies(self): + return [] + + def initializationexpr(self, decoration=''): + return ['{ NULL }'] + + def startupcode(self): + p = self.getptrname() + yield 'RPyThreadStaticTLS_Create(%s);' % (p,) + def opaquenode_factory(db, T, obj): if T == RuntimeTypeInfo: return db.gcpolicy.rtti_node_factory()(db, T, obj) if T.hints.get("render_structure", False): return ExtType_OpaqueNode(db, T, obj) + if T.hints.get("threadlocalref", False): + return ThreadLocalRefOpaqueNode(db, T, obj) raise Exception("don't know about %r" % (T,)) diff --git a/rpython/translator/c/src/g_include.h b/rpython/translator/c/src/g_include.h --- a/rpython/translator/c/src/g_include.h +++ b/rpython/translator/c/src/g_include.h @@ -19,6 +19,7 @@ #include "src/address.h" #include "src/unichar.h" #include "src/llgroup.h" +#include "src/threadlocal.h" #include "src/instrument.h" #include "src/asm.h" diff --git a/rpython/translator/c/src/stack.c b/rpython/translator/c/src/stack.c --- a/rpython/translator/c/src/stack.c +++ b/rpython/translator/c/src/stack.c @@ -32,12 +32,7 @@ /* XXX We assume that initialization is performed early, when there is still only one thread running. This allows us to ignore race conditions here */ - char *errmsg = RPyThreadStaticTLS_Create(&end_tls_key); - if (errmsg) { - /* XXX should we exit the process? 
*/ - fprintf(stderr, "Internal PyPy error: %s\n", errmsg); - return 1; - } + RPyThreadStaticTLS_Create(&end_tls_key); } baseptr = (char *) RPyThreadStaticTLS_Get(end_tls_key); diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c --- a/rpython/translator/c/src/threadlocal.c +++ b/rpython/translator/c/src/threadlocal.c @@ -2,23 +2,25 @@ #ifdef _WIN32 -char *RPyThreadTLS_Create(RPyThreadTLS *result) +void RPyThreadTLS_Create(RPyThreadTLS *result) { *result = TlsAlloc(); - if (*result == TLS_OUT_OF_INDEXES) - return "out of thread-local storage indexes"; - else - return NULL; + if (*result == TLS_OUT_OF_INDEXES) { + fprintf(stderr, "Internal RPython error: " + "out of thread-local storage indexes"); + abort(); + } } #else -char *RPyThreadTLS_Create(RPyThreadTLS *result) +void RPyThreadTLS_Create(RPyThreadTLS *result) { - if (pthread_key_create(result, NULL) != 0) - return "out of thread-local storage keys"; - else - return NULL; + if (pthread_key_create(result, NULL) != 0) { + fprintf(stderr, "Internal RPython error: " + "out of thread-local storage keys"); + abort(); + } } #endif diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h --- a/rpython/translator/c/src/threadlocal.h +++ b/rpython/translator/c/src/threadlocal.h @@ -1,4 +1,7 @@ /* Thread-local storage */ +#ifndef _SRC_THREADLOCAL_H +#define _SRC_THREADLOCAL_H + #ifdef _WIN32 @@ -22,7 +25,7 @@ #ifdef USE___THREAD #define RPyThreadStaticTLS __thread void * -#define RPyThreadStaticTLS_Create(tls) NULL +#define RPyThreadStaticTLS_Create(tls) (void)0 #define RPyThreadStaticTLS_Get(tls) tls #define RPyThreadStaticTLS_Set(tls, value) tls = value @@ -34,7 +37,15 @@ #define RPyThreadStaticTLS_Create(key) RPyThreadTLS_Create(key) #define RPyThreadStaticTLS_Get(key) RPyThreadTLS_Get(key) #define RPyThreadStaticTLS_Set(key, value) RPyThreadTLS_Set(key, value) -char *RPyThreadTLS_Create(RPyThreadTLS *result); +void 
RPyThreadTLS_Create(RPyThreadTLS *result); #endif + +struct pypy_opaque_ThreadLocalRef { void *gcref; }; + +#define OP_THREADLOCALREF_SET(tlref, ptr, _) tlref->gcref = ptr +#define OP_THREADLOCALREF_GET(tlref, ptr) ptr = tlref->gcref + + +#endif /* _SRC_THREADLOCAL_H */ From noreply at buildbot.pypy.org Sun Jun 22 18:08:14 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 18:08:14 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Fixes Message-ID: <20140622160814.200F21C114A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72135:50f6c8e8814e Date: 2014-06-22 18:07 +0200 http://bitbucket.org/pypy/pypy/changeset/50f6c8e8814e/ Log: Fixes diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -282,7 +282,9 @@ class ThreadLocalReference(object): _COUNT = 1 OPAQUEID = lltype.OpaqueType("ThreadLocalRef", - hints={"threadlocalref": True}) + hints={"threadlocalref": True, + "external": "C", + "c_name": "RPyThreadStaticTLS"}) def __init__(self, Cls): "NOT_RPYTHON: must be prebuilt" diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py --- a/rpython/translator/c/node.py +++ b/rpython/translator/c/node.py @@ -969,7 +969,7 @@ return [] def initializationexpr(self, decoration=''): - return ['{ NULL }'] + return ['0'] def startupcode(self): p = self.getptrname() diff --git a/rpython/translator/c/src/g_include.h b/rpython/translator/c/src/g_include.h --- a/rpython/translator/c/src/g_include.h +++ b/rpython/translator/c/src/g_include.h @@ -19,7 +19,6 @@ #include "src/address.h" #include "src/unichar.h" #include "src/llgroup.h" -#include "src/threadlocal.h" #include "src/instrument.h" #include "src/asm.h" diff --git a/rpython/translator/c/src/g_prerequisite.h b/rpython/translator/c/src/g_prerequisite.h --- a/rpython/translator/c/src/g_prerequisite.h +++ b/rpython/translator/c/src/g_prerequisite.h @@ -23,3 +23,6 @@ # define 
RPY_LENGTH0 1 /* array decl [0] are bad */ # define RPY_DUMMY_VARLENGTH /* nothing */ #endif + + +#include "src/threadlocal.h" diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h --- a/rpython/translator/c/src/threadlocal.h +++ b/rpython/translator/c/src/threadlocal.h @@ -42,10 +42,8 @@ #endif -struct pypy_opaque_ThreadLocalRef { void *gcref; }; - -#define OP_THREADLOCALREF_SET(tlref, ptr, _) tlref->gcref = ptr -#define OP_THREADLOCALREF_GET(tlref, ptr) ptr = tlref->gcref +#define OP_THREADLOCALREF_SET(tlref, ptr, _) RPyThreadStaticTLS_Set(*tlref, ptr) +#define OP_THREADLOCALREF_GET(tlref, ptr) ptr = RPyThreadStaticTLS_Get(*tlref) #endif /* _SRC_THREADLOCAL_H */ From noreply at buildbot.pypy.org Sun Jun 22 18:33:37 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 18:33:37 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Move this prebuilt instance in the class, instead of having it be a real Message-ID: <20140622163337.3663C1C114A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72136:195d984c8ff1 Date: 2014-06-22 18:20 +0200 http://bitbucket.org/pypy/pypy/changeset/195d984c8ff1/ Log: Move this prebuilt instance in the class, instead of having it be a real global. 
diff --git a/pypy/module/thread/test/test_gil.py b/pypy/module/thread/test/test_gil.py --- a/pypy/module/thread/test/test_gil.py +++ b/pypy/module/thread/test/test_gil.py @@ -64,13 +64,14 @@ except Exception, e: assert 0 thread.gc_thread_die() + my_gil_threadlocals = gil.GILThreadLocals() def f(): state.data = [] state.datalen1 = 0 state.datalen2 = 0 state.datalen3 = 0 state.datalen4 = 0 - state.threadlocals = gil.GILThreadLocals() + state.threadlocals = my_gil_threadlocals state.threadlocals.setup_threads(space) subident = thread.start_new_thread(bootstrap, ()) mainident = thread.get_ident() diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -6,8 +6,6 @@ ExecutionContext._signals_enabled = 0 # default value -raw_thread_local = rthread.ThreadLocalReference(ExecutionContext) - class OSThreadLocals: """Thread-local storage for OS-level threads. @@ -16,8 +14,10 @@ os_thread.bootstrap().""" def __init__(self): + "NOT_RPYTHON" self._valuedict = {} # {thread_ident: ExecutionContext()} self._cleanup_() + self.raw_thread_local = rthread.ThreadLocalReference(ExecutionContext) def _cleanup_(self): self._valuedict.clear() @@ -35,7 +35,7 @@ self._valuedict[ident] = ec # This logic relies on hacks and _make_sure_does_not_move(). # It only works because we keep the 'ec' alive in '_valuedict' too. - raw_thread_local.set(ec) + self.raw_thread_local.set(ec) def leave_thread(self, space): "Notification that the current thread is about to stop." 
@@ -45,7 +45,7 @@ try: thread_is_stopping(ec) finally: - raw_thread_local.set(None) + self.raw_thread_local.set(None) ident = rthread.get_ident() try: del self._valuedict[ident] @@ -53,7 +53,7 @@ pass def get_ec(self): - ec = raw_thread_local.get() + ec = self.raw_thread_local.get() if not we_are_translated(): assert ec is self._valuedict.get(rthread.get_ident(), None) return ec From noreply at buildbot.pypy.org Sun Jun 22 20:35:53 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Sun, 22 Jun 2014 20:35:53 +0200 (CEST) Subject: [pypy-commit] pypy py3.3: fix (though unicodedata.ucd_ doesn't exist on cpython anyway) Message-ID: <20140622183553.084DD1C0CA6@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3.3 Changeset: r72137:7903d6671cda Date: 2014-06-22 11:35 -0700 http://bitbucket.org/pypy/pypy/changeset/7903d6671cda/ Log: fix (though unicodedata.ucd_ doesn't exist on cpython anyway) diff --git a/pypy/module/unicodedata/__init__.py b/pypy/module/unicodedata/__init__.py --- a/pypy/module/unicodedata/__init__.py +++ b/pypy/module/unicodedata/__init__.py @@ -14,7 +14,7 @@ interpleveldefs = { 'unidata_version' : 'space.wrap(interp_ucd.ucd.version)', 'ucd_3_2_0' : 'space.wrap(interp_ucd.ucd_3_2_0)', - 'ucd_6_0_0' : 'space.wrap(interp_ucd.ucd_6_2_0)', + 'ucd_6_2_0' : 'space.wrap(interp_ucd.ucd_6_2_0)', 'ucd' : 'space.wrap(interp_ucd.ucd)', '__doc__' : "space.wrap('unicode character database')", } From noreply at buildbot.pypy.org Sun Jun 22 21:39:49 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 21:39:49 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: implement windows sys.dllhandle as a _rawffi.alt.CDLL Message-ID: <20140622193949.336541C0CA6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72138:8c81f5d58b5c Date: 2014-06-22 21:03 +0300 http://bitbucket.org/pypy/pypy/changeset/8c81f5d58b5c/ Log: implement windows sys.dllhandle as a _rawffi.alt.CDLL which makes cpython.pythonapi on 
windows equivalent to linux diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -312,7 +312,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle = rffi.VOIDP): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -320,7 +320,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle=handle) except DLOpenError, e: raise wrap_dlopenerror(space, e, self.name) diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -244,10 +244,8 @@ handle = space.fromcache(State).get_pythonapi_handle() # Make a dll object with it - from pypy.module._rawffi.interp_rawffi import W_CDLL - from rpython.rlib.clibffi import RawCDLL - cdll = RawCDLL(handle) - return space.wrap(W_CDLL(space, "python api", cdll)) + from pypy.module._rawffi.alt.interp_funcptr import W_CDLL + return space.wrap(W_CDLL(space, "python api", -1, handle=handle)) def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -11,7 +11,7 @@ from rpython.rlib.clibffi import FUNCFLAG_CDECL, FUNCFLAG_STDCALL, \ AbstractFuncPtr, push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT from rpython.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal -from rpython.rlib.rdynload import DLLHANDLE +from rpython.rlib.rdynload import DLLHANDLE, _WIN32 import os @@ -413,9 +413,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, handle=rffi.VOIDP): """Load the library, or 
raises DLOpenError.""" self.lib = rffi.cast(DLLHANDLE, 0) + if handle is not rffi.VOIDP : + self.lib = rffi.cast(DLLHANDLE, handle) + return with rffi.scoped_str2charp(libname) as ll_libname: self.lib = dlopen(ll_libname, mode) diff --git a/rpython/rlib/test/test_libffi.py b/rpython/rlib/test/test_libffi.py --- a/rpython/rlib/test/test_libffi.py +++ b/rpython/rlib/test/test_libffi.py @@ -186,6 +186,24 @@ chain.arg(10) sleep.call(chain, lltype.Void, is_struct=False) + def test_dll_create(self): + if os.name == 'nt': + import sys + if not isinstance(sys.dllhandle, int): + py.test.skip('Run with cpython, not pypy') + dll = CDLL(None, handle=sys.dllhandle) + else: + dll = CDLL(None) + try: + # The pythonapi of the translating python + dll.getaddressindll('Py_OptimizeFlag') + except KeyError: + try: + dll.getaddressindll('PyPy_OptimizeFlag') + except KeyError: + assert False, 'could not find function in pythonapi' + + class TestLibffiCall(BaseFfiTest): """ Test various kind of calls through libffi. From noreply at buildbot.pypy.org Sun Jun 22 21:39:50 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 21:39:50 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: wip Message-ID: <20140622193950.AA0F11C0CA6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72139:5dd1671a3061 Date: 2014-06-22 21:44 +0300 http://bitbucket.org/pypy/pypy/changeset/5dd1671a3061/ Log: wip diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -10,6 +10,7 @@ from pypy.module.micronumpy.base import W_NDimArray from pypy.module.micronumpy.iterators import PureShapeIter, AxisIter, \ AllButAxisIter +from pypy.interpreter.argument import Arguments call2_driver = jit.JitDriver( @@ -93,12 +94,13 @@ def call_many_to_one(space, shape, func, res_dtype, w_in, out): # out must hav been built. 
func needs no calc_type, is usually an # external ufunc - iters_and_states = [i.create_iter(shape) for i in w_in] + iters_and_states = [list(i.create_iter(shape)) for i in w_in] shapelen = len(shape) + out_iter, out_state = out.create_iter(shape) while not out_iter.done(out_state): call_many_to_one_driver.jit_merge_point(shapelen=shapelen, func=func, res_dtype=res_dtype) - vals = [None] + [i_s[0].getitem(i_s[1]) for i_s in iters_and_states] + vals = [i_s[0].getitem(i_s[1]) for i_s in iters_and_states] arglist = space.wrap(vals) out_val = space.call_args(func, Arguments.frompacked(space, arglist)) out_iter.setitem(out_state, out_val.convert_to(space, res_dtype)) @@ -107,6 +109,39 @@ out_state = out_iter.next(out_state) return out +call_many_to_many_driver = jit.JitDriver( + name='numpy_call_many_to_many', + greens=['shapelen', 'func', 'res_dtype'], + reds='auto') + +def call_many_to_many(space, shape, func, res_dtype, w_in, w_out): + # out must hav been built. func needs no calc_type, is usually an + # external ufunc + in_iters_and_states = [list(i.create_iter(shape)) for i in w_in] + shapelen = len(shape) + out_iters_and_states = [list(i.create_iter(shape)) for i in w_out] + # what does the function return? 
+ while not out_iters_and_states[0][0].done(out_iters_and_states[0][1]): + call_many_to_many_driver.jit_merge_point(shapelen=shapelen, func=func, + res_dtype=res_dtype) + vals = [i_s[0].getitem(i_s[1]) for i_s in in_iters_and_states] + arglist = space.wrap(vals) + out_vals = space.call_args(func, Arguments.frompacked(space, arglist)) + # XXX bad form + if not isinstance(out_vals,(list, tuple)): + out_iter, out_state = out_iters_and_states[0] + out_iter.setitem(out_state, out_vals.convert_to(space, res_dtype)) + out_iters_and_states[0][1] = out_iters_and_states[0][0].next(out_iters_and_states[0][1]) + else: + for i in range(len(out_iters_and_states)): + out_iter, out_state = out_iters_and_states[i] + out_iter.setitem(out_state, out_vals[i].convert_to(space, res_dtype)) + out_iters_and_states[i][1] = out_iters_and_states[i][0].next(out_iters_and_states[i][1]) + for i in range(len(iters_and_states)): + in_iters_and_states[i][1] = in_iters_and_states[i][0].next(in_iters_and_states[i][1]) + return out + + def setslice(space, shape, target, source): # note that unlike everything else, target and source here are diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -493,7 +493,7 @@ self.nin = nin self.nout = nout self.nargs = nin + max(nout, 1) # ufuncs can always be called with an out=<> kwarg - if dtypes != 'match' and (len(dtypes) % len(funcs) != 0 or + if dtypes[0] != 'match' and (len(dtypes) % len(funcs) != 0 or len(dtypes) / len(funcs) != self.nargs): raise oefmt(space.w_ValueError, "generic ufunc with %d functions, %d arguments, but %d dtypes", @@ -527,11 +527,13 @@ index = self.type_resolver(space, inargs, outargs) self.alloc_outargs(space, index, inargs, outargs) # XXX handle inner-loop indexing + new_shape = inargs[0].get_shape() + res_dtype = outargs[0].get_dtype() if len(outargs) < 2: - return loop.call_many_to_one(space, new_shape, self.func, + return 
loop.call_many_to_one(space, new_shape, self.funcs[index], res_dtype, inargs, outargs[0]) - return loop.call_many_to_many(space, new_shape, self.func, - res_dtype, inargs, out) + return loop.call_many_to_many(space, new_shape, self.funcs[index], + res_dtype, inargs, outargs) def type_resolver(self, space, index, outargs): # Find a match for the inargs.dtype in self.dtypes, like @@ -954,16 +956,14 @@ if space.is_none(w_dtypes) and not signature: raise oefmt(space.w_NotImplementedError, 'object dtype requested but not implemented') - if space.isinstance_w(w_dtypes, space.w_str): - if not space.str_w(w_dtypes) == 'match': - raise oefmt(space.w_ValueError, - 'unknown out_dtype value "%s"', space.str_w(w_dtypes)) - dtypes = 'match' elif (space.isinstance_w(w_dtypes, space.w_tuple) or space.isinstance_w(w_dtypes, space.w_list)): dtypes = space.listview(w_dtypes) - for i in range(len(dtypes)): - dtypes[i] = descriptor.decode_w_dtype(space, dtypes[i]) + if space.str_w(dtypes[0]) == 'match': + dtypes = ['match',] + else: + for i in range(len(dtypes)): + dtypes[i] = descriptor.decode_w_dtype(space, dtypes[i]) else: raise oefmt(space.w_ValueError, 'dtypes must be None or a list of dtypes') @@ -976,9 +976,9 @@ raise oefmt(space.w_ValueError, 'identity must be 0, 1, or None') if nin==1 and nout==1 and dtypes == 'match': - w_ret = W_Ufunc1(wrap_ext_func(func[0], name) + w_ret = W_Ufunc1(wrap_ext_func(func[0], name)) elif nin==2 and nout==1 and dtypes == 'match': - w_ret = W_Ufunc2(wrap_ext_func(func[0]), name) + w_ret = W_Ufunc2(wrap_ext_func(func[0], name)) else: w_ret = W_UfuncGeneric(space, func, name, identity, nin, nout, dtypes, signature) if doc: From noreply at buildbot.pypy.org Sun Jun 22 21:39:51 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 21:39:51 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: play around with different parameters, still stuck in loop() with arbitrary function return value Message-ID: 
<20140622193951.E58381C0CA6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72140:f82d10b9c773 Date: 2014-06-22 22:37 +0300 http://bitbucket.org/pypy/pypy/changeset/f82d10b9c773/ Log: play around with different parameters, still stuck in loop() with arbitrary function return value diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -127,7 +127,8 @@ vals = [i_s[0].getitem(i_s[1]) for i_s in in_iters_and_states] arglist = space.wrap(vals) out_vals = space.call_args(func, Arguments.frompacked(space, arglist)) - # XXX bad form + # XXX bad form - out_vals should be a list or tuple of boxes. + # but func can return anything, if not isinstance(out_vals,(list, tuple)): out_iter, out_state = out_iters_and_states[0] out_iter.setitem(out_state, out_vals.convert_to(space, res_dtype)) diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -201,19 +201,27 @@ def adder(a, b): return a+b try: - myufunc = frompyfunc(adder, 2, 1) + adder_ufunc0 = frompyfunc(adder, 2, 1) + adder_ufunc1 = frompyfunc(adder, 2, 1) int_func22 = frompyfunc(int, 2, 2) int_func12 = frompyfunc(int, 1, 2) retype = dtype(object) except NotImplementedError as e: + # dtype of returned value is object, which is not supported yet assert 'object' in str(e) # Use pypy specific extension for out_dtype - myufunc = frompyfunc(adder, 2, 1, dtypes=['match']) - int_func22 = frompyfunc(int, 2, 2, dtypes=['match']) - int_func12 = frompyfunc(int, 1, 2, dtypes=['match']) + adder_ufunc0 = frompyfunc(adder, 2, 1, dtypes=['match']) + adder_ufunc1 = frompyfunc([adder, adder], 2, 1, dtypes=[int, float]) + int_func22 = frompyfunc([int, int], 2, 2, signature='()->()', + dtypes=[int, int, float, int]) + int_func12 = frompyfunc([int, int], 1, 2, signature='()->()', 
+ dtypes=[int, int, float, int]) retype = dtype(int) - assert isinstance(myufunc, ufunc) - res = myufunc(arange(10), arange(10)) + assert isinstance(adder_ufunc1, ufunc) + res = adder_ufunc0(arange(10), arange(10)) + assert res.dtype == retype + assert all(res == arange(10) + arange(10)) + res = adder_ufunc1(arange(10), arange(10)) assert res.dtype == retype assert all(res == arange(10) + arange(10)) raises(TypeError, frompyfunc, 1, 2, 3) diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -975,17 +975,17 @@ else: raise oefmt(space.w_ValueError, 'identity must be 0, 1, or None') - if nin==1 and nout==1 and dtypes == 'match': - w_ret = W_Ufunc1(wrap_ext_func(func[0], name)) - elif nin==2 and nout==1 and dtypes == 'match': - w_ret = W_Ufunc2(wrap_ext_func(func[0], name)) + if nin==1 and nout==1 and dtypes[0] == 'match': + w_ret = W_Ufunc1(wrap_ext_func(space, func[0]), name) + elif nin==2 and nout==1 and dtypes[0] == 'match': + w_ret = W_Ufunc2(wrap_ext_func(space, func[0]), name) else: w_ret = W_UfuncGeneric(space, func, name, identity, nin, nout, dtypes, signature) if doc: w_ret.w_doc = space.wrap(doc) return w_ret -def wrap_ext_func(func): +def wrap_ext_func(space, func): def _func(calc_dtype, w_left, w_right): arglist = space.wrap([w_left, w_right]) return space.call_args(func, Arguments.frompacked(space, arglist)) From noreply at buildbot.pypy.org Sun Jun 22 22:00:50 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 22:00:50 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: jit-transform threadlocalref_get Message-ID: <20140622200050.7C9251D2B39@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72141:c7e0b0352700 Date: 2014-06-22 22:00 +0200 http://bitbucket.org/pypy/pypy/changeset/c7e0b0352700/ Log: jit-transform threadlocalref_get diff --git a/rpython/jit/codewriter/effectinfo.py 
b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -22,6 +22,7 @@ OS_STR2UNICODE = 2 # "str.str2unicode" OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array OS_DICT_LOOKUP = 4 # ll_dict_lookup + OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get # OS_STR_CONCAT = 22 # "stroruni.concat" OS_STR_SLICE = 23 # "stroruni.slice" diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -1903,6 +1903,14 @@ None) return [op0, op1] + def rewrite_op_threadlocalref_get(self, op): + opaqueid = op.args[0].value + op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [], + extra=(opaqueid,), + extrakey=opaqueid._obj) + return self.handle_residual_call(op1, + oopspecindex=EffectInfo.OS_THREADLOCALREF_GET) + # ____________________________________________________________ class NotSupported(Exception): diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py --- a/rpython/jit/codewriter/support.py +++ b/rpython/jit/codewriter/support.py @@ -712,6 +712,11 @@ build_ll_1_raw_free_no_track_allocation = ( build_raw_free_builder(track_allocation=False)) + def build_ll_0_threadlocalref_getter(opaqueid): + def _ll_0_threadlocalref_getter(): + return llop.threadlocalref_get(rclass.OBJECTPTR, opaqueid) + return _ll_0_threadlocalref_getter + def _ll_1_weakref_create(obj): return llop.weakref_create(llmemory.WeakRefPtr, obj) diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -147,6 +147,7 @@ EI.OS_UNIEQ_LENGTHOK: ([PUNICODE, PUNICODE], INT), EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR), EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void), + EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR), } argtypes = 
argtypes[oopspecindex] assert argtypes[0] == [v.concretetype for v in op.args[1:]] @@ -157,6 +158,8 @@ assert extraeffect == EI.EF_CAN_RAISE elif oopspecindex == EI.OS_RAW_FREE: assert extraeffect == EI.EF_CANNOT_RAISE + elif oopspecindex == EI.OS_THREADLOCALREF_GET: + assert extraeffect == None else: assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE return 'calldescr-%d' % oopspecindex @@ -1300,6 +1303,23 @@ assert op1.result is None assert op2 is None +def test_threadlocalref_get(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rlib.rthread import ThreadLocalReference + OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET + class Foo: pass + t = ThreadLocalReference(Foo) + v2 = varoftype(rclass.OBJECTPTR) + c_opaqueid = const(t.opaque_id) + op = SpaceOperation('threadlocalref_get', [c_opaqueid], v2) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op0 = tr.rewrite_operation(op) + assert op0.opname == 'residual_call_r_r' + assert op0.args[0].value == 'threadlocalref_getter' # pseudo-function as str + assert op0.args[1] == ListOfKind("ref", []) + assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET + assert op0.result == v2 + def test_unknown_operation(): op = SpaceOperation('foobar', [], varoftype(lltype.Void)) tr = Transformer() From noreply at buildbot.pypy.org Sun Jun 22 22:05:29 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 22:05:29 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Test and fix Message-ID: <20140622200529.721D11D2B39@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72142:f2d50551501d Date: 2014-06-22 22:04 +0200 http://bitbucket.org/pypy/pypy/changeset/f2d50551501d/ Log: Test and fix diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py new file mode 100644 --- /dev/null +++ b/rpython/jit/metainterp/test/test_threadlocal.py @@ -0,0 +1,26 @@ +import py +from 
rpython.jit.metainterp.test.support import LLJitMixin +from rpython.rlib.rthread import ThreadLocalReference +from rpython.rlib.jit import dont_look_inside + + +class TestThreadLocal(LLJitMixin): + + def test_threadlocalref_get(self): + class Foo: + pass + t = ThreadLocalReference(Foo) + x = Foo() + + @dont_look_inside + def setup(): + t.set(x) + + def f(): + setup() + if t.get() is x: + return 42 + return -666 + + res = self.interp_operations(f, []) + assert res == 42 diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -919,6 +919,17 @@ def op_stack_current(self): return 0 + def op_threadlocalref_set(self, key, value): + try: + d = self.llinterpreter.tlrefsdict + except AttributeError: + d = self.llinterpreter.tlrefsdict = {} + d[key._obj] = value + + def op_threadlocalref_get(self, key): + d = self.llinterpreter.tlrefsdict + return d[key._obj] + # __________________________________________________________ # operations on addresses From noreply at buildbot.pypy.org Sun Jun 22 22:12:33 2014 From: noreply at buildbot.pypy.org (mattip) Date: Sun, 22 Jun 2014 22:12:33 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: coerce not convert_to, first test passes Message-ID: <20140622201233.264871C0CA6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72143:8d88f29c0a86 Date: 2014-06-22 23:12 +0300 http://bitbucket.org/pypy/pypy/changeset/8d88f29c0a86/ Log: coerce not convert_to, first test passes diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -103,7 +103,7 @@ vals = [i_s[0].getitem(i_s[1]) for i_s in iters_and_states] arglist = space.wrap(vals) out_val = space.call_args(func, Arguments.frompacked(space, arglist)) - out_iter.setitem(out_state, out_val.convert_to(space, res_dtype)) + out_iter.setitem(out_state, res_dtype.coerce(space, out_val)) for i in 
range(len(iters_and_states)): iters_and_states[i][1] = iters_and_states[i][0].next(iters_and_states[i][1]) out_state = out_iter.next(out_state) @@ -131,16 +131,16 @@ # but func can return anything, if not isinstance(out_vals,(list, tuple)): out_iter, out_state = out_iters_and_states[0] - out_iter.setitem(out_state, out_vals.convert_to(space, res_dtype)) + out_iter.setitem(out_state, res_dtype.coerce(space, out_vals)) out_iters_and_states[0][1] = out_iters_and_states[0][0].next(out_iters_and_states[0][1]) else: for i in range(len(out_iters_and_states)): out_iter, out_state = out_iters_and_states[i] out_iter.setitem(out_state, out_vals[i].convert_to(space, res_dtype)) out_iters_and_states[i][1] = out_iters_and_states[i][0].next(out_iters_and_states[i][1]) - for i in range(len(iters_and_states)): + for i in range(len(in_iters_and_states)): in_iters_and_states[i][1] = in_iters_and_states[i][0].next(in_iters_and_states[i][1]) - return out + return space.wrap(tuple(w_out)) diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -211,11 +211,11 @@ assert 'object' in str(e) # Use pypy specific extension for out_dtype adder_ufunc0 = frompyfunc(adder, 2, 1, dtypes=['match']) - adder_ufunc1 = frompyfunc([adder, adder], 2, 1, dtypes=[int, float]) + adder_ufunc1 = frompyfunc([adder, adder], 2, 1, dtypes=['match']) int_func22 = frompyfunc([int, int], 2, 2, signature='()->()', - dtypes=[int, int, float, int]) + dtypes=['match']) int_func12 = frompyfunc([int, int], 1, 2, signature='()->()', - dtypes=[int, int, float, int]) + dtypes=['match']) retype = dtype(int) assert isinstance(adder_ufunc1, ufunc) res = adder_ufunc0(arange(10), arange(10)) From noreply at buildbot.pypy.org Sun Jun 22 22:33:48 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 22:33:48 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: 
jtransform it as a EF_LOOPINVARIANT, for now. Message-ID: <20140622203348.15FB61C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72144:4775e5fb74cb Date: 2014-06-22 22:17 +0200 http://bitbucket.org/pypy/pypy/changeset/4775e5fb74cb/ Log: jtransform it as a EF_LOOPINVARIANT, for now. diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -390,11 +390,13 @@ lst.append(v) def handle_residual_call(self, op, extraargs=[], may_call_jitcodes=False, - oopspecindex=EffectInfo.OS_NONE): + oopspecindex=EffectInfo.OS_NONE, + extraeffect=None): """A direct_call turns into the operation 'residual_call_xxx' if it is calling a function that we don't want to JIT. The initial args of 'residual_call_xxx' are the function to call, and its calldescr.""" - calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex) + calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex, + extraeffect=extraeffect) op1 = self.rewrite_call(op, 'residual_call', [op.args[0]] + extraargs, calldescr=calldescr) if may_call_jitcodes or self.callcontrol.calldescr_canraise(calldescr): @@ -1909,7 +1911,8 @@ extra=(opaqueid,), extrakey=opaqueid._obj) return self.handle_residual_call(op1, - oopspecindex=EffectInfo.OS_THREADLOCALREF_GET) + oopspecindex=EffectInfo.OS_THREADLOCALREF_GET, + extraeffect=EffectInfo.EF_LOOPINVARIANT) # ____________________________________________________________ diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -159,7 +159,7 @@ elif oopspecindex == EI.OS_RAW_FREE: assert extraeffect == EI.EF_CANNOT_RAISE elif oopspecindex == EI.OS_THREADLOCALREF_GET: - assert extraeffect == None + assert extraeffect == EI.EF_LOOPINVARIANT else: assert 
extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE return 'calldescr-%d' % oopspecindex diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -275,7 +275,9 @@ # ____________________________________________________________ # -# Thread-locals. Only for references that are not changed often. +# Thread-locals. Only for references that change "not too often" -- +# for now, the JIT compiles get() as a loop-invariant, so basically +# don't change them. # KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR! # We use _make_sure_does_not_move() to make sure the pointer will not move. From noreply at buildbot.pypy.org Sun Jun 22 22:33:49 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 22 Jun 2014 22:33:49 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: in-progress: see comments Message-ID: <20140622203349.58B161C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72145:f8c6e69e11fb Date: 2014-06-22 22:33 +0200 http://bitbucket.org/pypy/pypy/changeset/f8c6e69e11fb/ Log: in-progress: see comments diff --git a/rpython/jit/codewriter/jitcode.py b/rpython/jit/codewriter/jitcode.py --- a/rpython/jit/codewriter/jitcode.py +++ b/rpython/jit/codewriter/jitcode.py @@ -117,6 +117,24 @@ raise NotImplementedError +class ThreadLocalRefDescr(AbstractDescr): + # A special descr used as the extradescr in a call to a + # threadlocalref_get function. If the backend supports it, + # it can use this 'get_tlref_addr()' to get the address *in the + # current thread* of the thread-local variable. If, on the current + # platform, the "__thread" variables are implemented as an offset + # from some base register (e.g. %fs on x86-64), then the backend will + # immediately substract the current value of the base register. 
+ # This gives an offset from the base register, and this can be + # written down in an assembler instruction to load the "__thread" + # variable from anywhere. + + def __init__(self, opaque_id): + def get_tlref_addr(): + return llop.threadlocalref_getaddr(llmemory.Address, opaque_id) + self.get_tlref_addr = get_tlref_addr + + class LiveVarsInfo(object): def __init__(self, live_i, live_r, live_f): self.live_i = live_i diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -391,12 +391,14 @@ def handle_residual_call(self, op, extraargs=[], may_call_jitcodes=False, oopspecindex=EffectInfo.OS_NONE, - extraeffect=None): + extraeffect=None, + extradescr=None): """A direct_call turns into the operation 'residual_call_xxx' if it is calling a function that we don't want to JIT. The initial args of 'residual_call_xxx' are the function to call, and its calldescr.""" calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex, - extraeffect=extraeffect) + extraeffect=extraeffect, + extradescr=extradescr) op1 = self.rewrite_call(op, 'residual_call', [op.args[0]] + extraargs, calldescr=calldescr) if may_call_jitcodes or self.callcontrol.calldescr_canraise(calldescr): @@ -1906,13 +1908,16 @@ return [op0, op1] def rewrite_op_threadlocalref_get(self, op): + from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr opaqueid = op.args[0].value op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [], extra=(opaqueid,), extrakey=opaqueid._obj) + extradescr = ThreadLocalRefDescr(opaqueid) return self.handle_residual_call(op1, oopspecindex=EffectInfo.OS_THREADLOCALREF_GET, - extraeffect=EffectInfo.EF_LOOPINVARIANT) + extraeffect=EffectInfo.EF_LOOPINVARIANT, + extradescr=[extradescr]) # ____________________________________________________________ From noreply at buildbot.pypy.org Mon Jun 23 03:57:30 2014 From: noreply at buildbot.pypy.org 
(pjenvey) Date: Mon, 23 Jun 2014 03:57:30 +0200 (CEST) Subject: [pypy-commit] pypy py3k: issue1797: fix get_python_lib(standard_lib=True) per our 'lib-python/3' layout Message-ID: <20140623015730.5FD6A1D2D9F@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: py3k Changeset: r72146:aca5ce9b7a02 Date: 2014-06-22 18:56 -0700 http://bitbucket.org/pypy/pypy/changeset/aca5ce9b7a02/ Log: issue1797: fix get_python_lib(standard_lib=True) per our 'lib- python/3' layout diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -52,7 +52,7 @@ if prefix is None: prefix = PREFIX if standard_lib: - return os.path.join(prefix, "lib-python", get_python_version()) + return os.path.join(prefix, "lib-python", sys.version[0]) return os.path.join(prefix, 'site-packages') From noreply at buildbot.pypy.org Mon Jun 23 03:57:31 2014 From: noreply at buildbot.pypy.org (pjenvey) Date: Mon, 23 Jun 2014 03:57:31 +0200 (CEST) Subject: [pypy-commit] pypy pypy3-release-2.3.x: merge py3k Message-ID: <20140623015731.DF6A21D2D9F@cobra.cs.uni-duesseldorf.de> Author: Philip Jenvey Branch: pypy3-release-2.3.x Changeset: r72147:284180f48e94 Date: 2014-06-22 18:56 -0700 http://bitbucket.org/pypy/pypy/changeset/284180f48e94/ Log: merge py3k diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -52,7 +52,7 @@ if prefix is None: prefix = PREFIX if standard_lib: - return os.path.join(prefix, "lib-python", get_python_version()) + return os.path.join(prefix, "lib-python", sys.version[0]) return os.path.join(prefix, 'site-packages') From noreply at buildbot.pypy.org Mon Jun 23 10:34:54 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 10:34:54 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Helps 
translation Message-ID: <20140623083454.371D71D2B39@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72148:0a347260dde5 Date: 2014-06-23 10:33 +0200 http://bitbucket.org/pypy/pypy/changeset/0a347260dde5/ Log: Helps translation diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -295,29 +295,31 @@ self.local = thread._local() # <- NOT_RPYTHON unique_id = ThreadLocalReference._COUNT ThreadLocalReference._COUNT += 1 - self.opaque_id = lltype.opaqueptr(ThreadLocalReference.OPAQUEID, - 'tlref%d' % unique_id) + opaque_id = lltype.opaqueptr(ThreadLocalReference.OPAQUEID, + 'tlref%d' % unique_id) + self.opaque_id = opaque_id + + def get(): + if we_are_translated(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance + ptr = llop.threadlocalref_get(rclass.OBJECTPTR, opaque_id) + return cast_base_ptr_to_instance(Cls, ptr) + else: + return getattr(self.local, 'value', None) + + @jit.dont_look_inside + def set(value): + assert isinstance(value, Cls) or value is None + if we_are_translated(): + from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr + ptr = cast_instance_to_base_ptr(value) + llop.threadlocalref_set(lltype.Void, opaque_id, ptr) + else: + self.local.value = value + + self.get = get + self.set = set def _freeze_(self): return True - - @specialize.arg(0) - def get(self): - if we_are_translated(): - from rpython.rtyper.lltypesystem import rclass - from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance - ptr = llop.threadlocalref_get(rclass.OBJECTPTR, self.opaque_id) - return cast_base_ptr_to_instance(self.Cls, ptr) - else: - return getattr(self.local, 'value', None) - - @specialize.arg(0) - @jit.dont_look_inside - def set(self, value): - assert isinstance(value, self.Cls) or value is None - if we_are_translated(): - from rpython.rtyper.annlowlevel import 
cast_instance_to_base_ptr - ptr = cast_instance_to_base_ptr(value) - llop.threadlocalref_set(lltype.Void, self.opaque_id, ptr) - else: - self.local.value = value From noreply at buildbot.pypy.org Mon Jun 23 10:34:55 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 10:34:55 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: in-progress: JIT backend support Message-ID: <20140623083455.7C82A1D2B39@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72149:5c7a1c9731c5 Date: 2014-06-23 10:34 +0200 http://bitbucket.org/pypy/pypy/changeset/5c7a1c9731c5/ Log: in-progress: JIT backend support diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -4,6 +4,8 @@ from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote from rpython.rlib import jit_hooks +from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.rthread import ThreadLocalReference from rpython.jit.backend.detect_cpu import getcpuclass from rpython.jit.backend.test.support import CCompiledMixin from rpython.jit.codewriter.policy import StopAtXPolicy @@ -21,6 +23,7 @@ # - profiler # - full optimizer # - floats neg and abs + # - threadlocalref_get class Frame(object): _virtualizable_ = ['i'] @@ -28,6 +31,10 @@ def __init__(self, i): self.i = i + class Foo(object): + pass + t = ThreadLocalReference(Foo) + @dont_look_inside def myabs(x): return abs(x) @@ -56,6 +63,7 @@ k = myabs(j) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError + if t.get().nine != 9: raise ValueError return chr(total % 253) # from rpython.rtyper.lltypesystem import lltype, rffi @@ -78,8 +86,12 @@ return res # def main(i, j): + foo = Foo() + foo.nine = -(i + j) + t.set(foo) a_char = f(i, j) a_float = libffi_stuff(i, j) + 
keepalive_until_here(foo) return ord(a_char) * 10 + int(a_float) expected = main(40, -49) res = self.meta_interp(main, [40, -49]) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2351,10 +2351,29 @@ assert isinstance(reg, RegLoc) self.mc.MOV_rr(reg.value, ebp.value) + def threadlocalref_get(self, op, resloc): + # this function is only called on Linux + from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr + from rpython.jit.backend.x86 import stmtlocal + assert isinstance(resloc, RegLoc) + effectinfo = op.getdescr().get_extra_info() + assert len(effectinfo.extradescrs) == 1 + ed = effectinfo.extradescrs[0] + assert isinstance(ed, ThreadLocalRefDescr) + addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) + addr0 = stmtlocal.threadlocal_base() + addr = addr1 - addr0 + assert rx86.fits_in_32bits(addr) + mc = self.mc + mc.writechar(stmtlocal.SEGMENT_TL) # prefix + mc.MOV_rj(resloc.value, addr) + + genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST genop_list = [Assembler386.not_implemented_op] * rop._LAST genop_llong_list = {} genop_math_list = {} +genop_tlref_list = {} genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST for name, value in Assembler386.__dict__.iteritems(): diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -2,7 +2,7 @@ """ Register allocation scheme. 
""" -import os +import os, sys from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.descr import (ArrayDescr, CallDescr, unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr) @@ -692,6 +692,15 @@ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1)) self.perform_math(op, [loc0], loc0) + TLREF_SUPPORT = sys.platform.startswith('linux') + + def _consider_threadlocalref_get(self, op): + if self.TLREF_SUPPORT: + resloc = self.force_allocate_reg(op.result) + self.assembler.threadlocalref_get(op, resloc) + else: + self._consider_call(op) + def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): # we need to save registers on the stack: # @@ -769,6 +778,8 @@ return if oopspecindex == EffectInfo.OS_MATH_SQRT: return self._consider_math_sqrt(op) + if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: + return self._consider_threadlocalref_get(op) self._consider_call(op) def consider_call_may_force(self, op, guard_op): diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/stmtlocal.py @@ -0,0 +1,32 @@ +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.jit.backend.x86.arch import WORD + +SEGMENT_FS = '\x64' +SEGMENT_GS = '\x65' + +if WORD == 4: + SEGMENT_TL = SEGMENT_GS + _instruction = "movl %%gs:0, %0" +else: + SEGMENT_TL = SEGMENT_FS + _instruction = "movq %%fs:0, %0" + +eci = ExternalCompilationInfo(post_include_bits=[''' +#define RPY_STM_JIT 1 +static long pypy__threadlocal_base(void) +{ + /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */ + long result; + asm("%s" : "=r"(result)); + return result; +} +''' % _instruction]) + + +threadlocal_base = rffi.llexternal( + 'pypy__threadlocal_base', + [], lltype.Signed, + compilation_info=eci, + _nowrapper=True, + ) #transactionsafe=True) diff --git a/rpython/jit/codewriter/jitcode.py 
b/rpython/jit/codewriter/jitcode.py --- a/rpython/jit/codewriter/jitcode.py +++ b/rpython/jit/codewriter/jitcode.py @@ -130,6 +130,8 @@ # variable from anywhere. def __init__(self, opaque_id): + from rpython.rtyper.lltypesystem.lloperation import llop + from rpython.rtyper.lltypesystem import llmemory def get_tlref_addr(): return llop.threadlocalref_getaddr(llmemory.Address, opaque_id) self.get_tlref_addr = get_tlref_addr diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py --- a/rpython/jit/metainterp/test/test_threadlocal.py +++ b/rpython/jit/metainterp/test/test_threadlocal.py @@ -4,7 +4,7 @@ from rpython.rlib.jit import dont_look_inside -class TestThreadLocal(LLJitMixin): +class ThreadLocalTest(object): def test_threadlocalref_get(self): class Foo: @@ -24,3 +24,7 @@ res = self.interp_operations(f, []) assert res == 42 + + +class TestLLtype(ThreadLocalTest, LLJitMixin): + pass diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -542,6 +542,7 @@ 'check_and_clear_exc': LLOp(), 'threadlocalref_get': LLOp(sideeffects=False), + 'threadlocalref_getaddr': LLOp(sideeffects=False), 'threadlocalref_set': LLOp(), # __________ debugging __________ diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h --- a/rpython/translator/c/src/threadlocal.h +++ b/rpython/translator/c/src/threadlocal.h @@ -28,6 +28,7 @@ #define RPyThreadStaticTLS_Create(tls) (void)0 #define RPyThreadStaticTLS_Get(tls) tls #define RPyThreadStaticTLS_Set(tls, value) tls = value +#define OP_THREADLOCALREF_GETADDR(tlref, ptr) ptr = tlref #endif From noreply at buildbot.pypy.org Mon Jun 23 10:46:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 10:46:15 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Missing includes Message-ID: 
<20140623084615.423341C01E8@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72150:09be730ef0aa Date: 2014-06-23 10:45 +0200 http://bitbucket.org/pypy/pypy/changeset/09be730ef0aa/ Log: Missing includes diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c --- a/rpython/translator/c/src/threadlocal.c +++ b/rpython/translator/c/src/threadlocal.c @@ -1,3 +1,5 @@ +#include +#include #include "src/threadlocal.h" #ifdef _WIN32 From noreply at buildbot.pypy.org Mon Jun 23 11:48:19 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:19 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: fix comment in test Message-ID: <20140623094819.AF9971C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72156:c93208e79e3b Date: 2014-06-18 15:49 +0200 http://bitbucket.org/pypy/pypy/changeset/c93208e79e3b/ Log: fix comment in test diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -373,7 +373,7 @@ assert self.gc.nursery_free == self.gc.nursery # the following assert is important: make sure that - # we did not reset the whole arena used as the nursery + # we did not reset the whole nursery assert self.gc.nursery_top < self.gc.nursery_real_top def test_collect_dead_pinned_objects(self): From noreply at buildbot.pypy.org Mon Jun 23 11:48:13 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:13 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: Merge release-2.3.x into gc-incminimark-pinning Message-ID: <20140623094813.72EFE1C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72151:2dd51f2a10d4 Date: 2014-06-10 10:52 +0200 http://bitbucket.org/pypy/pypy/changeset/2dd51f2a10d4/ Log: 
Merge release-2.3.x into gc-incminimark-pinning diff too long, truncating to 2000 out of 24916 lines diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -128,6 +128,7 @@ Stian Andreassen Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume Oscar Nierstrasz @@ -212,7 +213,9 @@ Alejandro J. Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner anatoly techtonik Lutz Paelike @@ -245,6 +248,7 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe Laurens Van Houtven Akira Li @@ -274,6 +278,8 @@ Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr jiaaro opassembler.py Antony Lee @@ -289,6 +295,7 @@ yasirs Michael Chermside Anna Ravencroft + Andrew Chambers Julien Phalip Dan Loewenherz diff --git a/_pytest/resultlog.py b/_pytest/resultlog.py --- a/_pytest/resultlog.py +++ b/_pytest/resultlog.py @@ -56,6 +56,9 @@ for line in longrepr.splitlines(): py.builtin.print_(" %s" % line, file=self.logfile) for key, text in sections: + # py.io.StdCaptureFD may send in unicode + if isinstance(text, unicode): + text = text.encode('utf-8') py.builtin.print_(" ", file=self.logfile) py.builtin.print_(" -------------------- %s --------------------" % key.rstrip(), file=self.logfile) diff --git a/lib-python/2.7/imputil.py b/lib-python/2.7/imputil.py --- a/lib-python/2.7/imputil.py +++ b/lib-python/2.7/imputil.py @@ -422,7 +422,8 @@ saved back to the filesystem for future imports. The source file's modification timestamp must be provided as a Long value. 
""" - codestring = open(pathname, 'rU').read() + with open(pathname, 'rU') as fp: + codestring = fp.read() if codestring and codestring[-1] != '\n': codestring = codestring + '\n' code = __builtin__.compile(codestring, pathname, 'exec') @@ -603,8 +604,8 @@ self.desc = desc def import_file(self, filename, finfo, fqname): - fp = open(filename, self.desc[1]) - module = imp.load_module(fqname, fp, filename, self.desc) + with open(filename, self.desc[1]) as fp: + module = imp.load_module(fqname, fp, filename, self.desc) module.__file__ = filename return 0, module, { } diff --git a/lib-python/2.7/modulefinder.py b/lib-python/2.7/modulefinder.py --- a/lib-python/2.7/modulefinder.py +++ b/lib-python/2.7/modulefinder.py @@ -109,16 +109,16 @@ def run_script(self, pathname): self.msg(2, "run_script", pathname) - fp = open(pathname, READ_MODE) - stuff = ("", "r", imp.PY_SOURCE) - self.load_module('__main__', fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) def load_file(self, pathname): dir, name = os.path.split(pathname) name, ext = os.path.splitext(name) - fp = open(pathname, READ_MODE) - stuff = (ext, "r", imp.PY_SOURCE) - self.load_module(name, fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) def import_hook(self, name, caller=None, fromlist=None, level=-1): self.msg(3, "import_hook", name, caller, fromlist, level) @@ -461,6 +461,8 @@ fp, buf, stuff = self.find_module("__init__", m.__path__) self.load_module(fqname, fp, buf, stuff) self.msgout(2, "load_package ->", m) + if fp: + fp.close() return m def add_module(self, fqname): diff --git a/lib-python/2.7/test/test_argparse.py b/lib-python/2.7/test/test_argparse.py --- a/lib-python/2.7/test/test_argparse.py +++ b/lib-python/2.7/test/test_argparse.py @@ -48,6 +48,9 @@ def tearDown(self): os.chdir(self.old_dir) + import gc + # Force a 
collection which should close FileType() options + gc.collect() for root, dirs, files in os.walk(self.temp_dir, topdown=False): for name in files: os.chmod(os.path.join(self.temp_dir, name), stat.S_IWRITE) diff --git a/lib-python/2.7/test/test_gdbm.py b/lib-python/2.7/test/test_gdbm.py --- a/lib-python/2.7/test/test_gdbm.py +++ b/lib-python/2.7/test/test_gdbm.py @@ -74,6 +74,29 @@ size2 = os.path.getsize(filename) self.assertTrue(size1 > size2 >= size0) + def test_sync(self): + # check if sync works at all, not sure how to check it + self.g = gdbm.open(filename, 'cf') + self.g['x'] = 'x' * 10000 + self.g.sync() + + def test_get_key(self): + self.g = gdbm.open(filename, 'cf') + self.g['x'] = 'x' * 10000 + self.g.close() + self.g = gdbm.open(filename, 'r') + self.assertEquals(self.g['x'], 'x' * 10000) + + def test_key_with_null_bytes(self): + key = 'a\x00b' + value = 'c\x00d' + self.g = gdbm.open(filename, 'cf') + self.g[key] = value + self.g.close() + self.g = gdbm.open(filename, 'r') + self.assertEquals(self.g[key], value) + self.assertTrue(key in self.g) + self.assertTrue(self.g.has_key(key)) def test_main(): run_unittest(TestGdbm) diff --git a/lib_pypy/_tkinter/license.terms b/lib_pypy/_tkinter/license.terms new file mode 100644 --- /dev/null +++ b/lib_pypy/_tkinter/license.terms @@ -0,0 +1,39 @@ +This software is copyrighted by the Regents of the University of +California, Sun Microsystems, Inc., and other parties. The following +terms apply to all files associated with the software unless explicitly +disclaimed in individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, provided +that existing copyright notices are retained in all copies and that this +notice is included verbatim in any distributions. No written agreement, +license, or royalty fee is required for any of the authorized uses. 
+Modifications to this software may be copyrighted by their authors +and need not follow the licensing terms described here, provided that +the new terms are clearly indicated on the first page of each file where +they apply. + +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE +IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE +NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" +in the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you +are acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. diff --git a/lib_pypy/gdbm.py b/lib_pypy/gdbm.py new file mode 100644 --- /dev/null +++ b/lib_pypy/gdbm.py @@ -0,0 +1,174 @@ +import cffi, os + +ffi = cffi.FFI() +ffi.cdef(''' +#define GDBM_READER ... +#define GDBM_WRITER ... +#define GDBM_WRCREAT ... +#define GDBM_NEWDB ... +#define GDBM_FAST ... +#define GDBM_SYNC ... +#define GDBM_NOLOCK ... 
+#define GDBM_REPLACE ... + +void* gdbm_open(char *, int, int, int, void (*)()); +void gdbm_close(void*); + +typedef struct { + char *dptr; + int dsize; +} datum; + +datum gdbm_fetch(void*, datum); +int gdbm_delete(void*, datum); +int gdbm_store(void*, datum, datum, int); +int gdbm_exists(void*, datum); + +int gdbm_reorganize(void*); + +datum gdbm_firstkey(void*); +datum gdbm_nextkey(void*, datum); +void gdbm_sync(void*); + +char* gdbm_strerror(int); +int gdbm_errno; + +void free(void*); +''') + +try: + lib = ffi.verify(''' + #include "gdbm.h" + ''', libraries=['gdbm']) +except cffi.VerificationError as e: + # distutils does not preserve the actual message, + # but the verification is simple enough that the + # failure must be due to missing gdbm dev libs + raise ImportError('%s: %s' %(e.__class__.__name__, e)) + +class error(Exception): + pass + +def _fromstr(key): + if not isinstance(key, str): + raise TypeError("gdbm mappings have string indices only") + return {'dptr': ffi.new("char[]", key), 'dsize': len(key)} + +class gdbm(object): + ll_dbm = None + + def __init__(self, filename, iflags, mode): + res = lib.gdbm_open(filename, 0, iflags, mode, ffi.NULL) + self.size = -1 + if not res: + self._raise_from_errno() + self.ll_dbm = res + + def close(self): + if self.ll_dbm: + lib.gdbm_close(self.ll_dbm) + self.ll_dbm = None + + def _raise_from_errno(self): + if ffi.errno: + raise error(os.strerror(ffi.errno)) + raise error(lib.gdbm_strerror(lib.gdbm_errno)) + + def __len__(self): + if self.size < 0: + self.size = len(self.keys()) + return self.size + + def __setitem__(self, key, value): + self._check_closed() + self._size = -1 + r = lib.gdbm_store(self.ll_dbm, _fromstr(key), _fromstr(value), + lib.GDBM_REPLACE) + if r < 0: + self._raise_from_errno() + + def __delitem__(self, key): + self._check_closed() + res = lib.gdbm_delete(self.ll_dbm, _fromstr(key)) + if res < 0: + raise KeyError(key) + + def __contains__(self, key): + self._check_closed() + return 
lib.gdbm_exists(self.ll_dbm, _fromstr(key)) + has_key = __contains__ + + def __getitem__(self, key): + self._check_closed() + drec = lib.gdbm_fetch(self.ll_dbm, _fromstr(key)) + if not drec.dptr: + raise KeyError(key) + res = str(ffi.buffer(drec.dptr, drec.dsize)) + lib.free(drec.dptr) + return res + + def keys(self): + self._check_closed() + l = [] + key = lib.gdbm_firstkey(self.ll_dbm) + while key.dptr: + l.append(str(ffi.buffer(key.dptr, key.dsize))) + nextkey = lib.gdbm_nextkey(self.ll_dbm, key) + lib.free(key.dptr) + key = nextkey + return l + + def firstkey(self): + self._check_closed() + key = lib.gdbm_firstkey(self.ll_dbm) + if key.dptr: + res = str(ffi.buffer(key.dptr, key.dsize)) + lib.free(key.dptr) + return res + + def nextkey(self, key): + self._check_closed() + key = lib.gdbm_nextkey(self.ll_dbm, _fromstr(key)) + if key.dptr: + res = str(ffi.buffer(key.dptr, key.dsize)) + lib.free(key.dptr) + return res + + def reorganize(self): + self._check_closed() + if lib.gdbm_reorganize(self.ll_dbm) < 0: + self._raise_from_errno() + + def _check_closed(self): + if not self.ll_dbm: + raise error("GDBM object has already been closed") + + __del__ = close + + def sync(self): + self._check_closed() + lib.gdbm_sync(self.ll_dbm) + +def open(filename, flags='r', mode=0666): + if flags[0] == 'r': + iflags = lib.GDBM_READER + elif flags[0] == 'w': + iflags = lib.GDBM_WRITER + elif flags[0] == 'c': + iflags = lib.GDBM_WRCREAT + elif flags[0] == 'n': + iflags = lib.GDBM_NEWDB + else: + raise error("First flag must be one of 'r', 'w', 'c' or 'n'") + for flag in flags[1:]: + if flag == 'f': + iflags |= lib.GDBM_FAST + elif flag == 's': + iflags |= lib.GDBM_SYNC + elif flag == 'u': + iflags |= lib.GDBM_NOLOCK + else: + raise error("Flag '%s' not supported" % flag) + return gdbm(filename, iflags, mode) + +open_flags = "rwcnfsu" diff --git a/pypy/doc/Makefile b/pypy/doc/Makefile --- a/pypy/doc/Makefile +++ b/pypy/doc/Makefile @@ -7,63 +7,80 @@ PAPER = BUILDDIR = _build +# 
User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex man changes linkcheck doctest +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make <target>' where <target> is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " man to make manual pages" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML
help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: - -rm -rf $(BUILDDIR)/* + rm -rf $(BUILDDIR)/* html: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + pickle: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." 
htmlhelp: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ @@ -72,35 +89,89 @@ @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyPy.qhc" +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/PyPy" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyPy" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + latex: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
+ +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." man: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man" + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 
+ +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -18,11 +18,31 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.append(os.path.abspath('.')) + +# -- Read The Docs theme config ------------------------------------------------ + +# on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org +on_rtd = os.environ.get('READTHEDOCS', None) == 'True' + +if not on_rtd: # only import and set the theme if we're building docs locally + try: + import sphinx_rtd_theme + html_theme = 'sphinx_rtd_theme' + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + except ImportError: + print('sphinx_rtd_theme is not installed') + html_theme = 'default' + +# otherwise, readthedocs.org uses their theme by default, so no need to specify it + + # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.ifconfig', 'sphinx.ext.graphviz', 'pypyconfig'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', 'sphinx.ext.ifconfig', 'sphinx.ext.graphviz', + 'pypyconfig'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -91,7 +111,7 @@ # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'default' +#html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -99,6 +99,7 @@ Stian Andreassen Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume Oscar Nierstrasz @@ -183,7 +184,9 @@ Alejandro J. Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner anatoly techtonik Lutz Paelike @@ -216,6 +219,7 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe Laurens Van Houtven Akira Li @@ -245,6 +249,8 @@ Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr jiaaro opassembler.py Antony Lee diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-2.3.1.rst release-2.3.0.rst release-2.2.1.rst release-2.2.0.rst diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -110,7 +110,7 @@ .. _`Getting Started`: getting-started.html .. _`Papers`: extradoc.html .. _`Videos`: video-index.html -.. _`Release 2.3.0`: http://pypy.org/download.html +.. _`Release 2.3.1`: http://pypy.org/download.html .. _`speed.pypy.org`: http://speed.pypy.org .. _`RPython toolchain`: translation.html .. _`potential project ideas`: project-ideas.html diff --git a/pypy/doc/make.bat b/pypy/doc/make.bat --- a/pypy/doc/make.bat +++ b/pypy/doc/make.bat @@ -2,11 +2,15 @@ REM Command file for Sphinx documentation -set SPHINXBUILD=sphinx-build +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . 
if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help @@ -14,16 +18,25 @@ if "%1" == "help" ( :help echo.Please use `make ^<target^>` where ^<target^> is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. changes to make an overview over all changed/added/deprecated items - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo.
doctest to run all doctests embedded in the documentation if enabled goto end ) @@ -33,8 +46,34 @@ goto end ) + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 2> nul +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end @@ -42,13 +81,23 @@ if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end @@ -56,6 +105,7 @@ if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end @@ -63,6 +113,7 @@ if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 echo. 
echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. @@ -71,6 +122,7 @@ if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: @@ -80,15 +132,85 @@ goto end ) +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 
+ goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end @@ -96,6 +218,7 @@ if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. @@ -104,10 +227,27 @@ if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + :end diff --git a/pypy/doc/man/pypy.1.rst b/pypy/doc/man/pypy.1.rst --- a/pypy/doc/man/pypy.1.rst +++ b/pypy/doc/man/pypy.1.rst @@ -100,6 +100,8 @@ ``debug_start``/``debug_stop`` but not any nested ``debug_print``. *fname* can be ``-`` to log to *stderr*. + Note that using a : in fname is a bad idea, Windows + users, beware. ``:``\ *fname* Full logging, including ``debug_print``. diff --git a/pypy/doc/release-2.3.0.rst b/pypy/doc/release-2.3.0.rst --- a/pypy/doc/release-2.3.0.rst +++ b/pypy/doc/release-2.3.0.rst @@ -93,7 +93,7 @@ * Fix handling of tp_name for type objects .. _`HippyVM`: http://www.hippyvm.com -.. _`whats-new`: :http://doc.pypy.org/en/latest/whatsnew-2.3.0.html +.. 
_`whats-new`: http://doc.pypy.org/en/latest/whatsnew-2.3.0.html New Platforms and Features diff --git a/pypy/doc/release-2.3.1.rst b/pypy/doc/release-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-2.3.1.rst @@ -0,0 +1,78 @@ +================================================= +PyPy 2.3.1 - Terrestrial Arthropod Trap Revisited +================================================= + +We're pleased to announce PyPy 2.3.1, a feature-and-bugfix improvement over our +recent release last month. + +This release contains several bugfixes and enhancements. + +You can download the PyPy 2.3.1 release here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project, and for those who donate to our three sub-projects. +We've shown quite a bit of progress +but we're slowly running out of funds. +Please consider donating more, or even better convince your employer to donate, +so we can finish those projects! The three sub-projects are: + +* `STM`_ (software transactional memory): a preview will be released very soon, + once we fix a few bugs + +* `NumPy`_ which requires installation of our fork of upstream numpy, available `on bitbucket`_ + +.. _`STM`: http://pypy.org/tmdonate2.html +.. _`NumPy`: http://pypy.org/numpydonate.html +.. _`on bitbucket`: https://www.bitbucket.org/pypy/numpy + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7. It's fast (`pypy 2.3 and cpython 2.7.x`_ performance comparison; +note that cpython's speed has not changed since 2.7.2) +due to its integrated tracing JIT compiler. + +This release supports x86 machines running Linux 32/64, Mac OS X 64, Windows, +and OpenBSD, +as well as newer ARM hardware (ARMv6 or ARMv7, with VFPv3) running Linux. + +While we support 32 bit python on Windows, work on the native Windows 64 +bit python is still stalling, we would welcome a volunteer +to `handle that`_. + +.. 
_`pypy 2.3 and cpython 2.7.x`: http://speed.pypy.org +.. _`handle that`: http://doc.pypy.org/en/latest/windows.html#what-is-missing-for-a-full-64-bit-translation + +Highlights +========== + +Issues with the 2.3 release were resolved after being reported by users to +our new issue tracker at https://bitbucket.org/pypy/pypy/issues or on IRC at +#pypy. Here is a summary of the user-facing changes; +for more information see `whats-new`_: + +* The built-in ``struct`` module was renamed to ``_struct``, solving issues + with IDLE and other modules. + +* Support for compilation with gcc-4.9 + +* A rewrite of packaging.py which produces our downloadable packages to + modernize command line argument handling and to document third-party + contributions in our LICENSE file + +* A CFFI-based version of the gdbm module is now included in our downloads + +* Many issues were resolved_ since the 2.3 release on May 8 + +.. _`whats-new`: http://doc.pypy.org/en/latest/whatsnew-2.3.1.html +.. _resolved: https://bitbucket.org/pypy/pypy/issues?status=resolved +Please try it out and let us know what you think. We especially welcome +success stories, we know you are using PyPy, please tell us about it! + +Cheers + +The PyPy Team + diff --git a/pypy/doc/stm.rst b/pypy/doc/stm.rst --- a/pypy/doc/stm.rst +++ b/pypy/doc/stm.rst @@ -1,70 +1,78 @@ -====================== -Transactional Memory -====================== + +============================= +Software Transactional Memory +============================= .. contents:: This page is about ``pypy-stm``, a special in-development version of PyPy which can run multiple independent CPU-hungry threads in the same -process in parallel. It is side-stepping what is known in the Python -world as the "global interpreter lock (GIL)" problem. +process in parallel. It is a solution to what is known in the Python +world as the "global interpreter lock (GIL)" problem --- it is an +implementation of Python without the GIL. 
-"STM" stands for Software Transactional Memory, the technique used +"STM" stands for Software `Transactional Memory`_, the technique used internally. This page describes ``pypy-stm`` from the perspective of a user, describes work in progress, and finally gives references to more implementation details. -This work was done mostly by Remi Meier and Armin Rigo. Thanks to all -donors for crowd-funding the work so far! Please have a look at the -`2nd call for donation`_. +This work was done by Remi Meier and Armin Rigo. Thanks to all donors +for crowd-funding the work so far! Please have a look at the `2nd call +for donation`_. +.. _`Transactional Memory`: http://en.wikipedia.org/wiki/Transactional_memory .. _`2nd call for donation`: http://pypy.org/tmdonate2.html Introduction ============ -``pypy-stm`` is a variant of the regular PyPy interpreter. With caveats -listed below, it should be in theory within 25%-50% slower than a -regular PyPy, comparing the JIT version in both cases. It is called +``pypy-stm`` is a variant of the regular PyPy interpreter. With caveats_ +listed below, it should be in theory within 20%-50% slower than a +regular PyPy, comparing the JIT version in both cases (but see below!). +It is called STM for Software Transactional Memory, which is the internal technique used (see `Reference to implementation details`_). -What you get in exchange for this slow-down is that ``pypy-stm`` runs -any multithreaded Python program on multiple CPUs at once. Programs -running two threads or more in parallel should ideally run faster than -in a regular PyPy, either now or soon as issues are fixed. In one way, -that's all there is to it: this is a GIL-less Python, feel free to -`download and try it`__. However, the deeper idea behind the -``pypy-stm`` project is to improve what is so far the state-of-the-art -for using multiple CPUs, which for cases where separate processes don't -work is done by writing explicitly multi-threaded programs. 
Instead, -``pypy-stm`` is pushing forward an approach to *hide* the threads, as -described below in `atomic sections`_. +The benefit is that the resulting ``pypy-stm`` can execute multiple +threads of Python code in parallel. Programs running two threads or +more in parallel should ideally run faster than in a regular PyPy +(either now, or soon as bugs are fixed). +* ``pypy-stm`` is fully compatible with a GIL-based PyPy; you can use + it as a drop-in replacement and multithreaded programs will run on + multiple cores. -.. __: +* ``pypy-stm`` does not impose any special API to the user, but it + provides a new pure Python module called `transactional_memory`_ with + features to inspect the state or debug conflicts_ that prevent + parallelization. This module can also be imported on top of a non-STM + PyPy or CPython. -Current status -============== +* Building on top of the way the GIL is removed, we will talk + about `Atomic sections, Transactions, etc.: a better way to write + parallel programs`_. + + +Getting Started +=============== **pypy-stm requires 64-bit Linux for now.** Development is done in the branch `stmgc-c7`_. If you are only -interested in trying it out, you can download a Ubuntu 12.04 binary -here__ (``pypy-2.2.x-stm*.tar.bz2``; this version is a release mode, -but not stripped of debug symbols). The current version supports four -"segments", which means that it will run up to four threads in parallel, -in other words it is running a thread pool up to 4 threads emulating normal -threads. +interested in trying it out, you can download a Ubuntu binary here__ +(``pypy-2.3.x-stm*.tar.bz2``, Ubuntu 12.04-14.04; these versions are +release mode, but not stripped of debug symbols). The current version +supports four "segments", which means that it will run up to four +threads in parallel. 
To build a version from sources, you first need to compile a custom -version of clang; we recommend downloading `llvm and clang like -described here`__, but at revision 201645 (use ``svn co -r 201645 ...`` +version of clang(!); we recommend downloading `llvm and clang like +described here`__, but at revision 201645 (use ``svn co -r 201645 <path>`` for all checkouts). Then apply all the patches in `this directory`__: -they are fixes for the very extensive usage that pypy-stm does of a -clang-only feature (without them, you get crashes of clang). Then get +they are fixes for a clang-only feature that hasn't been used so heavily +in the past (without the patches, you get crashes of clang). Then get the branch `stmgc-c7`_ of PyPy and run:: rpython/bin/rpython -Ojit --stm pypy/goal/targetpypystandalone.py @@ -75,23 +83,31 @@ .. __: https://bitbucket.org/pypy/stmgc/src/default/c7/llvmfix/ -Caveats: +.. _caveats: -* So far, small examples work fine, but there are still a number of - bugs. We're busy fixing them. +Current status +-------------- + +* So far, small examples work fine, but there are still a few bugs. + We're busy fixing them as we find them; feel free to `report bugs`_. + +* It runs with an overhead as low as 20% on examples like "richards". + There are also other examples with higher overheads --up to 10x for + "translate.py"-- which we are still trying to understand. One suspect + is our partial GC implementation, see below. * Currently limited to 1.5 GB of RAM (this is just a parameter in - `core.h`__). Memory overflows are not detected correctly, so may - cause segmentation faults. + `core.h`__). Memory overflows are not correctly handled; they cause + segfaults. -* The JIT warm-up time is abysmal (as opposed to the regular PyPy's, - which is "only" bad).
Moreover, you should run it with a command like - ``pypy-stm --jit trace_limit=60000 args...``; the default value of - 6000 for ``trace_limit`` is currently too low (6000 should become - reasonable again as we improve). Also, in order to produce machine - code, the JIT needs to enter a special single-threaded mode for now. - This all means that you *will* get very bad performance results if - your program doesn't run for *many* seconds for now. +* The JIT warm-up time improved recently but is still bad. In order to + produce machine code, the JIT needs to enter a special single-threaded + mode for now. This means that you will get bad performance results if + your program doesn't run for several seconds, where *several* can mean + *many.* When trying benchmarks, be sure to check that you have + reached the warmed state, i.e. the performance is not improving any + more. This should be clear from the fact that as long as it's + producing more machine code, ``pypy-stm`` will run on a single core. * The GC is new; although clearly inspired by PyPy's regular GC, it misses a number of optimizations for now. Programs allocating large @@ -108,111 +124,197 @@ * The STM system is based on very efficient read/write barriers, which are mostly done (their placement could be improved a bit in JIT-generated machine code). But the overall bookkeeping logic could - see more improvements (see Statistics_ below). - -* You can use `atomic sections`_, but the most visible missing thing is - that you don't get reports about the "conflicts" you get. This would - be the first thing that you need in order to start using atomic - sections more extensively. Also, for now: for better results, try to - explicitly force a transaction break just before (and possibly after) - each large atomic section, with ``time.sleep(0)``. + see more improvements (see `Low-level statistics`_ below). * Forking the process is slow because the complete memory needs to be - copied manually right now. 
+ copied manually. A warning is printed to this effect. -* Very long-running processes should eventually crash on an assertion - error because of a non-implemented overflow of an internal 29-bit - number, but this requires at the very least ten hours --- more - probably, several days or more. +* Very long-running processes (on the order of days) will eventually + crash on an assertion error because of a non-implemented overflow of + an internal 29-bit number. .. _`report bugs`: https://bugs.pypy.org/ .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/translator/stm/src_stm/stm/core.h -Statistics +User Guide ========== + -When a non-main thread finishes, you get statistics printed to stderr, -looking like that:: +Drop-in replacement +------------------- - thread 0x7f73377fe600: - outside transaction 42182 0.506 s - run current 85466 0.000 s - run committed 34262 3.178 s - run aborted write write 6982 0.083 s - run aborted write read 550 0.005 s - run aborted inevitable 388 0.010 s - run aborted other 0 0.000 s - wait free segment 0 0.000 s - wait write read 78 0.027 s - wait inevitable 887 0.490 s - wait other 0 0.000 s - bookkeeping 51418 0.606 s - minor gc 162970 1.135 s - major gc 1 0.019 s - sync pause 59173 1.738 s - spin loop 129512 0.094 s +Multithreaded, CPU-intensive Python programs should work unchanged on +``pypy-stm``. They will run using multiple CPU cores in parallel. -The first number is a counter; the second number gives the associated -time (the amount of real time that the thread was in this state; the sum -of all the times should be equal to the total time between the thread's -start and the thread's end). The most important points are "run -committed", which gives the amount of useful work, and "outside -transaction", which should give the time spent e.g. in library calls -(right now it seems to be a bit larger than that; to investigate). -Everything else is overhead of various forms. 
(Short-, medium- and -long-term future work involves reducing this overhead :-) +The existing semantics of the GIL (Global Interpreter Lock) are +unchanged: although running on multiple cores in parallel, ``pypy-stm`` +gives the illusion that threads are run serially, with switches only +occurring between bytecodes, not in the middle of them. Programs can +rely on this: using ``shared_list.append()/pop()`` or +``shared_dict.setdefault()`` as synchronization mechanisms continues to +work as expected. -These statistics are not printed out for the main thread, for now. +This works by internally considering the points where a standard PyPy or +CPython would release the GIL, and replacing them with the boundaries of +"transactions". Like their database equivalent, multiple transactions +can execute in parallel, but will commit in some serial order. They +appear to behave as if they were completely run in this serialization +order. Atomic sections -=============== +--------------- -While one of the goal of pypy-stm is to give a GIL-free but otherwise -unmodified Python, the other goal is to push for a better way to use -multithreading. For this, you (as the Python programmer) get an API -in the ``__pypy__.thread`` submodule: +PyPy supports *atomic sections,* which are blocks of code which you want +to execute without "releasing the GIL". *This is experimental and may +be removed in the future.* In STM terms, this means blocks of code that +are executed while guaranteeing that the transaction is not interrupted +in the middle. -* ``__pypy__.thread.atomic``: a context manager (i.e. you use it in - a ``with __pypy__.thread.atomic:`` statement). It runs the whole - block of code without breaking the current transaction --- from - the point of view of a regular CPython/PyPy, this is equivalent to - saying that the GIL will not be released at all between the start and - the end of this block of code.
+Here is a usage example:: -The obvious usage is to use atomic blocks in the same way as one would -use locks: to protect changes to some shared data, you do them in a -``with atomic`` block, just like you would otherwise do them in a ``with -mylock`` block after ``mylock = thread.allocate_lock()``. This allows -you not to care about acquiring the correct locks in the correct order; -it is equivalent to having only one global lock. This is how -transactional memory is `generally described`__: as a way to efficiently -execute such atomic blocks, running them in parallel while giving the -illusion that they run in some serial order. + with __pypy__.thread.atomic: + assert len(lst1) == 10 + x = lst1.pop(0) + lst1.append(x) -.. __: http://en.wikipedia.org/wiki/Transactional_memory +In this (bad) example, we are sure that the item popped off one end of +the list is appended again at the other end atomically. It means that +another thread can run ``len(lst1)`` or ``x in lst1`` without any +particular synchronization, and always see the same results, +respectively ``10`` and ``True``. It will never see the intermediate +state where ``lst1`` only contains 9 elements. Atomic sections are +similar to re-entrant locks (they can be nested), but additionally they +protect against the concurrent execution of *any* code instead of just +code that happens to be protected by the same lock in other threads. -However, the less obvious intended usage of atomic sections is as a -wide-ranging replacement of explicit threads. You can turn a program -that is not multi-threaded at all into a program that uses threads -internally, together with large atomic sections to keep the behavior -unchanged. This capability can be hidden in a library or in the -framework you use; the end user's code does not need to be explicitly -aware of using threads. For a simple example of this, see -`transaction.py`_ in ``lib_pypy``.
The idea is that if you have a -program where the function ``f(key, value)`` runs on every item of some -big dictionary, you can replace the loop with:: +Note that the notion of atomic sections is very strong. If you write +code like this:: + + with __pypy__.thread.atomic: + time.sleep(10) + +then, if you think about it as if we had a GIL, you are executing a +10-seconds-long atomic transaction without releasing the GIL at all. +This prevents all other threads from progressing at all. While it is +not strictly true in ``pypy-stm``, the exact rules for when other +threads can progress or not are rather complicated; you have to consider +it likely that such a piece of code will eventually block all other +threads anyway. + +Note that if you want to experiment with ``atomic``, you may have to add +manually a transaction break just before the atomic block. This is +because the boundaries of the block are not guaranteed to be the +boundaries of the transaction: the latter is at least as big as the +block, but maybe bigger. Therefore, if you run a big atomic block, it +is a good idea to break the transaction just before. This can be done +e.g. by the hack of calling ``time.sleep(0)``. (This may be fixed at +some point.) + +There are also issues with the interaction of locks and atomic blocks. +This can be seen if you write to files (which have locks), including +with a ``print`` to standard output. If one thread tries to acquire a +lock while running in an atomic block, and another thread has got the +same lock, then the former may fail with a ``thread.error``. The reason +is that "waiting" for some condition to become true --while running in +an atomic block-- does not really make sense. For now you can work +around it by making sure that, say, all your prints are either in an +``atomic`` block or none of them are. (This kind of issue is +theoretically hard to solve.) 
+ + +Locks +----- + +**Not Implemented Yet** + +The thread module's locks have their basic semantic unchanged. However, +using them (e.g. in ``with my_lock:`` blocks) starts an alternative +running mode, called `Software lock elision`_. This means that PyPy +will try to make sure that the transaction extends until the point where +the lock is released, and if it succeeds, then the acquiring and +releasing of the lock will be "elided". This means that in this case, +the whole transaction will technically not cause any write into the lock +object --- it was unacquired before, and is still unacquired after the +transaction. + +This is specially useful if two threads run ``with my_lock:`` blocks +with the same lock. If they each run a transaction that is long enough +to contain the whole block, then all writes into the lock will be elided +and the two transactions will not conflict with each other. As usual, +they will be serialized in some order: one of the two will appear to run +before the other. Simply, each of them executes an "acquire" followed +by a "release" in the same transaction. As explained above, the lock +state goes from "unacquired" to "unacquired" and can thus be left +unchanged. + +This approach can gracefully fail: unlike atomic sections, there is no +guarantee that the transaction runs until the end of the block. If you +perform any input/output while you hold the lock, the transaction will +end as usual just before the input/output operation. If this occurs, +then the lock elision mode is cancelled and the lock's "acquired" state +is really written. + +Even if the lock is really acquired already, a transaction doesn't have +to wait for it to become free again. It can enter the elision-mode anyway +and tentatively execute the content of the block. It is only at the end, +when trying to commit, that the thread will pause. 
As soon as the real +value stored in the lock is switched back to "unacquired", it can then +proceed and attempt to commit its already-executed transaction (which +can fail and abort and restart from the scratch, as usual). + +Note that this is all *not implemented yet,* but we expect it to work +even if you acquire and release several locks. The elision-mode +transaction will extend until the first lock you acquired is released, +or until the code performs an input/output or a wait operation (for +example, waiting for another lock that is currently not free). In the +common case of acquiring several locks in nested order, they will all be +elided by the same transaction. + +.. _`software lock elision`: https://www.repository.cam.ac.uk/handle/1810/239410 + + +Atomic sections, Transactions, etc.: a better way to write parallel programs +---------------------------------------------------------------------------- + +(This section is based on locks as we plan to implement them, but also +works with the existing atomic sections.) + +In the cases where elision works, the block of code can run in parallel +with other blocks of code *even if they are protected by the same lock.* +You still get the illusion that the blocks are run sequentially. This +works even for multiple threads that run each a series of such blocks +and nothing else, protected by one single global lock. This is +basically the Python application-level equivalent of what was done with +the interpreter in ``pypy-stm``: while you think you are writing +thread-unfriendly code because of this global lock, actually the +underlying system is able to make it run on multiple cores anyway. + +This capability can be hidden in a library or in the framework you use; +the end user's code does not need to be explicitly aware of using +threads. For a simple example of this, there is `transaction.py`_ in +``lib_pypy``. 
The idea is that you write, or already have, some program +where the function ``f(key, value)`` runs on every item of some big +dictionary, say:: + + for key, value in bigdict.items(): + f(key, value) + +Then you simply replace the loop with:: for key, value in bigdict.items(): transaction.add(f, key, value) transaction.run() This code runs the various calls to ``f(key, value)`` using a thread -pool, but every single call is done in an atomic section. The end -result is that the behavior should be exactly equivalent: you don't get -any extra multithreading issue. +pool, but every single call is executed under the protection of a unique +lock. The end result is that the behavior is exactly equivalent --- in +fact it makes little sense to do it in this way on a non-STM PyPy or on +CPython. But on ``pypy-stm``, the various locked calls to ``f(key, +value)`` can tentatively be executed in parallel, even if the observable +result is as if they were executed in some serial order. This approach hides the notion of threads from the end programmer, including all the hard multithreading-related issues. This is not the @@ -223,41 +325,176 @@ only requires that the end programmer identifies where this parallelism is likely to be found, and communicates it to the system, using for example the ``transaction.add()`` scheme. - + .. _`transaction.py`: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/lib_pypy/transaction.py .. _OpenMP: http://en.wikipedia.org/wiki/OpenMP -================== -Other APIs in pypy-stm: +.. _`transactional_memory`: -* ``__pypy__.thread.getsegmentlimit()``: return the number of "segments" - in this pypy-stm. This is the limit above which more threads will not - be able to execute on more cores. (Right now it is limited to 4 due - to inter-segment overhead, but should be increased in the future. 
It +API of transactional_memory +--------------------------- + +The new pure Python module ``transactional_memory`` runs on both CPython +and PyPy, both with and without STM. It contains: + +* ``getsegmentlimit()``: return the number of "segments" in + this pypy-stm. This is the limit above which more threads will not be + able to execute on more cores. (Right now it is limited to 4 due to + inter-segment overhead, but should be increased in the future. It should also be settable, and the default value should depend on the - number of actual CPUs.) + number of actual CPUs.) If STM is not available, this returns 1. -* ``__pypy__.thread.exclusive_atomic``: same as ``atomic``, but - raises an exception if you attempt to nest it inside another - ``atomic``. +* ``print_abort_info(minimum_time=0.0)``: debugging help. Each thread + remembers the longest abort or pause it did because of cross-thread + contention_. This function prints it to ``stderr`` if the time lost + is greater than ``minimum_time`` seconds. The record is then + cleared, to make it ready for new events. This function returns + ``True`` if it printed a report, and ``False`` otherwise. -* ``__pypy__.thread.signals_enabled``: a context manager that runs - its block with signals enabled. By default, signals are only - enabled in the main thread; a non-main thread will not receive - signals (this is like CPython). Enabling signals in non-main threads - is useful for libraries where threads are hidden and the end user is - not expecting his code to run elsewhere than in the main thread. -Note that all of this API is (or will be) implemented in a regular PyPy -too: for example, ``with atomic`` will simply mean "don't release the -GIL" and ``getsegmentlimit()`` will return 1. 
+API of __pypy__.thread +---------------------- -================== +The ``__pypy__.thread`` submodule is a built-in module of PyPy that +contains a few internal built-in functions used by the +``transactional_memory`` module, plus the following: + +* ``__pypy__.thread.atomic``: a context manager to run a block in + fully atomic mode, without "releasing the GIL". (May be eventually + removed?) + +* ``__pypy__.thread.signals_enabled``: a context manager that runs its + block with signals enabled. By default, signals are only enabled in + the main thread; a non-main thread will not receive signals (this is + like CPython). Enabling signals in non-main threads is useful for + libraries where threads are hidden and the end user is not expecting + his code to run elsewhere than in the main thread. + + +.. _contention: + +Conflicts +--------- + +Based on Software Transactional Memory, the ``pypy-stm`` solution is +prone to "conflicts". To repeat the basic idea, threads execute their code +speculatively, and at known points (e.g. between bytecodes) they +coordinate with each other to agree on which order their respective +actions should be "committed", i.e. become globally visible. Each +duration of time between two commit-points is called a transaction. + +A conflict occurs when there is no consistent ordering. The classical +example is if two threads both tried to change the value of the same +global variable. In that case, only one of them can be allowed to +proceed, and the other one must be either paused or aborted (restarting +the transaction). If this occurs too often, parallelization fails. + +How much actual parallelization a multithreaded program can see is a bit +subtle. Basically, a program not using ``__pypy__.thread.atomic`` or +eliding locks, or doing so for very short amounts of time, will +parallelize almost freely (as long as it's not some artificial example +where, say, all threads try to increase the same global counter and do +nothing else). 
+ +However, if the program requires longer transactions, it comes +with less obvious rules. The exact details may vary from version to +version, too, until they are a bit more stabilized. Here is an +overview. + +Parallelization works as long as two principles are respected. The +first one is that the transactions must not *conflict* with each other. +The most obvious sources of conflicts are threads that all increment a +global shared counter, or that all store the result of their +computations into the same list --- or, more subtly, that all ``pop()`` +the work to do from the same list, because that is also a mutation of +the list. (It is expected that some STM-aware library will eventually +be designed to help with conflict problems, like a STM-aware queue.) + +A conflict occurs as follows: when a transaction commits (i.e. finishes +successfully) it may cause other transactions that are still in progress +to abort and retry. This is a waste of CPU time, but even in the worst +case scenario it is not worse than a GIL, because at least one +transaction succeeds (so we get at worst N-1 CPUs doing useless jobs and +1 CPU doing a job that commits successfully). + +Conflicts do occur, of course, and it is pointless to try to avoid them +all. For example they can be abundant during some warm-up phase. What +is important is to keep them rare enough in total. + +Another issue is that of avoiding long-running so-called "inevitable" +transactions ("inevitable" is taken in the sense of "which cannot be +avoided", i.e. transactions which cannot abort any more). Transactions +like that should only occur if you use ``__pypy__.thread.atomic``, +generally because of I/O in atomic blocks. They work, but the +transaction is turned inevitable before the I/O is performed. For all +the remaining execution time of the atomic block, they will impede +parallel work. The best is to organize the code so that such operations +are done completely outside ``__pypy__.thread.atomic``.
+ +(This is related to the fact that blocking I/O operations are +discouraged with Twisted, and if you really need them, you should do +them on their own separate thread.) + +In case of lock elision, we don't get long-running inevitable +transactions, but a different problem can occur: doing I/O cancels lock +elision, and the lock turns into a real lock, preventing other threads +from committing if they also need this lock. (More about it when lock +elision is implemented and tested.) + + + +Implementation +============== + +XXX this section mostly empty for now + + +Low-level statistics +-------------------- + +When a non-main thread finishes, you get low-level statistics printed to +stderr, looking like that:: + + thread 0x7f73377fe600: + outside transaction 42182 0.506 s + run current 85466 0.000 s + run committed 34262 3.178 s + run aborted write write 6982 0.083 s + run aborted write read 550 0.005 s + run aborted inevitable 388 0.010 s + run aborted other 0 0.000 s + wait free segment 0 0.000 s + wait write read 78 0.027 s + wait inevitable 887 0.490 s + wait other 0 0.000 s + sync commit soon 1 0.000 s + bookkeeping 51418 0.606 s + minor gc 162970 1.135 s + major gc 1 0.019 s + sync pause 59173 1.738 s + longest recordered marker 0.000826 s + "File "x.py", line 5, in f" + +On each line, the first number is a counter, and the second number gives +the associated time --- the amount of real time that the thread was in +this state. The sum of all the times should be equal to the total time +between the thread's start and the thread's end. The most important +points are "run committed", which gives the amount of useful work, and +"outside transaction", which should give the time spent e.g. in library +calls (right now it seems to be larger than that; to investigate). The +various "run aborted" and "wait" entries are time lost due to +conflicts_. Everything else is overhead of various forms. 
(Short-, +medium- and long-term future work involves reducing this overhead :-) + +The last two lines are special; they are an internal marker read by +``transactional_memory.print_abort_info()``. + +These statistics are not printed out for the main thread, for now. Reference to implementation details -=================================== +----------------------------------- The core of the implementation is in a separate C library called stmgc_, in the c7_ subdirectory. Please see the `README.txt`_ for more @@ -282,3 +519,15 @@ .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/translator/stm/src_stm/stmgcintf.c .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/jit/backend/llsupport/stmrewrite.py .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/jit/backend/x86/assembler.py + + + +See also +======== + +See also +https://bitbucket.org/pypy/pypy/raw/default/pypy/doc/project-ideas.rst +(section about STM). + + +.. include:: _ref.txt diff --git a/pypy/doc/whatsnew-2.3.1.rst b/pypy/doc/whatsnew-2.3.1.rst --- a/pypy/doc/whatsnew-2.3.1.rst +++ b/pypy/doc/whatsnew-2.3.1.rst @@ -9,3 +9,16 @@ Support compilation with gcc-4.9 +Added support for the stdlib gdbm module via cffi + +Annotator cleanups + +.. branch: release-2.3.x + +.. branch: unify-call-ops + +.. branch packaging +Use argparse for packaging.py, and add third-party components to LICENSE file. +Also mention that gdbm is GPL. +Do not crash the packaging process on failure in CFFI or license-building, +rather complete the build step and return -1. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,10 +3,6 @@ ======================= .. this is a revision shortly after release-2.3.x -.. startrev: b2cc67adbaad +.. 
startrev: 87fdc76bccb4 -Added support for the stdlib gdbm module via cffi -Fixes for issues #1769, #1764, #1762, #1752 - -Annotator cleanups diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -10,8 +10,14 @@ 64bit Windows. See at the end of this page for what is missing for a full 64bit translation. -To build pypy-c you need a C compiler. Microsoft Visual Studio is -preferred, but can also use the mingw32 port of gcc. +To build pypy-c you need a working python environment, and a C compiler. +It is possible to translate with a CPython 2.6 or later, but this is not +the preferred way, because it will take a lot longer to run – depending +on your architecture, between two and three times as long. So head to +`our downloads`_ and get the latest stable version. + +Microsoft Visual Studio is preferred as a compiler, but there are reports +of success with the mingw32 port of gcc. Translating PyPy with Visual Studio @@ -34,10 +40,20 @@ **Note:** PyPy is currently not supported for 64 bit Windows, and translation will fail in this case. -The compiler is all you need to build pypy-c, but it will miss some +Python and a C compiler are all you need to build pypy, but it will miss some modules that relies on third-party libraries. See below how to get and build them. +Please see the `non-windows instructions`_ for more information, especially note +that translation is RAM-hungry. 
A standard translation requires around 4GB, so +special preparations are necessary, or you may want to use the method in the +notes of the `build instructions`_ to reduce memory usage at the price of a +slower translation:: + + set PYPY_GC_MAX_DELTA=200MB + pypy --jit loop_longevity=300 ../../rpython/bin/rpython -Ojit targetpypystandalone + set PYPY_GC_MAX_DELTA= + Preping Windows for the Large Build ----------------------------------- @@ -52,9 +68,10 @@ Then you need to execute:: - editbin /largeaddressaware pypy.exe + editbin /largeaddressaware translator.exe -on the pypy.exe file you compiled. +where ``translator.exe`` is the pypy.exe or cpython.exe you will use to +translate with. Installing external packages ---------------------------- @@ -244,7 +261,9 @@ .. _`msys for mingw`: http://sourceforge.net/projects/mingw-w64/files/External%20binary%20packages%20%28Win64%20hosted%29/MSYS%20%2832-bit%29 .. _`libffi source files`: http://sourceware.org/libffi/ .. _`RPython translation toolchain`: translation.html - +.. _`our downloads`: http://pypy.org/download.html +.. _`non-windows instructions`: getting-started-python.html#translating-the-pypy-python-interpreter +.. 
_`build instructions`: http://pypy.org/download.html#building-from-source What is missing for a full 64-bit translation --------------------------------------------- diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -85,7 +85,7 @@ if softspace: stdout.write('\n') - except SystemExit, e: + except SystemExit as e: handle_sys_exit(e) except: display_exception() @@ -608,10 +608,9 @@ python_startup = readenv and os.getenv('PYTHONSTARTUP') if python_startup: try: - f = open(python_startup) - startup = f.read() - f.close() - except IOError, e: + with open(python_startup) as f: + startup = f.read() + except IOError as e: print >> sys.stderr, "Could not open PYTHONSTARTUP" print >> sys.stderr, "IOError:", e else: @@ -667,7 +666,7 @@ args = (execfile, filename, mainmodule.__dict__) success = run_toplevel(*args) - except SystemExit, e: + except SystemExit as e: status = e.code if inspect_requested(): display_exception() @@ -678,10 +677,12 @@ if inspect_requested(): try: from _pypy_interact import interactive_console - irc_topic = readenv and os.getenv('PYPY_IRC_TOPIC') + pypy_version_info = getattr(sys, 'pypy_version_info', sys.version_info) + irc_topic = pypy_version_info[3] != 'final' or ( + readenv and os.getenv('PYPY_IRC_TOPIC')) success = run_toplevel(interactive_console, mainmodule, quiet=not irc_topic) - except SystemExit, e: + except SystemExit as e: status = e.code else: status = not success @@ -731,10 +732,10 @@ setup_bootstrap_path(executable) try: cmdline = parse_command_line(argv) - except CommandLineError, e: + except CommandLineError as e: print_error(str(e)) return 2 - except SystemExit, e: + except SystemExit as e: return e.code or 0 setup_and_fix_paths(**cmdline) return run_command_line(**cmdline) diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py --- a/pypy/interpreter/test/test_app_main.py +++ 
b/pypy/interpreter/test/test_app_main.py @@ -7,12 +7,8 @@ from rpython.tool.udir import udir from contextlib import contextmanager from pypy.conftest import pypydir -from pypy.module.sys.version import PYPY_VERSION from lib_pypy._pypy_interact import irc_header -is_release = PYPY_VERSION[3] == "final" - - banner = sys.version.splitlines()[0] app_main = os.path.join(os.path.realpath(os.path.dirname(__file__)), os.pardir, 'app_main.py') @@ -246,10 +242,6 @@ child = self.spawn([]) child.expect('Python ') # banner child.expect('>>> ') # prompt - if is_release: - assert irc_header not in child.before - else: - assert irc_header in child.before child.sendline('[6*7]') child.expect(re.escape('[42]')) child.sendline('def f(x):') @@ -269,6 +261,22 @@ child.sendline("'' in sys.path") child.expect("True") + def test_yes_irc_topic(self, monkeypatch): + monkeypatch.setenv('PYPY_IRC_TOPIC', '1') + child = self.spawn([]) + child.expect(irc_header) # banner + + def test_maybe_irc_topic(self): + import sys + pypy_version_info = getattr(sys, 'pypy_version_info', sys.version_info) + irc_topic = pypy_version_info[3] != 'final' + child = self.spawn([]) + child.expect('>>>') # banner + if irc_topic: + assert irc_header in child.before + else: + assert irc_header not in child.before + def test_help(self): # test that -h prints the usage, including the name of the executable # which should be /full/path/to/app_main.py in this case @@ -929,6 +937,7 @@ # ---------------------------------------- from pypy.module.sys.version import CPYTHON_VERSION, PYPY_VERSION cpy_ver = '%d.%d' % CPYTHON_VERSION[:2] + from lib_pypy._pypy_interact import irc_header goal_dir = os.path.dirname(app_main) # build a directory hierarchy like which contains both bin/pypy-c and @@ -948,6 +957,7 @@ self.w_fake_exe = self.space.wrap(str(fake_exe)) self.w_expected_path = self.space.wrap(expected_path) self.w_trunkdir = self.space.wrap(os.path.dirname(pypydir)) + self.w_is_release = self.space.wrap(PYPY_VERSION[3] == 
"final") self.w_tmp_dir = self.space.wrap(tmp_dir) @@ -1017,3 +1027,4 @@ # assert it did not crash finally: sys.path[:] = old_sys_path + diff --git a/pypy/module/__builtin__/__init__.py b/pypy/module/__builtin__/__init__.py --- a/pypy/module/__builtin__/__init__.py +++ b/pypy/module/__builtin__/__init__.py @@ -33,7 +33,7 @@ interpleveldefs = { # constants - '__debug__' : '(space.w_True)', # XXX + '__debug__' : '(space.w_True)', 'None' : '(space.w_None)', 'False' : '(space.w_False)', 'True' : '(space.w_True)', diff --git a/pypy/module/__builtin__/app_io.py b/pypy/module/__builtin__/app_io.py --- a/pypy/module/__builtin__/app_io.py +++ b/pypy/module/__builtin__/app_io.py @@ -4,6 +4,7 @@ """ import sys +from _ast import PyCF_ACCEPT_NULL_BYTES def execfile(filename, glob=None, loc=None): """execfile(filename[, globals[, locals]]) @@ -24,7 +25,8 @@ finally: f.close() #Don't exec the source directly, as this loses the filename info - co = compile(source.rstrip()+"\n", filename, 'exec') + co = compile(source.rstrip()+"\n", filename, 'exec', + PyCF_ACCEPT_NULL_BYTES) exec co in glob, loc def _write_prompt(stdout, prompt): diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -1,7 +1,10 @@ import sys +from rpython.tool.udir import udir + class AppTestBuiltinApp: def setup_class(cls): + space = cls.space class X(object): def __eq__(self, other): raise OverflowError @@ -11,18 +14,25 @@ try: d[X()] From noreply at buildbot.pypy.org Mon Jun 23 11:48:14 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:14 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: wip: adding JIT support in presence of pinned objects Message-ID: <20140623094814.B336D1C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72152:841b8277ef3a Date: 2014-06-11 14:02 
+0200 http://bitbucket.org/pypy/pypy/changeset/841b8277ef3a/ Log: wip: adding JIT support in presence of pinned objects added _is_pinned method and _make_sure_does_not_move can fail now by returning a boolean diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -182,6 +182,9 @@ def unpin(self, addr): pass + def _is_pinned(self, addr): + return False + def set_max_heap_size(self, size): raise NotImplementedError diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -960,24 +960,27 @@ # to check if can_move(obj) already returns True in which # case a call to pin() is unnecessary. return False - if self.header(obj).tid & GCFLAG_PINNED: + if self._is_pinned(obj): # Already pinned, we do not allow to pin it again. # Reason: It would be possible that the first caller unpins # while the second caller thinks it's still pinned. return False - + # self.header(obj).tid |= GCFLAG_PINNED self.pinned_objects_in_nursery += 1 return True def unpin(self, obj): - ll_assert(self.header(obj).tid & GCFLAG_PINNED != 0, + ll_assert(self._is_pinned(obj), "unpin: object is already not pinned") # self.header(obj).tid &= ~GCFLAG_PINNED self.pinned_objects_in_nursery -= 1 + def _is_pinned(self, obj): + return (self.header(obj).tid & GCFLAG_PINNED) != 0 + def shrink_array(self, obj, smallerlength): # # Only objects in the nursery can be "resized". Resizing them @@ -1148,7 +1151,7 @@ # We are after a minor collection, and possibly after a major # collection step. 
No object should be in the nursery (except # pinned ones) - if self.header(obj).tid & GCFLAG_PINNED == 0: + if not self._is_pinned(obj): ll_assert(not self.is_in_nursery(obj), "object in nursery after collection") ll_assert(self.header(obj).tid & GCFLAG_VISITED_RMY == 0, @@ -1204,7 +1207,7 @@ # All objects should have this flag, except if they # don't have any GC pointer typeid = self.get_type_id(obj) - if not self.header(obj).tid & GCFLAG_PINNED: + if not self._is_pinned(obj): # XXX do we need checks if the object is actually pinned? (groggi) if self.has_gcptr(typeid): ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0, @@ -1818,7 +1821,7 @@ root.address[0] = self.get_forwarding_address(obj) return # - elif self.header(obj).tid & GCFLAG_PINNED: + elif self._is_pinned(obj): hdr = self.header(obj) if hdr.tid & GCFLAG_VISITED: # already visited and keeping track of the object diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -31,6 +31,15 @@ py.test.raises(Exception, self.gc.unpin, llmemory.cast_ptr_to_adr(ptr)) + def test__is_pinned(self): + ptr = self.malloc(S) + adr = llmemory.cast_ptr_to_adr(ptr) + assert not self.gc._is_pinned(adr) + assert self.gc.pin(adr) + assert self.gc._is_pinned(adr) + self.gc.unpin(adr) + assert not self.gc._is_pinned(adr) + # XXX test with multiple mallocs, and only part of them is pinned diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -466,6 +466,10 @@ [s_gc, SomeAddress()], annmodel.s_None) + self._is_pinned_ptr = getfn(GCClass._is_pinned, + [s_gc, SomeAddress()], + annmodel.SomeBool()) + self.write_barrier_ptr = None self.write_barrier_from_array_ptr = None if GCClass.needs_write_barrier: @@ -986,6 +990,11 @@ op = 
hop.spaceop hop.genop("direct_call", [self.unpin_ptr, self.c_const_gc, op.args[0]]) + def gct_gc__is_pinned(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self._is_pinned_ptr, self.c_const_gc, op.args[0]], + resultvar=op.result) + def gct_gc_thread_run(self, hop): assert self.translator.config.translation.thread if hasattr(self.root_walker, 'thread_run_ptr'): diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py --- a/rpython/memory/gctransform/transform.py +++ b/rpython/memory/gctransform/transform.py @@ -356,6 +356,12 @@ def gct_gc_unpin(self, hop): pass + def gct_gc__is_pinned(self, hop): + op = hop.spaceop + hop.genop("same_as", + [rmodel.inputconst(lltype.Bool, False)], + resultvar=op.result) + def gct_gc_identityhash(self, hop): # must be implemented in the various GCs raise NotImplementedError diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py --- a/rpython/memory/gcwrapper.py +++ b/rpython/memory/gcwrapper.py @@ -128,6 +128,9 @@ def unpin(self, addr): self.gc.unpin(addr) + def _is_pinned(self, addr): + return self.gc._is_pinned(addr) + def weakref_create_getlazy(self, objgetter): # we have to be lazy in reading the llinterp variable containing # the 'obj' pointer, because the gc.malloc() call below could diff --git a/rpython/memory/test/gc_test_base.py b/rpython/memory/test/gc_test_base.py --- a/rpython/memory/test/gc_test_base.py +++ b/rpython/memory/test/gc_test_base.py @@ -810,6 +810,27 @@ else: assert res == 0 or res == 13 + def test__is_pinned(self): + def fn(n): + from rpython.rlib.debug import debug_print + s = str(n) + if not rgc.can_move(s): + return 13 + res = int(rgc.pin(s)) + if res: + res += int(rgc._is_pinned(s)) + rgc.unpin(s) + return res + + res = self.interpret(fn, [10]) + if not self.GCClass.moving_gc: + assert res == 13 + elif self.GCClass.can_usually_pin_objects: + assert res == 2 + else: + assert res == 0 or res == 13 + + from rpython.rlib.objectmodel import 
UnboxedValue class TaggedBase(object): diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -76,6 +76,24 @@ resulttype=llmemory.Address) hop.genop('gc_unpin', [v_addr]) +def _is_pinned(obj): + """Method to check if 'obj' is pinned.""" + return False + +class IsPinnedEntry(ExtRegistryEntry): + _about_ = _is_pinned + + def compute_result_annotation(self, s_arg): + from rpython.annotator.model import s_Bool + return s_Bool + + def specialize_call(self, hop): + hop.exception_cannot_occur() + v_obj, = hop.inputargs(hop.args_r[0]) + v_addr = hop.genop('cast_ptr_to_adr', [v_obj], + resulttype=llmemory.Address) + return hop.genop('gc__is_pinned', [v_addr], resulttype=lltype.Bool) + # ____________________________________________________________ # Annotation and specialization @@ -136,13 +154,16 @@ on objects that are already a bit old, so have a chance to be already non-movable.""" if not we_are_translated(): - return + return True # XXX: check if True is the right return (groggi) + if _is_pinned(p): + return False i = 0 while can_move(p): if i > 6: raise NotImplementedError("can't make object non-movable!") collect(i) i += 1 + return True def _heap_stats(): raise NotImplementedError # can't be run directly diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -865,6 +865,9 @@ def op_gc_unpin(self, obj): self.heap.unpin(obj) + def op_gc__is_pinned(self, obj): + return self.heap._is_pinned(obj) + def op_gc_detach_callback_pieces(self): raise NotImplementedError("gc_detach_callback_pieces") def op_gc_reattach_callback_pieces(self): diff --git a/rpython/rtyper/lltypesystem/llheap.py b/rpython/rtyper/lltypesystem/llheap.py --- a/rpython/rtyper/lltypesystem/llheap.py +++ b/rpython/rtyper/lltypesystem/llheap.py @@ -39,3 +39,6 @@ def unpin(obj): raise AssertionError("pin() always returns False, " "so unpin() should not be called") + +def 
_is_pinned(obj): + return False diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -482,6 +482,7 @@ 'gc_heap_stats' : LLOp(canmallocgc=True), 'gc_pin' : LLOp(canrun=True), # XXX understand this, correct? (groggi) 'gc_unpin' : LLOp(canrun=True), # XXX understand this, correct? (groggi) + 'gc_is__pinned' : LLOp(canrun=True), # XXX understand this, correct? (groggi) 'gc_get_rpy_roots' : LLOp(), 'gc_get_rpy_referents': LLOp(), From noreply at buildbot.pypy.org Mon Jun 23 11:48:20 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:20 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: (hopefully) finish work on stepwise useage of the nursery Message-ID: <20140623094820.EA8861C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72157:8306f5ed33e9 Date: 2014-06-23 11:45 +0200 http://bitbucket.org/pypy/pypy/changeset/8306f5ed33e9/ Log: (hopefully) finish work on stepwise useage of the nursery diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -686,15 +686,23 @@ else: self.minor_and_major_collection() - def move_nursery_top(self, totalsize): - size = self.nursery_cleanup - ll_assert(self.nursery_real_top - self.nursery_top >= size, - "nursery_cleanup not a divisor of nursery_size - initial_cleanup") - ll_assert(llmemory.raw_malloc_usage(totalsize) <= size, - "totalsize > nursery_cleanup") + def try_move_nursery_top(self, totalsize): + """Tries to move 'self.nursery_top' to accommodate + an object of 'totalsize'. Returns 'True' on success, otherwise + 'False'. In case of failing (returned 'False') a minor collection + is needed.""" + + # in general we always move 'self.nursery_top' by 'self.nursery_cleanup'. 
+ # However, because of the presence of pinned objects there are cases where + # the GC can't move by 'self.nursery_cleanup' without overflowing the arena. + # For such a case we use the space left in the nursery. + size = min(self.nursery_cleanup, self.nursery_real_top - self.nursery_top) + if llmemory.raw_malloc_usage(totalsize) > size: + return False llarena.arena_reset(self.nursery_top, size, 2) self.nursery_top += size - move_nursery_top._always_inline_ = True + return True + try_move_nursery_top._always_inline_ = True def collect_and_reserve(self, prev_result, totalsize): """To call when nursery_free overflows nursery_top. @@ -728,10 +736,9 @@ else: # # no barriers (i.e. pinned objects) after 'nursery_free'. - # if possible just enlarge the used part of the nursery. - # otherwise we are forced to clean up the nursery. - if self.nursery_top < self.nursery_real_top: - self.move_nursery_top(totalsize) + # If possible just enlarge the used part of the nursery. + # Otherwise we are forced to clean up the nursery. + if self.try_move_nursery_top(totalsize): return prev_result # self.minor_collection() @@ -1623,13 +1630,16 @@ # reset everything after the last pinned object till the end of the arena llarena.arena_reset(prev, self.nursery_real_top - prev, 0) # - # make sure we have some clean space to use after a minor collection - if self.nursery_real_top - prev >= self.nursery_cleanup: + # We assume that there are only a few pinned objects. Therefore, if there + # is 'self.nursery_cleanup' space between the nursery's start ('self.nursery') + # and the last pinned object ('prev'), we conclude that there is enough zeroed + # space inside the arena to use for new allocation. Otherwise we fill + # the nursery with zeros for 'self.nursery_cleanup' of space. 
+ if prev - self.nursery >= self.nursery_cleanup: + nursery_barriers.append(prev) + else: llarena.arena_reset(prev, self.nursery_cleanup, 2) nursery_barriers.append(prev + self.nursery_cleanup) - else: - llarena.arena_reset(prev, self.nursery_real_top - prev, 2) - nursery_barriers.append(self.nursery_real_top) # self.nursery_barriers = nursery_barriers self.surviving_pinned_objects.delete() From noreply at buildbot.pypy.org Mon Jun 23 11:48:15 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:15 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: fix typo. Message-ID: <20140623094815.F1D401C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72153:daf02c5649ff Date: 2014-06-16 17:00 +0200 http://bitbucket.org/pypy/pypy/changeset/daf02c5649ff/ Log: fix typo. pypy translates now diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -482,7 +482,7 @@ 'gc_heap_stats' : LLOp(canmallocgc=True), 'gc_pin' : LLOp(canrun=True), # XXX understand this, correct? (groggi) 'gc_unpin' : LLOp(canrun=True), # XXX understand this, correct? (groggi) - 'gc_is__pinned' : LLOp(canrun=True), # XXX understand this, correct? (groggi) + 'gc__is_pinned' : LLOp(canrun=True), # XXX understand this, correct? 
(groggi) 'gc_get_rpy_roots' : LLOp(), 'gc_get_rpy_referents': LLOp(), From noreply at buildbot.pypy.org Mon Jun 23 11:48:17 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:17 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: code cleanup for more consistency Message-ID: <20140623094817.3683D1C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72154:172776044a92 Date: 2014-06-18 13:51 +0200 http://bitbucket.org/pypy/pypy/changeset/172776044a92/ Log: code cleanup for more consistency used 'can_move()' as the template for 'pin()', 'unpin()' and '_is_pinned()' diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -983,17 +983,24 @@ def gct_gc_pin(self, hop): op = hop.spaceop - hop.genop("direct_call", [self.pin_ptr, self.c_const_gc, op.args[0]], + v_addr = hop.genop('cast_ptr_to_adr', [op.args[0]], + resulttype=llmemory.Address) + hop.genop("direct_call", [self.pin_ptr, self.c_const_gc, v_addr], resultvar=op.result) def gct_gc_unpin(self, hop): op = hop.spaceop - hop.genop("direct_call", [self.unpin_ptr, self.c_const_gc, op.args[0]]) + v_addr = hop.genop('cast_ptr_to_adr', [op.args[0]], + resulttype=llmemory.Address) + hop.genop("direct_call", [self.unpin_ptr, self.c_const_gc, v_addr], + resultvar=op.result) def gct_gc__is_pinned(self, hop): op = hop.spaceop - hop.genop("direct_call", [self._is_pinned_ptr, self.c_const_gc, op.args[0]], - resultvar=op.result) + v_addr = hop.genop('cast_ptr_to_adr', [op.args[0]], + resulttype=llmemory.Address) + hop.genop("direct_call", [self._is_pinned_ptr, self.c_const_gc, v_addr], + resultvar=op.result) def gct_gc_thread_run(self, hop): assert self.translator.config.translation.thread diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -45,16 +45,13 @@ 
class PinEntry(ExtRegistryEntry): _about_ = pin - def compute_result_annotation(self, s_arg): - from rpython.annotator.model import s_Bool - return s_Bool + def compute_result_annotation(self, s_p): + from rpython.annotator import model as annmodel + return annmodel.SomeBool() def specialize_call(self, hop): hop.exception_cannot_occur() - v_obj, = hop.inputargs(hop.args_r[0]) - v_addr = hop.genop('cast_ptr_to_adr', [v_obj], - resulttype=llmemory.Address) - return hop.genop('gc_pin', [v_addr], resulttype=lltype.Bool) + return hop.genop('gc_pin', hop.args_v, resulttype=hop.r_result) def unpin(obj): """Unpin 'obj', allowing it to move again. @@ -66,15 +63,12 @@ class UnpinEntry(ExtRegistryEntry): _about_ = unpin - def compute_result_annotation(self, s_arg): + def compute_result_annotation(self, s_p): pass def specialize_call(self, hop): hop.exception_cannot_occur() - v_obj, = hop.inputargs(hop.args_r[0]) - v_addr = hop.genop('cast_ptr_to_adr', [v_obj], - resulttype=llmemory.Address) - hop.genop('gc_unpin', [v_addr]) + hop.genop('gc_unpin', hop.args_v) def _is_pinned(obj): """Method to check if 'obj' is pinned.""" @@ -83,16 +77,13 @@ class IsPinnedEntry(ExtRegistryEntry): _about_ = _is_pinned - def compute_result_annotation(self, s_arg): - from rpython.annotator.model import s_Bool - return s_Bool + def compute_result_annotation(self, s_p): + from rpython.annotator import model as annmodel + return annmodel.SomeBool() def specialize_call(self, hop): hop.exception_cannot_occur() - v_obj, = hop.inputargs(hop.args_r[0]) - v_addr = hop.genop('cast_ptr_to_adr', [v_obj], - resulttype=llmemory.Address) - return hop.genop('gc__is_pinned', [v_addr], resulttype=lltype.Bool) + return hop.genop('gc__is_pinned', hop.args_v, resulttype=hop.r_result) # ____________________________________________________________ # Annotation and specialization diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ 
-860,13 +860,16 @@ pass # marker for trackgcroot.py def op_gc_pin(self, obj): - return self.heap.pin(obj) + addr = llmemory.cast_ptr_to_adr(obj) + return self.heap.pin(addr) def op_gc_unpin(self, obj): - self.heap.unpin(obj) + addr = llmemory.cast_ptr_to_adr(obj) + self.heap.unpin(addr) def op_gc__is_pinned(self, obj): - return self.heap._is_pinned(obj) + addr = llmemory.cast_ptr_to_adr(obj) + return self.heap._is_pinned(addr) def op_gc_detach_callback_pieces(self): raise NotImplementedError("gc_detach_callback_pieces") From noreply at buildbot.pypy.org Mon Jun 23 11:48:18 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 11:48:18 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: use 'nursery_cleanup' instead of 'intial_cleanup' Message-ID: <20140623094818.67C781C023B@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72155:ee12c163a577 Date: 2014-06-18 15:48 +0200 http://bitbucket.org/pypy/pypy/changeset/ee12c163a577/ Log: use 'nursery_cleanup' instead of 'intial_cleanup' diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1601,33 +1601,32 @@ self.surviving_pinned_objects.length() while self.surviving_pinned_objects.non_empty(): # - next = self.surviving_pinned_objects.pop() - assert next >= prev + cur = self.surviving_pinned_objects.pop() + assert cur >= prev # # clear the arena between the last pinned object (or arena start) # and the pinned object - pinned_obj_size = llarena.getfakearenaaddress(next) - prev + pinned_obj_size = llarena.getfakearenaaddress(cur) - prev llarena.arena_reset(prev, pinned_obj_size, 2) # # clean up object's flags - obj = next + size_gc_header + obj = cur + size_gc_header self.header(obj).tid &= ~GCFLAG_VISITED # # create a new nursery barrier for the pinned object - nursery_barriers.append(next) + nursery_barriers.append(cur) # - # update 'prev' 
to the end of the 'next' object + # update 'prev' to the end of the 'cur' object prev = prev + pinned_obj_size + \ (size_gc_header + self.get_size(obj)) # - # clean up a bit more after the last pinned object + # reset everything after the last pinned object till the end of the arena llarena.arena_reset(prev, self.nursery_real_top - prev, 0) # - # now we want to have some amount of the nursery ready to be used - # after all the pinned objects. - if prev <= self.nursery_real_top - self.initial_cleanup: - llarena.arena_reset(prev, self.initial_cleanup, 2) - nursery_barriers.append(prev + self.initial_cleanup) + # make sure we have some clean space to use after a minor collection + if self.nursery_real_top - prev >= self.nursery_cleanup: + llarena.arena_reset(prev, self.nursery_cleanup, 2) + nursery_barriers.append(prev + self.nursery_cleanup) else: llarena.arena_reset(prev, self.nursery_real_top - prev, 2) nursery_barriers.append(self.nursery_real_top) From noreply at buildbot.pypy.org Mon Jun 23 12:39:04 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 12:39:04 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Kill a random indirection: space.startup() must now be called first, Message-ID: <20140623103904.E3DA11C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72158:537fb36a7f9d Date: 2014-06-23 12:21 +0200 http://bitbucket.org/pypy/pypy/changeset/537fb36a7f9d/ Log: Kill a random indirection: space.startup() must now be called first, before space.call_function(). 
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -30,8 +30,6 @@ if w_dict is not None: # for tests w_entry_point = space.getitem(w_dict, space.wrap('entry_point')) w_run_toplevel = space.getitem(w_dict, space.wrap('run_toplevel')) - w_call_finish_gateway = space.wrap(gateway.interp2app(call_finish)) - w_call_startup_gateway = space.wrap(gateway.interp2app(call_startup)) withjit = space.config.objspace.usemodules.pypyjit def entry_point(argv): @@ -53,7 +51,7 @@ argv = argv[:1] + argv[3:] try: try: - space.call_function(w_run_toplevel, w_call_startup_gateway) + space.startup() w_executable = space.wrap(argv[0]) w_argv = space.newlist([space.wrap(s) for s in argv[1:]]) w_exitcode = space.call_function(w_entry_point, w_executable, w_argv) @@ -69,7 +67,7 @@ return 1 finally: try: - space.call_function(w_run_toplevel, w_call_finish_gateway) + space.finish() except OperationError, e: debug("OperationError:") debug(" operror-type: " + e.w_type.getname(space)) @@ -184,11 +182,6 @@ 'pypy_thread_attach': pypy_thread_attach, 'pypy_setup_home': pypy_setup_home} -def call_finish(space): - space.finish() - -def call_startup(space): - space.startup() # _____ Define and setup target ___ From noreply at buildbot.pypy.org Mon Jun 23 13:24:03 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 23 Jun 2014 13:24:03 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: keep 'prev_result' up-to-date in 'collect_and_reserve'. Message-ID: <20140623112403.E40931C32C7@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72159:056197b70af4 Date: 2014-06-23 13:14 +0200 http://bitbucket.org/pypy/pypy/changeset/056197b70af4/ Log: keep 'prev_result' up-to-date in 'collect_and_reserve'. 
fixes failing test 'test_collect_during_collect' diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -733,6 +733,12 @@ # nursery. self.nursery_free = self.nursery_top + pinned_obj_size self.nursery_top = self.nursery_barriers.popleft() + # + # because we encountered a barrier, we also have to fix + # 'prev_result' as the one provided as a method parameter + # can't be used as there is no space between 'prev_result' + # and the barrier for 'totalsize'. + prev_result = self.nursery_free else: # # no barriers (i.e. pinned objects) after 'nursery_free'. From noreply at buildbot.pypy.org Mon Jun 23 13:39:49 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 13:39:49 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Oops, documented but forgot to call _make_sure_does_not_move() Message-ID: <20140623113949.B8F471C31FE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72160:bb24a270b876 Date: 2014-06-23 13:28 +0200 http://bitbucket.org/pypy/pypy/changeset/bb24a270b876/ Log: Oops, documented but forgot to call _make_sure_does_not_move() diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -313,7 +313,9 @@ assert isinstance(value, Cls) or value is None if we_are_translated(): from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr + from rpython.rlib.rgc import _make_sure_does_not_move ptr = cast_instance_to_base_ptr(value) + _make_sure_does_not_move(ptr) llop.threadlocalref_set(lltype.Void, opaque_id, ptr) else: self.local.value = value diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py --- a/rpython/rlib/test/test_rthread.py +++ b/rpython/rlib/test/test_rthread.py @@ -222,6 +222,7 @@ def f(): x1 = FooBar() t.set(x1) + import gc; gc.collect() assert t.get() is x1 return 42 fn = 
self.getcompiled(f, []) @@ -238,4 +239,4 @@ gcpolicy = 'boehm' class TestUsingFramework(AbstractThreadTests): - gcpolicy = 'generation' + gcpolicy = 'minimark' From noreply at buildbot.pypy.org Mon Jun 23 13:54:59 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 13:54:59 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Must take a GCREF argument Message-ID: <20140623115500.0080A1C31FE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72161:bbf90bac634b Date: 2014-06-23 13:54 +0200 http://bitbucket.org/pypy/pypy/changeset/bbf90bac634b/ Log: Must take a GCREF argument diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -315,7 +315,8 @@ from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr from rpython.rlib.rgc import _make_sure_does_not_move ptr = cast_instance_to_base_ptr(value) - _make_sure_does_not_move(ptr) + gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr) + _make_sure_does_not_move(gcref) llop.threadlocalref_set(lltype.Void, opaque_id, ptr) else: self.local.value = value From noreply at buildbot.pypy.org Mon Jun 23 16:19:39 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 23 Jun 2014 16:19:39 +0200 (CEST) Subject: [pypy-commit] pypy utf8-unicode2: Steal functionality from runicode and start fixing _codec functions Message-ID: <20140623141939.C77911C31FE@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: utf8-unicode2 Changeset: r72162:3f2065730015 Date: 2014-06-23 09:18 -0500 http://bitbucket.org/pypy/pypy/changeset/3f2065730015/ Log: Steal functionality from runicode and start fixing _codec functions diff too long, truncating to 2000 out of 3250 lines diff --git a/pypy/interpreter/test/test_utf8.py b/pypy/interpreter/test/test_utf8.py --- a/pypy/interpreter/test/test_utf8.py +++ b/pypy/interpreter/test/test_utf8.py @@ -61,5 +61,3 @@ assert s[0:1] == u'A' assert s[0:2] == u'A\u010F' assert s[1:2] 
== u'\u010F' - assert s[-4:-3] == u'A' - assert s[-4:-2] == u'A\u010F' diff --git a/pypy/interpreter/test/test_utf8_codecs.py b/pypy/interpreter/test/test_utf8_codecs.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/test/test_utf8_codecs.py @@ -0,0 +1,792 @@ +# -*- coding: utf-8 -*- + +import py +import sys, random + +from pypy.interpreter.utf8 import Utf8Str +from pypy.interpreter import utf8_codecs + +''' +try: + import signal +except ImportError: + pass +else: + class MyKeyboardInterrupt(BaseException): + pass + def _interrupt(*args): + __tracebackhide__ = True + raise MyKeyboardInterrupt + signal.signal(signal.SIGINT, _interrupt) +''' + +class UnicodeTests(object): + def typeequals(self, x, y): + assert x == y + assert type(x) is type(y) + + def getdecoder(self, encoding): + return getattr(utf8_codecs, "str_decode_%s" % encoding.replace("-", "_")) + + def getencoder(self, encoding): + return getattr(utf8_codecs, + "unicode_encode_%s" % encoding.replace("-", "_")) + + def checkdecode(self, s, encoding): + decoder = self.getdecoder(encoding) + try: + if isinstance(s, str): + trueresult = s.decode(encoding) + else: + trueresult = s + s = s.encode(encoding) + except LookupError, e: + py.test.skip(e) + trueresult = Utf8Str.from_unicode(trueresult) + result, consumed = decoder(s, len(s), True) + assert consumed == len(s) + self.typeequals(trueresult, result) + + def checkencode(self, s, encoding): + encoder = self.getencoder(encoding) + try: + if isinstance(s, unicode): + trueresult = s.encode(encoding) + else: + trueresult = s + s = s.decode(encoding) + except LookupError, e: + py.test.skip(e) + s = Utf8Str.from_unicode(s) + result = encoder(s, len(s), True) + self.typeequals(trueresult, result) + + def checkencodeerror(self, s, encoding, start, stop): + called = [False] + def errorhandler(errors, enc, msg, t, startingpos, + endingpos): + called[0] = True + assert errors == "foo!" 
+ assert enc == encoding + assert t is s + assert start == startingpos + assert stop == endingpos + return "42424242", None, stop + encoder = self.getencoder(encoding) + result = encoder(s, len(s), "foo!", errorhandler) + assert called[0] + assert "42424242" in result + + # ensure bytes results passthru + def errorhandler_bytes(errors, enc, msg, t, startingpos, + endingpos): + return None, '\xc3', endingpos + result = encoder(s, len(s), "foo!", errorhandler_bytes) + assert '\xc3' in result + + def checkdecodeerror(self, s, encoding, start, stop, + addstuff=True, msg=None): + called = [0] + def errorhandler(errors, enc, errmsg, t, startingpos, + endingpos): + called[0] += 1 + if called[0] == 1: + assert errors == "foo!" + assert enc == encoding.replace('-', '') + assert t is s + assert start == startingpos + assert stop == endingpos + if msg is not None: + assert errmsg == msg + return "42424242", stop + return "", endingpos + decoder = self.getdecoder(encoding) + if addstuff: + s += "some rest in ascii" + result, _ = decoder(s, len(s), "foo!", True, errorhandler) + assert called[0] > 0 + assert "42424242" in result + if addstuff: + assert result.endswith("some rest in ascii") + + +class TestDecoding(UnicodeTests): + # XXX test bom recognition in utf-16 + # XXX test proper error handling + + def test_all_ascii(self): + for i in range(128): + for encoding in "utf-8 latin-1 ascii".split(): + self.checkdecode(chr(i), encoding) + + def test_all_first_256(self): + for i in range(256): + for encoding in ("utf-7 utf-8 latin-1 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkdecode(unichr(i), encoding) + + def test_first_10000(self): + for i in range(10000): + for encoding in ("utf-7 utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + if encoding == 'utf-8' and 0xd800 <= i <= 0xdfff: + # Don't try to encode lone surrogates + continue + self.checkdecode(unichr(i), encoding) + + def test_random(self): + for i in 
range(10000): + v = random.randrange(sys.maxunicode) + if 0xd800 <= v <= 0xdfff: + continue + uni = unichr(v) + if sys.version >= "2.7": + self.checkdecode(uni, "utf-7") + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkdecode(uni, encoding) + + def test_maxunicode(self): + uni = unichr(sys.maxunicode) + if sys.version >= "2.7": + self.checkdecode(uni, "utf-7") + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkdecode(uni, encoding) + + def test_ascii_error(self): + self.checkdecodeerror("abc\xFF\xFF\xFFcde", "ascii", 3, 4) + + def test_decode_replace(self): + decoder = self.getdecoder('utf-8') + assert decoder('caf\xe9', 4, 'replace', True) == (u'caf\ufffd', 4) + + def test_utf16_errors(self): + # trunkated BOM + for s in ["\xff", "\xfe"]: + self.checkdecodeerror(s, "utf-16", 0, len(s), addstuff=False) + + for s in [ + # unexpected end of data ascii + "\xff\xfeF", + # unexpected end of data + '\xff\xfe\xc0\xdb\x00', '\xff\xfe\xc0\xdb', '\xff\xfe\xc0', + ]: + self.checkdecodeerror(s, "utf-16", 2, len(s), addstuff=False) + for s in [ + # illegal surrogate + "\xff\xfe\xff\xdb\xff\xff", + ]: + self.checkdecodeerror(s, "utf-16", 2, 4, addstuff=False) + + def test_utf16_bugs(self): + s = '\x80-\xe9\xdeL\xa3\x9b' + py.test.raises(UnicodeDecodeError, utf8_codecs.str_decode_utf_16_le, + s, len(s), True) + + def test_utf7_bugs(self): + u = Utf8Str.from_unicode(u'A\u2262\u0391.') + assert utf8_codecs.unicode_encode_utf_7(u, len(u), None) == 'A+ImIDkQ.' 
+ + def test_utf7_tofrom_utf8_bug(self): + def _assert_decu7(input, expected): + assert (utf8_codecs.str_decode_utf_7(input, len(input), None) == + (expected, len(input))) + + _assert_decu7('+-', u'+') + _assert_decu7('+-+-', u'++') + _assert_decu7('+-+AOQ-', u'+\xe4') + _assert_decu7('+AOQ-', u'\xe4') + _assert_decu7('+AOQ-', u'\xe4') + _assert_decu7('+AOQ- ', u'\xe4 ') + _assert_decu7(' +AOQ-', u' \xe4') + _assert_decu7(' +AOQ- ', u' \xe4 ') + _assert_decu7('+AOQ-+AOQ-', u'\xe4\xe4') + + s_utf7 = 'Die M+AOQ-nner +AOQ-rgen sich!' + s_utf8 = Utf8Str.from_unicode(u'Die Männer ärgen sich!') + s_utf8_esc = Utf8Str.from_unicode(u'Die M\xe4nner \xe4rgen sich!') + + _assert_decu7(s_utf7, s_utf8_esc) + _assert_decu7(s_utf7, s_utf8) + + assert utf8_codecs.unicode_encode_utf_7(s_utf8_esc, len(s_utf8_esc), None) == s_utf7 + assert utf8_codecs.unicode_encode_utf_7(s_utf8, len(s_utf8_esc), None) == s_utf7 + + def test_utf7_partial(self): + s = u"a+-b".encode('utf-7') + assert s == "a+--b" + decode = self.getdecoder('utf-7') + assert decode(s, 1, None) == (u'a', 1) + assert decode(s, 2, None) == (u'a', 1) + assert decode(s, 3, None) == (u'a+', 3) + assert decode(s, 4, None) == (u'a+-', 4) + assert decode(s, 5, None) == (u'a+-b', 5) + + def test_utf7_surrogates(self): + encode_ = self.getencoder('utf-7') + encode = lambda u, s, err: encode_(Utf8Str.from_unicode(u), s, err) + decode = self.getdecoder('utf-7') + + u = Utf8Str.from_unicode(u'\U000abcde') + assert encode_(u, len(u), None) == '+2m/c3g-' + + # Unpaired surrogates are passed through + assert encode(u'\uD801', 1, None) == '+2AE-' + assert encode(u'\uD801x', 2, None) == '+2AE-x' + assert encode(u'\uDC01', 1, None) == '+3AE-' + assert encode(u'\uDC01x', 2, None) == '+3AE-x' + assert decode('+2AE-', 5, None) == (u'\uD801', 5) + assert decode('+2AE-x', 6, None) == (u'\uD801x', 6) + assert decode('+3AE-', 5, None) == (u'\uDC01', 5) + assert decode('+3AE-x', 6, None) == (u'\uDC01x', 6) + + u = 
Utf8Str.from_unicode(u'\uD801\U000abcde') + assert encode_(u, len(u), None) == '+2AHab9ze-' + assert decode('+2AHab9ze-', 10, None) == (u'\uD801\U000abcde', 10) + + +class TestUTF8Decoding(UnicodeTests): + def __init__(self): + self.decoder = self.getdecoder('utf-8') + + def to_bytestring(self, bytes): + return ''.join(chr(int(c, 16)) for c in bytes.split()) + + def test_single_chars_utf8(self): + for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]: + self.checkdecode(s, "utf-8") + + def test_utf8_surrogate(self): + # surrogates used to be allowed by python 2.x + py.test.raises(UnicodeDecodeError, self.checkdecode, u"\ud800", "utf-8") + + def test_invalid_start_byte(self): + """ + Test that an 'invalid start byte' error is raised when the first byte + is not in the ASCII range or is not a valid start byte of a 2-, 3-, or + 4-bytes sequence. The invalid start byte is replaced with a single + U+FFFD when errors='replace'. + E.g. <80> is a continuation byte and can appear only after a start byte. + """ + FFFD = u'\ufffd' + for byte in '\x80\xA0\x9F\xBF\xC0\xC1\xF5\xFF': + py.test.raises(UnicodeDecodeError, self.decoder, byte, 1, None, final=True) + self.checkdecodeerror(byte, 'utf-8', 0, 1, addstuff=False, + msg='invalid start byte') + assert self.decoder(byte, 1, 'replace', final=True) == (FFFD, 1) + assert (self.decoder('aaaa' + byte + 'bbbb', 9, 'replace', + final=True) == + (u'aaaa'+ FFFD + u'bbbb', 9)) + assert self.decoder(byte, 1, 'ignore', final=True) == (u'', 1) + assert (self.decoder('aaaa' + byte + 'bbbb', 9, 'ignore', + final=True) == (u'aaaabbbb', 9)) + + def test_unexpected_end_of_data(self): + """ + Test that an 'unexpected end of data' error is raised when the string + ends after a start byte of a 2-, 3-, or 4-bytes sequence without having + enough continuation bytes. The incomplete sequence is replaced with a + single U+FFFD when errors='replace'. + E.g. 
in the sequence , F3 is the start byte of a 4-bytes + sequence, but it's followed by only 2 valid continuation bytes and the + last continuation bytes is missing. + Note: the continuation bytes must be all valid, if one of them is + invalid another error will be raised. + """ + sequences = [ + 'C2', 'DF', + 'E0 A0', 'E0 BF', 'E1 80', 'E1 BF', 'EC 80', 'EC BF', + 'ED 80', 'ED 9F', 'EE 80', 'EE BF', 'EF 80', 'EF BF', + 'F0 90', 'F0 BF', 'F0 90 80', 'F0 90 BF', 'F0 BF 80', 'F0 BF BF', + 'F1 80', 'F1 BF', 'F1 80 80', 'F1 80 BF', 'F1 BF 80', 'F1 BF BF', + 'F3 80', 'F3 BF', 'F3 80 80', 'F3 80 BF', 'F3 BF 80', 'F3 BF BF', + 'F4 80', 'F4 8F', 'F4 80 80', 'F4 80 BF', 'F4 8F 80', 'F4 8F BF' + ] + FFFD = u'\ufffd' + for seq in sequences: + seq = self.to_bytestring(seq) + py.test.raises(UnicodeDecodeError, self.decoder, seq, len(seq), + None, final=True) + self.checkdecodeerror(seq, 'utf-8', 0, len(seq), addstuff=False, + msg='unexpected end of data') + assert self.decoder(seq, len(seq), 'replace', final=True + ) == (FFFD, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + 'replace', final=True) == + (u'aaaa'+ FFFD + u'bbbb', len(seq) + 8)) + assert self.decoder(seq, len(seq), 'ignore', final=True + ) == (u'', len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'ignore', + final=True) == (u'aaaabbbb', len(seq) + 8)) + + def test_invalid_cb_for_2bytes_seq(self): + """ + Test that an 'invalid continuation byte' error is raised when the + continuation byte of a 2-bytes sequence is invalid. The start byte + is replaced by a single U+FFFD and the second byte is handled + separately when errors='replace'. + E.g. in the sequence , C2 is the start byte of a 2-bytes + sequence, but 41 is not a valid continuation byte because it's the + ASCII letter 'A'. 
+ """ + FFFD = u'\ufffd' + FFFDx2 = FFFD * 2 + sequences = [ + ('C2 00', FFFD+u'\x00'), ('C2 7F', FFFD+u'\x7f'), + ('C2 C0', FFFDx2), ('C2 FF', FFFDx2), + ('DF 00', FFFD+u'\x00'), ('DF 7F', FFFD+u'\x7f'), + ('DF C0', FFFDx2), ('DF FF', FFFDx2), + ] + for seq, res in sequences: + seq = self.to_bytestring(seq) + py.test.raises(UnicodeDecodeError, self.decoder, seq, len(seq), + None, final=True) + self.checkdecodeerror(seq, 'utf-8', 0, 1, addstuff=False, + msg='invalid continuation byte') + assert self.decoder(seq, len(seq), 'replace', final=True + ) == (res, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + 'replace', final=True) == + (u'aaaa' + res + u'bbbb', len(seq) + 8)) + res = res.replace(FFFD, u'') + assert self.decoder(seq, len(seq), 'ignore', final=True + ) == (res, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + 'ignore', final=True) == + (u'aaaa' + res + u'bbbb', len(seq) + 8)) + + def test_invalid_cb_for_3bytes_seq(self): + """ + Test that an 'invalid continuation byte' error is raised when the + continuation byte(s) of a 3-bytes sequence are invalid. When + errors='replace', if the first continuation byte is valid, the first + two bytes (start byte + 1st cb) are replaced by a single U+FFFD and the + third byte is handled separately, otherwise only the start byte is + replaced with a U+FFFD and the other continuation bytes are handled + separately. + E.g. in the sequence , E1 is the start byte of a 3-bytes + sequence, 80 is a valid continuation byte, but 41 is not a valid cb + because it's the ASCII letter 'A'. + Note: when the start byte is E0 or ED, the valid ranges for the first + continuation byte are limited to A0..BF and 80..9F respectively. + However, when the start byte is ED, Python 2 considers all the bytes + in range 80..BF valid. This is fixed in Python 3. 
+ """ + FFFD = u'\ufffd' + FFFDx2 = FFFD * 2 + sequences = [ + ('E0 00', FFFD+u'\x00'), ('E0 7F', FFFD+u'\x7f'), ('E0 80', FFFDx2), + ('E0 9F', FFFDx2), ('E0 C0', FFFDx2), ('E0 FF', FFFDx2), + ('E0 A0 00', FFFD+u'\x00'), ('E0 A0 7F', FFFD+u'\x7f'), + ('E0 A0 C0', FFFDx2), ('E0 A0 FF', FFFDx2), + ('E0 BF 00', FFFD+u'\x00'), ('E0 BF 7F', FFFD+u'\x7f'), + ('E0 BF C0', FFFDx2), ('E0 BF FF', FFFDx2), ('E1 00', FFFD+u'\x00'), + ('E1 7F', FFFD+u'\x7f'), ('E1 C0', FFFDx2), ('E1 FF', FFFDx2), + ('E1 80 00', FFFD+u'\x00'), ('E1 80 7F', FFFD+u'\x7f'), + ('E1 80 C0', FFFDx2), ('E1 80 FF', FFFDx2), + ('E1 BF 00', FFFD+u'\x00'), ('E1 BF 7F', FFFD+u'\x7f'), + ('E1 BF C0', FFFDx2), ('E1 BF FF', FFFDx2), ('EC 00', FFFD+u'\x00'), + ('EC 7F', FFFD+u'\x7f'), ('EC C0', FFFDx2), ('EC FF', FFFDx2), + ('EC 80 00', FFFD+u'\x00'), ('EC 80 7F', FFFD+u'\x7f'), + ('EC 80 C0', FFFDx2), ('EC 80 FF', FFFDx2), + ('EC BF 00', FFFD+u'\x00'), ('EC BF 7F', FFFD+u'\x7f'), + ('EC BF C0', FFFDx2), ('EC BF FF', FFFDx2), ('ED 00', FFFD+u'\x00'), + ('ED 7F', FFFD+u'\x7f'), + # ('ED A0', FFFDx2), ('ED BF', FFFDx2), # see note ^ + ('ED C0', FFFDx2), ('ED FF', FFFDx2), ('ED 80 00', FFFD+u'\x00'), + ('ED 80 7F', FFFD+u'\x7f'), ('ED 80 C0', FFFDx2), + ('ED 80 FF', FFFDx2), ('ED 9F 00', FFFD+u'\x00'), + ('ED 9F 7F', FFFD+u'\x7f'), ('ED 9F C0', FFFDx2), + ('ED 9F FF', FFFDx2), ('EE 00', FFFD+u'\x00'), + ('EE 7F', FFFD+u'\x7f'), ('EE C0', FFFDx2), ('EE FF', FFFDx2), + ('EE 80 00', FFFD+u'\x00'), ('EE 80 7F', FFFD+u'\x7f'), + ('EE 80 C0', FFFDx2), ('EE 80 FF', FFFDx2), + ('EE BF 00', FFFD+u'\x00'), ('EE BF 7F', FFFD+u'\x7f'), + ('EE BF C0', FFFDx2), ('EE BF FF', FFFDx2), ('EF 00', FFFD+u'\x00'), + ('EF 7F', FFFD+u'\x7f'), ('EF C0', FFFDx2), ('EF FF', FFFDx2), + ('EF 80 00', FFFD+u'\x00'), ('EF 80 7F', FFFD+u'\x7f'), + ('EF 80 C0', FFFDx2), ('EF 80 FF', FFFDx2), + ('EF BF 00', FFFD+u'\x00'), ('EF BF 7F', FFFD+u'\x7f'), + ('EF BF C0', FFFDx2), ('EF BF FF', FFFDx2), + ] + for seq, res in sequences: + seq = 
self.to_bytestring(seq) + py.test.raises(UnicodeDecodeError, self.decoder, seq, len(seq), + None, final=True) + self.checkdecodeerror(seq, 'utf-8', 0, len(seq)-1, addstuff=False, + msg='invalid continuation byte') + assert self.decoder(seq, len(seq), 'replace', final=True + ) == (res, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + 'replace', final=True) == + (u'aaaa' + res + u'bbbb', len(seq) + 8)) + res = res.replace(FFFD, u'') + assert self.decoder(seq, len(seq), 'ignore', final=True + ) == (res, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'ignore', + final=True) == (u'aaaa' + res + u'bbbb', len(seq) + 8)) + + def test_invalid_cb_for_4bytes_seq(self): + """ + Test that an 'invalid continuation byte' error is raised when the + continuation byte(s) of a 4-bytes sequence are invalid. When + errors='replace',the start byte and all the following valid + continuation bytes are replaced with a single U+FFFD, and all the bytes + starting from the first invalid continuation bytes (included) are + handled separately. + E.g. in the sequence , E1 is the start byte of a 3-bytes + sequence, 80 is a valid continuation byte, but 41 is not a valid cb + because it's the ASCII letter 'A'. + Note: when the start byte is E0 or ED, the valid ranges for the first + continuation byte are limited to A0..BF and 80..9F respectively. + However, when the start byte is ED, Python 2 considers all the bytes + in range 80..BF valid. This is fixed in Python 3. 
+ """ + FFFD = u'\ufffd' + FFFDx2 = FFFD * 2 + sequences = [ + ('F0 00', FFFD+u'\x00'), ('F0 7F', FFFD+u'\x7f'), ('F0 80', FFFDx2), + ('F0 8F', FFFDx2), ('F0 C0', FFFDx2), ('F0 FF', FFFDx2), + ('F0 90 00', FFFD+u'\x00'), ('F0 90 7F', FFFD+u'\x7f'), + ('F0 90 C0', FFFDx2), ('F0 90 FF', FFFDx2), + ('F0 BF 00', FFFD+u'\x00'), ('F0 BF 7F', FFFD+u'\x7f'), + ('F0 BF C0', FFFDx2), ('F0 BF FF', FFFDx2), + ('F0 90 80 00', FFFD+u'\x00'), ('F0 90 80 7F', FFFD+u'\x7f'), + ('F0 90 80 C0', FFFDx2), ('F0 90 80 FF', FFFDx2), + ('F0 90 BF 00', FFFD+u'\x00'), ('F0 90 BF 7F', FFFD+u'\x7f'), + ('F0 90 BF C0', FFFDx2), ('F0 90 BF FF', FFFDx2), + ('F0 BF 80 00', FFFD+u'\x00'), ('F0 BF 80 7F', FFFD+u'\x7f'), + ('F0 BF 80 C0', FFFDx2), ('F0 BF 80 FF', FFFDx2), + ('F0 BF BF 00', FFFD+u'\x00'), ('F0 BF BF 7F', FFFD+u'\x7f'), + ('F0 BF BF C0', FFFDx2), ('F0 BF BF FF', FFFDx2), + ('F1 00', FFFD+u'\x00'), ('F1 7F', FFFD+u'\x7f'), ('F1 C0', FFFDx2), + ('F1 FF', FFFDx2), ('F1 80 00', FFFD+u'\x00'), + ('F1 80 7F', FFFD+u'\x7f'), ('F1 80 C0', FFFDx2), + ('F1 80 FF', FFFDx2), ('F1 BF 00', FFFD+u'\x00'), + ('F1 BF 7F', FFFD+u'\x7f'), ('F1 BF C0', FFFDx2), + ('F1 BF FF', FFFDx2), ('F1 80 80 00', FFFD+u'\x00'), + ('F1 80 80 7F', FFFD+u'\x7f'), ('F1 80 80 C0', FFFDx2), + ('F1 80 80 FF', FFFDx2), ('F1 80 BF 00', FFFD+u'\x00'), + ('F1 80 BF 7F', FFFD+u'\x7f'), ('F1 80 BF C0', FFFDx2), + ('F1 80 BF FF', FFFDx2), ('F1 BF 80 00', FFFD+u'\x00'), + ('F1 BF 80 7F', FFFD+u'\x7f'), ('F1 BF 80 C0', FFFDx2), + ('F1 BF 80 FF', FFFDx2), ('F1 BF BF 00', FFFD+u'\x00'), + ('F1 BF BF 7F', FFFD+u'\x7f'), ('F1 BF BF C0', FFFDx2), + ('F1 BF BF FF', FFFDx2), ('F3 00', FFFD+u'\x00'), + ('F3 7F', FFFD+u'\x7f'), ('F3 C0', FFFDx2), ('F3 FF', FFFDx2), + ('F3 80 00', FFFD+u'\x00'), ('F3 80 7F', FFFD+u'\x7f'), + ('F3 80 C0', FFFDx2), ('F3 80 FF', FFFDx2), + ('F3 BF 00', FFFD+u'\x00'), ('F3 BF 7F', FFFD+u'\x7f'), + ('F3 BF C0', FFFDx2), ('F3 BF FF', FFFDx2), + ('F3 80 80 00', FFFD+u'\x00'), ('F3 80 80 7F', FFFD+u'\x7f'), + ('F3 80 
80 C0', FFFDx2), ('F3 80 80 FF', FFFDx2), + ('F3 80 BF 00', FFFD+u'\x00'), ('F3 80 BF 7F', FFFD+u'\x7f'), + ('F3 80 BF C0', FFFDx2), ('F3 80 BF FF', FFFDx2), + ('F3 BF 80 00', FFFD+u'\x00'), ('F3 BF 80 7F', FFFD+u'\x7f'), + ('F3 BF 80 C0', FFFDx2), ('F3 BF 80 FF', FFFDx2), + ('F3 BF BF 00', FFFD+u'\x00'), ('F3 BF BF 7F', FFFD+u'\x7f'), + ('F3 BF BF C0', FFFDx2), ('F3 BF BF FF', FFFDx2), + ('F4 00', FFFD+u'\x00'), ('F4 7F', FFFD+u'\x7f'), ('F4 90', FFFDx2), + ('F4 BF', FFFDx2), ('F4 C0', FFFDx2), ('F4 FF', FFFDx2), + ('F4 80 00', FFFD+u'\x00'), ('F4 80 7F', FFFD+u'\x7f'), + ('F4 80 C0', FFFDx2), ('F4 80 FF', FFFDx2), + ('F4 8F 00', FFFD+u'\x00'), ('F4 8F 7F', FFFD+u'\x7f'), + ('F4 8F C0', FFFDx2), ('F4 8F FF', FFFDx2), + ('F4 80 80 00', FFFD+u'\x00'), ('F4 80 80 7F', FFFD+u'\x7f'), + ('F4 80 80 C0', FFFDx2), ('F4 80 80 FF', FFFDx2), + ('F4 80 BF 00', FFFD+u'\x00'), ('F4 80 BF 7F', FFFD+u'\x7f'), + ('F4 80 BF C0', FFFDx2), ('F4 80 BF FF', FFFDx2), + ('F4 8F 80 00', FFFD+u'\x00'), ('F4 8F 80 7F', FFFD+u'\x7f'), + ('F4 8F 80 C0', FFFDx2), ('F4 8F 80 FF', FFFDx2), + ('F4 8F BF 00', FFFD+u'\x00'), ('F4 8F BF 7F', FFFD+u'\x7f'), + ('F4 8F BF C0', FFFDx2), ('F4 8F BF FF', FFFDx2) + ] + for seq, res in sequences: + seq = self.to_bytestring(seq) + py.test.raises(UnicodeDecodeError, self.decoder, seq, len(seq), + None, final=True) + self.checkdecodeerror(seq, 'utf-8', 0, len(seq)-1, addstuff=False, + msg='invalid continuation byte') + assert self.decoder(seq, len(seq), 'replace', final=True + ) == (res, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + 'replace', final=True) == + (u'aaaa' + res + u'bbbb', len(seq) + 8)) + res = res.replace(FFFD, u'') + assert self.decoder(seq, len(seq), 'ignore', final=True + ) == (res, len(seq)) + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'ignore', + final=True) == (u'aaaa' + res + u'bbbb', len(seq) + 8)) + + def test_utf8_errors(self): + # unexpected end of data + for s in ['\xd7', '\xd6', '\xeb\x96', 
'\xf0\x90\x91', '\xc2', '\xdf']: + self.checkdecodeerror(s, 'utf-8', 0, len(s), addstuff=False, + msg='unexpected end of data') + + # invalid data 2 byte + for s in ["\xd7\x50", "\xd6\x06", "\xd6\xD6"]: + self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True, + msg='invalid continuation byte') + # invalid data 3 byte + for s in ["\xeb\x56\x95", "\xeb\x06\x95", "\xeb\xD6\x95"]: + self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True, + msg='invalid continuation byte') + for s in ["\xeb\x96\x55", "\xeb\x96\x05", "\xeb\x96\xD5"]: + self.checkdecodeerror(s, "utf-8", 0, 2, addstuff=True, + msg='invalid continuation byte') + # invalid data 4 byte + for s in ["\xf0\x50\x91\x93", "\xf0\x00\x91\x93", "\xf0\xd0\x91\x93"]: + self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True, + msg='invalid continuation byte') + for s in ["\xf0\x90\x51\x93", "\xf0\x90\x01\x93", "\xf0\x90\xd1\x93"]: + self.checkdecodeerror(s, "utf-8", 0, 2, addstuff=True, + msg='invalid continuation byte') + for s in ["\xf0\x90\x91\x53", "\xf0\x90\x91\x03", "\xf0\x90\x91\xd3"]: + self.checkdecodeerror(s, "utf-8", 0, 3, addstuff=True, + msg='invalid continuation byte') + + def test_issue8271(self): + # From CPython + # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, + # only the start byte and the continuation byte(s) are now considered + # invalid, instead of the number of bytes specified by the start byte. + # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95, + # table 3-8, Row 2) for more information about the algorithm used. 
+ FFFD = u'\ufffd' + sequences = [ + # invalid start bytes + ('\x80', FFFD), # continuation byte + ('\x80\x80', FFFD*2), # 2 continuation bytes + ('\xc0', FFFD), + ('\xc0\xc0', FFFD*2), + ('\xc1', FFFD), + ('\xc1\xc0', FFFD*2), + ('\xc0\xc1', FFFD*2), + # with start byte of a 2-byte sequence + ('\xc2', FFFD), # only the start byte + ('\xc2\xc2', FFFD*2), # 2 start bytes + ('\xc2\xc2\xc2', FFFD*3), # 2 start bytes + ('\xc2\x41', FFFD+'A'), # invalid continuation byte + # with start byte of a 3-byte sequence + ('\xe1', FFFD), # only the start byte + ('\xe1\xe1', FFFD*2), # 2 start bytes + ('\xe1\xe1\xe1', FFFD*3), # 3 start bytes + ('\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes + ('\xe1\x80', FFFD), # only 1 continuation byte + ('\xe1\x41', FFFD+'A'), # invalid continuation byte + ('\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb + ('\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes + ('\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte + ('\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid + ('\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid + # with start byte of a 4-byte sequence + ('\xf1', FFFD), # only the start byte + ('\xf1\xf1', FFFD*2), # 2 start bytes + ('\xf1\xf1\xf1', FFFD*3), # 3 start bytes + ('\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes + ('\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes + ('\xf1\x80', FFFD), # only 1 continuation bytes + ('\xf1\x80\x80', FFFD), # only 2 continuation bytes + ('\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid + ('\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 1 invalid + ('\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid + ('\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cv and 1 valid + ('\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 invalid + ('\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # 2 invalid cb and 1 invalid + ('\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 1 valid cb and 1 invalid + ('\xf1\x41\xf1\x80', FFFD+'A'+FFFD), + 
('\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2), + ('\xf1\xf1\x80\x41', FFFD*2+'A'), + ('\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2), + # with invalid start byte of a 4-byte sequence (rfc2279) + ('\xf5', FFFD), # only the start byte + ('\xf5\xf5', FFFD*2), # 2 start bytes + ('\xf5\x80', FFFD*2), # only 1 continuation byte + ('\xf5\x80\x80', FFFD*3), # only 2 continuation byte + ('\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes + ('\xf5\x80\x41', FFFD*2+'A'), # 1 valid cb and 1 invalid + ('\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD), + ('\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'), + # with invalid start byte of a 5-byte sequence (rfc2279) + ('\xf8', FFFD), # only the start byte + ('\xf8\xf8', FFFD*2), # 2 start bytes + ('\xf8\x80', FFFD*2), # only one continuation byte + ('\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid + ('\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes + # with invalid start byte of a 6-byte sequence (rfc2279) + ('\xfc', FFFD), # only the start byte + ('\xfc\xfc', FFFD*2), # 2 start bytes + ('\xfc\x80\x80', FFFD*3), # only 2 continuation bytes + ('\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes + # invalid start byte + ('\xfe', FFFD), + ('\xfe\x80\x80', FFFD*3), + # other sequences + ('\xf1\x80\x41\x42\x43', u'\ufffd\x41\x42\x43'), + ('\xf1\x80\xff\x42\x43', u'\ufffd\ufffd\x42\x43'), + ('\xf1\x80\xc2\x81\x43', u'\ufffd\x81\x43'), + ('\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64', + u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'), + ] + + for n, (seq, res) in enumerate(sequences): + decoder = self.getdecoder('utf-8') + py.test.raises(UnicodeDecodeError, decoder, seq, len(seq), None, final=True) + assert decoder(seq, len(seq), 'replace', final=True + ) == (res, len(seq)) + assert decoder(seq + 'b', len(seq) + 1, 'replace', final=True + ) == (res + u'b', len(seq) + 1) + res = res.replace(FFFD, u'') + assert decoder(seq, len(seq), 'ignore', final=True + ) == (res, len(seq)) + + +class TestEncoding(UnicodeTests): + def 
test_all_ascii(self): + for i in range(128): + if sys.version >= "2.7": + self.checkencode(unichr(i), "utf-7") + for encoding in "utf-8 latin-1 ascii".split(): + self.checkencode(unichr(i), encoding) + + def test_all_first_256(self): + for i in range(256): + if sys.version >= "2.7": + self.checkencode(unichr(i), "utf-7") + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkencode(unichr(i), encoding) + + def test_first_10000(self): + for i in range(10000): + if sys.version >= "2.7": + self.checkencode(unichr(i), "utf-7") + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkencode(unichr(i), encoding) + + def test_random(self): + for i in range(10000): + v = random.randrange(sys.maxunicode) + if 0xd800 <= v <= 0xdfff: + continue + uni = unichr(v) + if sys.version >= "2.7": + self.checkencode(uni, "utf-7") + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkencode(uni, encoding) + + def test_maxunicode(self): + uni = unichr(sys.maxunicode) + if sys.version >= "2.7": + self.checkencode(uni, "utf-7") + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkencode(uni, encoding) + + def test_empty(self): + for encoding in ("utf-8 utf-16 utf-16-be utf-16-le " + "utf-32 utf-32-be utf-32-le").split(): + self.checkencode(u'', encoding) + + def test_single_chars_utf8(self): + # check every number of bytes per char + for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]: + self.checkencode(s, "utf-8") + + # TODO: Is this test useful? 
+ def test_utf8_surrogates(self): + # make sure that the string itself is not marshalled + u = u"\ud800" + for i in range(4): + u += u"\udc00" + + if utf8_codecs.MAXUNICODE < 65536: + # Check replacing of two surrogates by single char while encoding + self.checkencode(u, "utf-8") + else: + # This is not done in wide unicode builds + py.test.raises(UnicodeEncodeError, self.checkencode, u, "utf-8") + + def test_ascii_error(self): + self.checkencodeerror( + Utf8Str.from_unicode(u"abc\xFF\xFF\xFFcde"), "ascii", 3, 6) + + def test_latin1_error(self): + self.checkencodeerror( + Utf8Str.from_unicode(u"abc\uffff\uffff\uffffcde"), "latin-1", 3, 6) + + def test_mbcs(self): + if sys.platform != 'win32': + py.test.skip("mbcs encoding is win32-specific") + self.checkencode(u'encoding test', "mbcs") + self.checkdecode('decoding test', "mbcs") + # XXX test this on a non-western Windows installation + self.checkencode(u"\N{GREEK CAPITAL LETTER PHI}", "mbcs") # a F + self.checkencode(u"\N{GREEK CAPITAL LETTER PSI}", "mbcs") # a ? + + def test_mbcs_decode_force_ignore(self): + if sys.platform != 'win32': + py.test.skip("mbcs encoding is win32-specific") + + # XXX: requires a locale w/ a restrictive encoding to test + from rpython.rlib.rlocale import getdefaultlocale + if getdefaultlocale()[1] != 'cp932': + py.test.skip("requires cp932 locale") + + s = '\xff\xf4\x8f\xbf\xbf' + decoder = self.getdecoder('mbcs') + assert decoder(s, len(s), 'strict') == (u'\U0010ffff', 5) + py.test.raises(UnicodeEncodeError, decoder, s, len(s), 'strict', + force_ignore=False) + + def test_mbcs_encode_force_replace(self): + if sys.platform != 'win32': + py.test.skip("mbcs encoding is win32-specific") + u = u'@test_2224_tmp-?L??\udc80' + encoder = self.getencoder('mbcs') + assert encoder(u, len(u), 'strict') == '@test_2224_tmp-?L???' 
+ py.test.raises(UnicodeEncodeError, encoder, u, len(u), 'strict', + force_replace=False) + + def test_encode_decimal(self): + encoder = self.getencoder('decimal') + assert encoder(u' 12, 34 ', 8, None) == ' 12, 34 ' + py.test.raises(UnicodeEncodeError, encoder, u' 12, \u1234 ', 7, None) + assert encoder(u'u\u1234', 2, 'replace') == 'u?' + + +# TODO: Do I need to actually skip these? +class TestTranslation(object): + def setup_class(cls): + if utf8_codecs.MAXUNICODE != sys.maxunicode: + py.test.skip("these tests cannot run on the llinterp") + + def test_utf8(self): + from rpython.rtyper.test.test_llinterp import interpret + def f(x): + + s1 = "".join(["\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"] * x) + u, consumed = utf8_codecs.str_decode_utf_8(s1, len(s1), True) + s2 = utf8_codecs.unicode_encode_utf_8(u, len(u), True) + return s1 == s2 + res = interpret(f, [2]) + assert res + + def test_encode_surrogate_pair(self): + u = runicode.UNICHR(0xD800) + runicode.UNICHR(0xDC00) + if runicode.MAXUNICODE < 65536: + # Narrow unicode build, consider utf16 surrogate pairs + assert runicode.unicode_encode_unicode_escape( + u, len(u), True) == r'\U00010000' + assert runicode.unicode_encode_raw_unicode_escape( + u, len(u), True) == r'\U00010000' + else: + # Wide unicode build, don't merge utf16 surrogate pairs + assert runicode.unicode_encode_unicode_escape( + u, len(u), True) == r'\ud800\udc00' + assert runicode.unicode_encode_raw_unicode_escape( + u, len(u), True) == r'\ud800\udc00' diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -34,25 +34,25 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): - from pypy.interpreter.utf8 import decode_unicode_escape + from pypy.interpreter.utf8_codecs import str_decode_unicode_escape state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = 
state.get_unicodedata_handler(space) - result, consumed = decode_unicode_escape( + result, consumed = str_decode_unicode_escape( string, len(string), "strict", final=True, errorhandler=decode_error_handler(space), unicodedata_handler=unicodedata_handler) return result def decode_raw_unicode_escape(space, string): - from pypy.interpreter.utf8 import decode_raw_unicode_escape - result, consumed = decode_raw_unicode_escape( + from pypy.interpreter.utf8_codecs import str_decode_raw_unicode_escape + result, consumed = str_decode_raw_unicode_escape( string, len(string), "strict", final=True, errorhandler=decode_error_handler(space)) return result def decode_utf8(space, string): - from pypy.interpreter.utf8 import decode_utf_8 - result, consumed = decode_utf_8( + from pypy.interpreter.utf8_codecs import str_decode_utf_8 + result, consumed = str_decode_utf_8( string, len(string), "strict", final=True, errorhandler=decode_error_handler(space), allow_surrogates=True) @@ -60,4 +60,4 @@ def encode_utf8(space, uni): # unicode to string... - return s.bytes + return uni.bytes diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py --- a/pypy/interpreter/utf8.py +++ b/pypy/interpreter/utf8.py @@ -1,8 +1,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize from rpython.rlib.runicode import utf8_code_length - -MAXUNICODE = 0x10ffff +from rpython.rlib.rarithmetic import r_uint def utf8chr(value): # Like unichr, but returns a Utf8Str object @@ -91,7 +90,7 @@ stop_pos = start # TODO: Is detecting ascii-ness here actually useful? If it will # happen in __init__ anyway, maybe its not worth the extra - # complexity. + # (code) complexity. 
is_ascii = True while stop_pos < stop: stop_pos += 1 @@ -115,6 +114,54 @@ return False + @specialize.argtype(1) + def __contains__(self, other): + if isinstance(other, Utf8Str): + return other.bytes in self.bytes + if isinstance(other, unicode): + # TODO: Assert fail if translated + return other in unicode(self.bytes, 'utf8') + if isinstance(other, str): + return other in self.bytes + + raise TypeError() + + def __iter__(self): + byte_pos = 0 + while byte_pos < len(self.bytes): + cplen = utf8_code_length[ord(self.bytes[byte_pos])] + yield Utf8Str(self.bytes[byte_pos:byte_pos+cplen]) + byte_pos += cplen + + @specialize.argtype(1) + def find(self, other): + if isinstance(other, Utf8Str): + return self.bytes.find(other.bytes) + if isinstance(other, unicode): + return unicode(self.bytes, 'utf8').find(other) + if isinstance(other, str): + return self.bytes.find(other) + + def rfind(self, other): + if isinstance(other, Utf8Str): + return self.bytes.rfind(other.bytes) + if isinstance(other, unicode): + return unicode(self.bytes, 'utf8').rfind(other) + if isinstance(other, str): + return self.bytes.rfind(other) + + def endswith(self, other): + return self.rfind(other) == len(self) - len(other) + + def as_unicode(self): + """NOT_RPYTHON""" + return self.bytes.decode('utf-8') + + @staticmethod + def from_unicode(u): + """NOT_RPYTHON""" + return Utf8Str(u.encode('utf-8')) + class Utf8Builder(object): @specialize.argtype(1) def __init__(self, init_size=None): @@ -127,7 +174,7 @@ @specialize.argtype(1) def append(self, c): - if isinstance(c, int): + if isinstance(c, int) or isinstance(c, r_uint): if c < 0x80: self._builder.append(chr(c)) elif c < 0x800: @@ -147,8 +194,14 @@ self._is_ascii = False else: raise ValueError("Invalid unicode codepoint > 0x10FFFF.") + elif isinstance(c, Utf8Str): + self._builder.append(c.bytes) + if not c._is_ascii: + self._is_ascii = False else: - # TODO: Only allow ord(c) in [0, 127] + # TODO: Remove this check? 
+ if len(c) == 1: + assert ord(c) < 128 self._builder.append(c) def append_slice(self, s, start, end, is_ascii=False): @@ -159,411 +212,3 @@ def build(self): return Utf8Str(self._builder.build(), self._is_ascii) - -# ____________________________________________________________ -# Escape-parsing functions - -def decode_raw_unicode_escape(s, size, errors, final=False, - errorhandler=None): - if errorhandler is None: - errorhandler = default_unicode_error_decode - if size == 0: - # TODO:? - return Utf8Str('', True), 0 - - result = Utf8Builder(size) - pos = 0 - while pos < size: - ch = s[pos] - - # Non-escape characters are interpreted as Unicode ordinals - if ch != '\\': - result.append(ch) - pos += 1 - continue - - # \u-escapes are only interpreted iff the number of leading - # backslashes is odd - bs = pos - while pos < size: - pos += 1 - if pos == size or s[pos] != '\\': - break - result.append('\\') - - # we have a backslash at the end of the string, stop here - if pos >= size: - result.append('\\') - break - - if ((pos - bs) & 1 == 0 or - pos >= size or - (s[pos] != 'u' and s[pos] != 'U')): - result.append('\\') - result.append(s[pos]) - pos += 1 - continue - - digits = 4 if s[pos] == 'u' else 8 - message = "truncated \\uXXXX" - pos += 1 - pos = hexescape(result, s, pos, digits, - "rawunicodeescape", errorhandler, message, errors) - - return result.build(), pos - -# Specialize on the errorhandler when it's a constant - at specialize.arg_or_var(4) -def decode_unicode_escape(s, size, errors, final=False, - errorhandler=None, - unicodedata_handler=None): - if errorhandler is None: - errorhandler = default_unicode_error_decode - - if size == 0: - return Utf8Str('', True), 0 - - builder = Utf8Builder(size) - pos = 0 - while pos < size: - ch = s[pos] - - # Non-escape characters are interpreted as Unicode ordinals - if ch != '\\': - builder.append(ch) - pos += 1 - continue - - # - Escapes - pos += 1 - if pos >= size: - message = "\\ at end of string" - res, pos = 
errorhandler(errors, "unicodeescape", - message, s, pos-1, size) - builder.append(res) - continue - - ch = s[pos] - pos += 1 - # \x escapes - if ch == '\n': pass - elif ch == '\\': builder.append('\\') - elif ch == '\'': builder.append('\'') - elif ch == '\"': builder.append('\"') - elif ch == 'b' : builder.append('\b') - elif ch == 'f' : builder.append('\f') - elif ch == 't' : builder.append('\t') - elif ch == 'n' : builder.append('\n') - elif ch == 'r' : builder.append('\r') - elif ch == 'v' : builder.append('\v') - elif ch == 'a' : builder.append('\a') - elif '0' <= ch <= '7': - x = ord(ch) - ord('0') - if pos < size: - ch = s[pos] - if '0' <= ch <= '7': - pos += 1 - x = (x<<3) + ord(ch) - ord('0') - if pos < size: - ch = s[pos] - if '0' <= ch <= '7': - pos += 1 - x = (x<<3) + ord(ch) - ord('0') - builder.append(x) - # hex escapes - # \xXX - elif ch == 'x': - digits = 2 - message = "truncated \\xXX escape" - pos = hexescape(builder, s, pos, digits, - "unicodeescape", errorhandler, message, errors) - - # \uXXXX - elif ch == 'u': - digits = 4 - message = "truncated \\uXXXX escape" - pos = hexescape(builder, s, pos, digits, - "unicodeescape", errorhandler, message, errors) - - # \UXXXXXXXX - elif ch == 'U': - digits = 8 - message = "truncated \\UXXXXXXXX escape" - pos = hexescape(builder, s, pos, digits, - "unicodeescape", errorhandler, message, errors) - - # \N{name} - elif ch == 'N': - message = "malformed \\N character escape" - look = pos - if unicodedata_handler is None: - message = ("\\N escapes not supported " - "(can't load unicodedata module)") - res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, size) - builder.append(res) - continue - - if look < size and s[look] == '{': - # look for the closing brace - while look < size and s[look] != '}': - look += 1 - if look < size and s[look] == '}': - # found a name. 
look it up in the unicode database - message = "unknown Unicode character name" - name = s[pos+1:look] - code = unicodedata_handler.call(name) - if code < 0: - res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, look+1) - builder.append(res) - continue - pos = look + 1 - builder.append(code) - else: - res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, look+1) - builder.append(res) - else: - res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, look+1) - builder.append(res) - else: - builder.append('\\') - builder.append(ch) - - return builder.build(), pos - -hexdigits = "0123456789ABCDEFabcdef" - -def hexescape(builder, s, pos, digits, - encoding, errorhandler, message, errors): - chr = 0 - if pos + digits > len(s): - endinpos = pos - while endinpos < len(s) and s[endinpos] in hexdigits: - endinpos += 1 - res, pos = errorhandler(errors, encoding, - message, s, pos-2, endinpos) - builder.append(res) - else: - try: - chr = r_uint(int(s[pos:pos+digits], 16)) - except ValueError: - endinpos = pos - while s[endinpos] in hexdigits: - endinpos += 1 - res, pos = errorhandler(errors, encoding, - message, s, pos-2, endinpos) - builder.append(res) - else: - # when we get here, chr is a 32-bit unicode character - if chr <= MAXUNICODE: - builder.append(chr) - pos += digits - - else: - message = "illegal Unicode character" - res, pos = errorhandler(errors, encoding, - message, s, pos-2, pos+digits) - builder.append(res) - return pos - -# ____________________________________________________________ - -# Converting bytes (utf8) to unicode? -# I guess we just make sure we're looking at valid utf-8 and then make the -# object? 
- -def decode_utf_8(s, size, errors, final=False, - errorhandler=None, allow_surrogates=False): - if errorhandler is None: - errorhandler = default_unicode_error_decode - result = Utf8Builder(size) - pos = decode_utf_8_impl(s, size, errors, final, errorhandler, result, - allow_surrogates=allow_surrogates) - return result.build(), pos - -def decode_utf_8_impl(s, size, errors, final, errorhandler, result, - allow_surrogates): - if size == 0: - return 0 - - # TODO: Instead of assembling and then re-disassembling the codepoints, - # just use builder.append_slice - pos = 0 - while pos < size: - ordch1 = ord(s[pos]) - # fast path for ASCII - # XXX maybe use a while loop here - if ordch1 < 0x80: - result.append(ordch1) - pos += 1 - continue - - n = utf8_code_length[ordch1] - if pos + n > size: - if not final: - break - charsleft = size - pos - 1 # either 0, 1, 2 - # note: when we get the 'unexpected end of data' we don't care - # about the pos anymore and we just ignore the value - if not charsleft: - # there's only the start byte and nothing else - r, pos = errorhandler(errors, 'utf8', - 'unexpected end of data', - s, pos, pos+1) - result.append(r) - break - ordch2 = ord(s[pos+1]) - if n == 3: - # 3-bytes seq with only a continuation byte - if (ordch2>>6 != 0x2 or # 0b10 - (ordch1 == 0xe0 and ordch2 < 0xa0)): - # or (ordch1 == 0xed and ordch2 > 0x9f) - # second byte invalid, take the first and continue - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+1) - result.append(r) - continue - else: - # second byte valid, but third byte missing - r, pos = errorhandler(errors, 'utf8', - 'unexpected end of data', - s, pos, pos+2) - result.append(r) - break - elif n == 4: - # 4-bytes seq with 1 or 2 continuation bytes - if (ordch2>>6 != 0x2 or # 0b10 - (ordch1 == 0xf0 and ordch2 < 0x90) or - (ordch1 == 0xf4 and ordch2 > 0x8f)): - # second byte invalid, take the first and continue - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation 
byte', - s, pos, pos+1) - result.append(r) - continue - elif charsleft == 2 and ord(s[pos+2])>>6 != 0x2: # 0b10 - # third byte invalid, take the first two and continue - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+2) - result.append(r) - continue - else: - # there's only 1 or 2 valid cb, but the others are missing - r, pos = errorhandler(errors, 'utf8', - 'unexpected end of data', - s, pos, pos+charsleft+1) - result.append(r) - break - - if n == 0: - r, pos = errorhandler(errors, 'utf8', - 'invalid start byte', - s, pos, pos+1) - result.append(r) - - elif n == 1: - assert 0, "ascii should have gone through the fast path" - - elif n == 2: - ordch2 = ord(s[pos+1]) - if ordch2>>6 != 0x2: # 0b10 - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+1) - result.append(r) - continue - # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz - result.append(((ordch1 & 0x1F) << 6) + # 0b00011111 - (ordch2 & 0x3F)) # 0b00111111 - pos += 2 - - elif n == 3: - ordch2 = ord(s[pos+1]) - ordch3 = ord(s[pos+2]) - if (ordch2>>6 != 0x2 or # 0b10 - (ordch1 == 0xe0 and ordch2 < 0xa0) - # surrogates shouldn't be valid UTF-8! 
- or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f) - ): - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+1) - result.append(r) - continue - elif ordch3>>6 != 0x2: # 0b10 - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+2) - result.append(r) - continue - # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - result.append((((ordch1 & 0x0F) << 12) + # 0b00001111 - ((ordch2 & 0x3F) << 6) + # 0b00111111 - (ordch3 & 0x3F))) # 0b00111111 - pos += 3 - - elif n == 4: - ordch2 = ord(s[pos+1]) - ordch3 = ord(s[pos+2]) - ordch4 = ord(s[pos+3]) - if (ordch2>>6 != 0x2 or # 0b10 - (ordch1 == 0xf0 and ordch2 < 0x90) or - (ordch1 == 0xf4 and ordch2 > 0x8f)): - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+1) - result.append(r) - continue - elif ordch3>>6 != 0x2: # 0b10 - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+2) - result.append(r) - continue - elif ordch4>>6 != 0x2: # 0b10 - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+3) - result.append(r) - continue - # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz - c = (((ordch1 & 0x07) << 18) + # 0b00000111 - ((ordch2 & 0x3F) << 12) + # 0b00111111 - ((ordch3 & 0x3F) << 6) + # 0b00111111 - (ordch4 & 0x3F)) # 0b00111111 - - # TODO: Why doesn't this raise an error when c > MAXUNICODE? 
If I'm - # converting utf8 -> utf8 is this necessary - if c <= MAXUNICODE: - result.append(c) - pos += 4 - - return pos - -# ____________________________________________________________ -# Default error handlers - - -def default_unicode_error_decode(errors, encoding, msg, s, - startingpos, endingpos): - if errors == 'replace': - return _unicode_error_replacement, endingpos - if errors == 'ignore': - return '', endingpos - raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg) -_unicode_error_replacement = decode_raw_unicode_escape( - '\ufffd', 1, default_unicode_error_decode) - -def default_unicode_error_encode(errors, encoding, msg, u, - startingpos, endingpos): - if errors == 'replace': - return '?', None, endingpos - if errors == 'ignore': - return '', None, endingpos - raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg) - diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/utf8_codecs.py @@ -0,0 +1,1598 @@ +import sys + +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import r_uint, intmask +from rpython.rlib.unicodedata import unicodedb +from rpython.rlib.runicode import utf8_code_length + +from pypy.interpreter.utf8 import Utf8Str, Utf8Builder, utf8chr, utf8ord + + +BYTEORDER = sys.byteorder +MAXUNICODE = 0x10ffff + +# ____________________________________________________________ +# Unicode escape {{{ + +# Specialize on the errorhandler when it's a constant + at specialize.arg_or_var(4) +def str_decode_unicode_escape(s, size, errors, final=False, + errorhandler=None, + unicodedata_handler=None): + if errorhandler is None: + errorhandler = default_unicode_error_decode + + if size == 0: + return Utf8Str('', True), 0 + + builder = Utf8Builder(size) + pos = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + 
builder.append(ch) + pos += 1 + continue + + # - Escapes + pos += 1 + if pos >= size: + message = "\\ at end of string" + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, size) + builder.append(res) + continue + + ch = s[pos] + pos += 1 + # \x escapes + if ch == '\n': pass + elif ch == '\\': builder.append('\\') + elif ch == '\'': builder.append('\'') + elif ch == '\"': builder.append('\"') + elif ch == 'b' : builder.append('\b') + elif ch == 'f' : builder.append('\f') + elif ch == 't' : builder.append('\t') + elif ch == 'n' : builder.append('\n') + elif ch == 'r' : builder.append('\r') + elif ch == 'v' : builder.append('\v') + elif ch == 'a' : builder.append('\a') + elif '0' <= ch <= '7': + x = ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + builder.append(x) + # hex escapes + # \xXX + elif ch == 'x': + digits = 2 + message = "truncated \\xXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + + # \uXXXX + elif ch == 'u': + digits = 4 + message = "truncated \\uXXXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + + # \UXXXXXXXX + elif ch == 'U': + digits = 8 + message = "truncated \\UXXXXXXXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + + # \N{name} + elif ch == 'N': + message = "malformed \\N character escape" + look = pos + if unicodedata_handler is None: + message = ("\\N escapes not supported " + "(can't load unicodedata module)") + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, size) + builder.append(res) + continue + + if look < size and s[look] == '{': + # look for the closing brace + while look < size and s[look] != '}': + look += 1 + if look < size and s[look] == '}': + # found a 
name. look it up in the unicode database + message = "unknown Unicode character name" + name = s[pos+1:look] + code = unicodedata_handler.call(name) + if code < 0: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + continue + pos = look + 1 + builder.append(code) + else: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + builder.append('\\') + builder.append(ch) + + return builder.build(), pos + +hexdigits = "0123456789ABCDEFabcdef" + +def hexescape(builder, s, pos, digits, + encoding, errorhandler, message, errors): + chr = 0 + if pos + digits > len(s): + endinpos = pos + while endinpos < len(s) and s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + try: + chr = r_uint(int(s[pos:pos+digits], 16)) + except ValueError: + endinpos = pos + while s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + # when we get here, chr is a 32-bit unicode character + if chr <= MAXUNICODE: + builder.append(chr) + pos += digits + + else: + message = "illegal Unicode character" + res, pos = errorhandler(errors, encoding, + message, s, pos-2, pos+digits) + builder.append(res) + return pos + +def make_unicode_escape_function(pass_printable=False, unicode_output=False, + quotes=False, prefix=None): + # Python3 has two similar escape functions: One to implement + # encode('unicode_escape') and which outputs bytes, and unicode.__repr__ + # which outputs unicode. They cannot share RPython code, so we generate + # them with the template below. + # Python2 does not really need this, but it reduces diffs between branches. 
+ + if unicode_output: + STRING_BUILDER = Utf8Builder + STR = Utf8Str + else: + STRING_BUILDER = StringBuilder + STR = str + + def unicode_escape(s, size, errors, errorhandler=None): + # errorhandler is not used: this function cannot cause Unicode errors + result = STRING_BUILDER(size) + + if quotes: + if prefix: + result.append(prefix) + if s.find('\'') != -1 and s.find('\"') == -1: + quote = ord('\"') + result.append('"') + else: + quote = ord('\'') + result.append('\'') + else: + quote = 0 + + if size == 0: + return STR('') + + pos = 0 + while pos < size: + #oc = ORD(s, pos) + oc = utf8ord(s, pos) + + # Escape quotes + if quotes and (oc == quote or oc == ord('\\')): + result.append('\\') + result.append(chr(oc)) + pos += 1 + continue + + # Map special whitespace to '\t', \n', '\r' + if oc == ord('\t'): + result.append('\\t') + elif oc == ord('\n'): + result.append('\\n') + elif oc == ord('\r'): + result.append('\\r') + elif oc == ord('\\'): + result.append('\\\\') + + # Map non-printable or non-ascii to '\xhh' or '\uhhhh' + elif pass_printable and not unicodedb.isprintable(oc): + char_escape_helper(result, oc) + elif not pass_printable and (oc < 32 or oc >= 0x7F): + char_escape_helper(result, oc) + + # Copy everything else as-is + else: + # TODO: Is this safe? Will we only have ascii characters here? 
+ result.append(chr(oc)) + pos += 1 + + if quotes: + result.append(chr(quote)) + return result.build() + + def char_escape_helper(result, char): + num = hex(char) + if char >= 0x10000: + result.append("\\U") + zeros = 8 + elif char >= 0x100: + result.append("\\u") + zeros = 4 + else: + result.append("\\x") + zeros = 2 + lnum = len(num) + nb = zeros + 2 - lnum # num starts with '0x' + if nb > 0: + result.append_multiple_char('0', nb) + result.append_slice(num, 2, lnum) + + return unicode_escape, char_escape_helper + +# This function is also used by _codecs/interp_codecs.py +(unicode_encode_unicode_escape, raw_unicode_escape_helper + ) = make_unicode_escape_function() + + +# }}} + +# ____________________________________________________________ +# Raw unicode escape {{{ + +def str_decode_raw_unicode_escape(s, size, errors, final=False, + errorhandler=None): + if errorhandler is None: + errorhandler = default_unicode_error_decode + if size == 0: + # TODO:? + return Utf8Str('', True), 0 + + result = Utf8Builder(size) + pos = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + result.append(ch) + pos += 1 + continue + + # \u-escapes are only interpreted iff the number of leading + # backslashes is odd + bs = pos + while pos < size: + pos += 1 + if pos == size or s[pos] != '\\': + break + result.append('\\') + + # we have a backslash at the end of the string, stop here + if pos >= size: + result.append('\\') + break + + if ((pos - bs) & 1 == 0 or + pos >= size or + (s[pos] != 'u' and s[pos] != 'U')): + result.append('\\') + result.append(s[pos]) + pos += 1 + continue + + digits = 4 if s[pos] == 'u' else 8 + message = "truncated \\uXXXX" + pos += 1 + pos = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) + + return result.build(), pos + +def unicode_encode_raw_unicode_escape(s, size, errors, errorhandler=None): + # errorhandler is not used: this function cannot cause 
Unicode errors + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + oc = utf8ord(s, pos) + + if oc < 0x100: + result.append(chr(oc)) + else: + raw_unicode_escape_helper(result, oc) + pos += 1 + + return result.build() + +# }}} + +# ____________________________________________________________ +# ascii & latin-1 {{{ + +def str_decode_latin_1(s, size, errors, final=False, + errorhandler=None): + # latin1 is equivalent to the first 256 ordinals in Unicode. + pos = 0 + result = Utf8Builder(size) + while pos < size: + result.append(ord(s[pos])) + pos += 1 + return result.build(), pos + + +# Specialize on the errorhandler when it's a constant + at specialize.arg_or_var(4) +def str_decode_ascii(s, size, errors, final=False, + errorhandler=None): + # TODO: Is it worth while to try to avoid the making copy by first checking + # the string for errors? + + if errorhandler is None: + errorhandler = default_unicode_error_decode + # ASCII is equivalent to the first 128 ordinals in Unicode. 
+ result = Utf8Builder(size) + pos = 0 + while pos < size: + c = s[pos] + if ord(c) < 128: + result.append(c) + pos += 1 + else: + r, pos = errorhandler(errors, "ascii", "ordinal not in range(128)", + s, pos, pos + 1) + result.append(r) + return result.build(), pos + + +# Specialize on the errorhandler when it's a constant + at specialize.arg_or_var(3) +def unicode_encode_ucs1_helper(p, size, errors, + errorhandler=None, limit=256): + if errorhandler is None: + errorhandler = default_unicode_error_encode + if limit == 256: + reason = "ordinal not in range(256)" + encoding = "latin-1" + else: + reason = "ordinal not in range(128)" + encoding = "ascii" + + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + od = utf8ord(p, pos) + + if od < limit: + result.append(chr(od)) + pos += 1 + else: + # startpos for collecting unencodable chars + collstart = pos + collend = pos+1 + while collend < len(p) and utf8ord(p, collend) >= limit: + collend += 1 + ru, rs, pos = errorhandler(errors, encoding, reason, p, + collstart, collend) + if rs is not None: + # py3k only + result.append(rs) + continue + for ch in ru: + if ord(ch) < limit: + result.append(chr(ord(ch))) + else: + errorhandler("strict", encoding, reason, p, + collstart, collend) + + return result.build() + +def unicode_encode_latin_1(p, size, errors, errorhandler=None): + res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 256) + return res + +def unicode_encode_ascii(p, size, errors, errorhandler=None): + res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 128) + return res + +# }}} + +# ____________________________________________________________ +# utf-8 {{{ + +# Converting bytes (utf8) to unicode? +# I guess we just make sure we're looking at valid utf-8 and then make the +# object? 
+ +def unicode_encode_utf_8(s, size, errors, errorhandler=None, + allow_surrogates=False): + if size < len(s): + return s.bytes[0:s.index_of_char(size)] + return s.bytes + +def str_decode_utf_8(s, size, errors, final=False, + errorhandler=None, allow_surrogates=False): + if errorhandler is None: + errorhandler = default_unicode_error_decode + result = Utf8Builder(size) + pos = str_decode_utf_8_impl(s, size, errors, final, errorhandler, result, + allow_surrogates=allow_surrogates) + return result.build(), pos + +def str_decode_utf_8_impl(s, size, errors, final, errorhandler, result, + allow_surrogates): + if size == 0: + return 0 + + # TODO: Instead of assembling and then re-disassembling the codepoints, + # just use builder.append_slice + pos = 0 + while pos < size: + ordch1 = ord(s[pos]) + # fast path for ASCII + # XXX maybe use a while loop here + if ordch1 < 0x80: + result.append(ordch1) + pos += 1 + continue + + n = utf8_code_length[ordch1] + if pos + n > size: + if not final: + break + charsleft = size - pos - 1 # either 0, 1, 2 + # note: when we get the 'unexpected end of data' we don't care + # about the pos anymore and we just ignore the value + if not charsleft: + # there's only the start byte and nothing else + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+1) + result.append(r) + break + ordch2 = ord(s[pos+1]) + if n == 3: + # 3-bytes seq with only a continuation byte + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xe0 and ordch2 < 0xa0)): + # or (ordch1 == 0xed and ordch2 > 0x9f) + # second byte invalid, take the first and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + else: + # second byte valid, but third byte missing + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+2) + result.append(r) + break + elif n == 4: + # 4-bytes seq with 1 or 2 continuation bytes + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xf0 
and ordch2 < 0x90) or + (ordch1 == 0xf4 and ordch2 > 0x8f)): + # second byte invalid, take the first and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif charsleft == 2 and ord(s[pos+2])>>6 != 0x2: # 0b10 + # third byte invalid, take the first two and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + else: + # there's only 1 or 2 valid cb, but the others are missing + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+charsleft+1) + result.append(r) + break + + if n == 0: + r, pos = errorhandler(errors, 'utf8', + 'invalid start byte', + s, pos, pos+1) + result.append(r) + + elif n == 1: + assert 0, "ascii should have gone through the fast path" + + elif n == 2: + ordch2 = ord(s[pos+1]) + if ordch2>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + result.append(((ordch1 & 0x1F) << 6) + # 0b00011111 + (ordch2 & 0x3F)) # 0b00111111 + pos += 2 + + elif n == 3: + ordch2 = ord(s[pos+1]) + ordch3 = ord(s[pos+2]) + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xe0 and ordch2 < 0xa0) + # surrogates shouldn't be valid UTF-8! 
+ or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f) + ): + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif ordch3>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + result.append((((ordch1 & 0x0F) << 12) + # 0b00001111 + ((ordch2 & 0x3F) << 6) + # 0b00111111 + (ordch3 & 0x3F))) # 0b00111111 + pos += 3 + + elif n == 4: + ordch2 = ord(s[pos+1]) + ordch3 = ord(s[pos+2]) + ordch4 = ord(s[pos+3]) + if (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xf0 and ordch2 < 0x90) or + (ordch1 == 0xf4 and ordch2 > 0x8f)): + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif ordch3>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + elif ordch4>>6 != 0x2: # 0b10 + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+3) + result.append(r) + continue + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + c = (((ordch1 & 0x07) << 18) + # 0b00000111 + ((ordch2 & 0x3F) << 12) + # 0b00111111 + ((ordch3 & 0x3F) << 6) + # 0b00111111 + (ordch4 & 0x3F)) # 0b00111111 + + # TODO: Why doesn't this raise an error when c > MAXUNICODE? 
If I'm + # converting utf8 -> utf8 is this necessary + if c <= MAXUNICODE: + result.append(c) + pos += 4 + + return pos + +# }}} + +# ____________________________________________________________ +# utf-16 {{{ + +def str_decode_utf_16(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, + errorhandler, "native") + return result, length + +def str_decode_utf_16_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, + errorhandler, "big") + return result, length From noreply at buildbot.pypy.org Mon Jun 23 18:47:20 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 18:47:20 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: test fix in jit/metainterp/test/test_threadlocal Message-ID: <20140623164720.B349C1C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72163:0181317b2980 Date: 2014-06-23 18:46 +0200 http://bitbucket.org/pypy/pypy/changeset/0181317b2980/ Log: test fix in jit/metainterp/test/test_threadlocal diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -314,9 +314,11 @@ if we_are_translated(): from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr from rpython.rlib.rgc import _make_sure_does_not_move + from rpython.rlib.objectmodel import running_on_llinterp ptr = cast_instance_to_base_ptr(value) - gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr) - _make_sure_does_not_move(gcref) + if not running_on_llinterp: + gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr) + _make_sure_does_not_move(gcref) llop.threadlocalref_set(lltype.Void, opaque_id, ptr) else: self.local.value = value From noreply at buildbot.pypy.org Mon Jun 23 19:08:30 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 19:08:30 +0200 (CEST) Subject: [pypy-commit] pypy 
ec-threadlocal: Translation fix Message-ID: <20140623170830.7B5661C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72164:f43924494b76 Date: 2014-06-23 19:07 +0200 http://bitbucket.org/pypy/pypy/changeset/f43924494b76/ Log: Translation fix diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2357,7 +2357,7 @@ from rpython.jit.backend.x86 import stmtlocal assert isinstance(resloc, RegLoc) effectinfo = op.getdescr().get_extra_info() - assert len(effectinfo.extradescrs) == 1 + assert effectinfo.extradescrs is not None ed = effectinfo.extradescrs[0] assert isinstance(ed, ThreadLocalRefDescr) addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) From noreply at buildbot.pypy.org Mon Jun 23 19:26:08 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 19:26:08 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Fix reinit_threads() Message-ID: <20140623172608.9D9A21C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72165:d6e8e7819baf Date: 2014-06-23 19:18 +0200 http://bitbucket.org/pypy/pypy/changeset/d6e8e7819baf/ Log: Fix reinit_threads() diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -84,8 +84,10 @@ ident = rthread.get_ident() ec = self.get_ec() assert ec is not None + old_sig = ec._signals_enabled if ident != self._mainthreadident: - ec._signals_enabled += 1 + old_sig += 1 self._cleanup_() self._mainthreadident = ident self._set_ec(ec) + ec._signals_enabled = old_sig From noreply at buildbot.pypy.org Mon Jun 23 19:26:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 19:26:09 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Fix checkmodule() Message-ID: 
<20140623172609.C7E221C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72166:a34550fc5717 Date: 2014-06-23 19:25 +0200 http://bitbucket.org/pypy/pypy/changeset/a34550fc5717/ Log: Fix checkmodule() diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -314,6 +314,9 @@ t = TranslationContext(config=config) self.t = t # for debugging ann = t.buildannotator() + def _do_startup(): + self.threadlocals.enter_thread(self) + ann.build_types(_do_startup, [], complete_now=False) if func is not None: ann.build_types(func, argtypes, complete_now=False) if seeobj_w: From noreply at buildbot.pypy.org Mon Jun 23 19:30:34 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 19:30:34 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: fix test_lloperation Message-ID: <20140623173034.115C01C31FE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72167:a6d80238a8aa Date: 2014-06-23 19:29 +0200 http://bitbucket.org/pypy/pypy/changeset/a6d80238a8aa/ Log: fix test_lloperation diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -930,6 +930,9 @@ d = self.llinterpreter.tlrefsdict return d[key._obj] + def op_threadlocalref_getaddr(self, key): + raise NotImplementedError("threadlocalref_getaddr") + # __________________________________________________________ # operations on addresses From noreply at buildbot.pypy.org Mon Jun 23 19:39:24 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 19:39:24 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Use a more reasonable default for "no__thread", right now True on all Message-ID: <20140623173924.49BC61C31FE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72168:163fed2c37f8 Date: 2014-06-23 19:38 +0200 
http://bitbucket.org/pypy/pypy/changeset/163fed2c37f8/ Log: Use a more reasonable default for "no__thread", right now True on all platforms apart from Linux. diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -22,6 +22,12 @@ IS_64_BITS = sys.maxint > 2147483647 +SUPPORT__THREAD = ( # whether the particular C compiler supports __thread + sys.platform.startswith("linux")) # Linux works + # OS/X doesn't work, because we still target 10.5/10.6 and the + # minimum required version is 10.7. Windows doesn't work. Please + # add other platforms here if it works on them. + MAINDIR = os.path.dirname(os.path.dirname(__file__)) CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) @@ -156,7 +162,8 @@ # portability options BoolOption("no__thread", "don't use __thread for implementing TLS", - default=False, cmdline="--no__thread", negation=False), + default=not SUPPORT__THREAD, cmdline="--no__thread", + negation=False), IntOption("make_jobs", "Specify -j argument to make for compilation" " (C backend only)", cmdline="--make-jobs", default=detect_number_of_processors()), From noreply at buildbot.pypy.org Mon Jun 23 19:49:17 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 19:49:17 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Force the inclusion of 'src/threadlocal.*' from here, Message-ID: <20140623174917.8E2A91C31FE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72169:e0a6a161c834 Date: 2014-06-23 19:48 +0200 http://bitbucket.org/pypy/pypy/changeset/e0a6a161c834/ Log: Force the inclusion of 'src/threadlocal.*' from here, needed in particular when we're compiling the no__thread version. 
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -281,6 +281,11 @@ # KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR! # We use _make_sure_does_not_move() to make sure the pointer will not move. +ecitl = ExternalCompilationInfo( + includes = ['src/threadlocal.h'], + separate_module_files = [translator_c_dir / 'src' / 'threadlocal.c']) +ensure_threadlocal = rffi.llexternal_use_eci(ecitl) + class ThreadLocalReference(object): _COUNT = 1 OPAQUEID = lltype.OpaqueType("ThreadLocalRef", @@ -320,6 +325,7 @@ gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr) _make_sure_does_not_move(gcref) llop.threadlocalref_set(lltype.Void, opaque_id, ptr) + ensure_threadlocal() else: self.local.value = value From noreply at buildbot.pypy.org Mon Jun 23 20:01:25 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 20:01:25 +0200 (CEST) Subject: [pypy-commit] pypy ec-threadlocal: Ready for merge Message-ID: <20140623180125.7936A1D2DC0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: ec-threadlocal Changeset: r72170:1fd0f3c8fc6c Date: 2014-06-23 19:56 +0200 http://bitbucket.org/pypy/pypy/changeset/1fd0f3c8fc6c/ Log: Ready for merge From noreply at buildbot.pypy.org Mon Jun 23 20:01:27 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 20:01:27 +0200 (CEST) Subject: [pypy-commit] pypy default: hg merge ec-threadlocal Message-ID: <20140623180127.8003F1D2DC0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72171:abe70e8eeca9 Date: 2014-06-23 19:59 +0200 http://bitbucket.org/pypy/pypy/changeset/abe70e8eeca9/ Log: hg merge ec-threadlocal Change the executioncontext's lookup to be done by reading a thread- local variable (which is implemented in C using '__thread' if possible, and pthread_getspecific() otherwise). 
On Linux x86 and x86-64, the JIT backend has a special optimization that lets it emit directly a single MOV from a %gs- or %fs-based address. It seems actually to give a good boost in performance. diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -30,8 +30,6 @@ if w_dict is not None: # for tests w_entry_point = space.getitem(w_dict, space.wrap('entry_point')) w_run_toplevel = space.getitem(w_dict, space.wrap('run_toplevel')) - w_call_finish_gateway = space.wrap(gateway.interp2app(call_finish)) - w_call_startup_gateway = space.wrap(gateway.interp2app(call_startup)) withjit = space.config.objspace.usemodules.pypyjit def entry_point(argv): @@ -53,7 +51,7 @@ argv = argv[:1] + argv[3:] try: try: - space.call_function(w_run_toplevel, w_call_startup_gateway) + space.startup() w_executable = space.wrap(argv[0]) w_argv = space.newlist([space.wrap(s) for s in argv[1:]]) w_exitcode = space.call_function(w_entry_point, w_executable, w_argv) @@ -69,7 +67,7 @@ return 1 finally: try: - space.call_function(w_run_toplevel, w_call_finish_gateway) + space.finish() except OperationError, e: debug("OperationError:") debug(" operror-type: " + e.w_type.getname(space)) @@ -184,11 +182,6 @@ 'pypy_thread_attach': pypy_thread_attach, 'pypy_setup_home': pypy_setup_home} -def call_finish(space): - space.finish() - -def call_startup(space): - space.startup() # _____ Define and setup target ___ diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -395,6 +395,7 @@ def startup(self): # To be called before using the space + self.threadlocals.enter_thread(self) # Initialize already imported builtin modules from pypy.interpreter.module import Module @@ -639,30 +640,33 @@ """NOT_RPYTHON: Abstract method that should put some minimal content into the w_builtins.""" - 
@jit.loop_invariant def getexecutioncontext(self): "Return what we consider to be the active execution context." # Important: the annotator must not see a prebuilt ExecutionContext: # you should not see frames while you translate # so we make sure that the threadlocals never *have* an # ExecutionContext during translation. - if self.config.translating and not we_are_translated(): - assert self.threadlocals.getvalue() is None, ( - "threadlocals got an ExecutionContext during translation!") - try: - return self._ec_during_translation - except AttributeError: - ec = self.createexecutioncontext() - self._ec_during_translation = ec + if not we_are_translated(): + if self.config.translating: + assert self.threadlocals.get_ec() is None, ( + "threadlocals got an ExecutionContext during translation!") + try: + return self._ec_during_translation + except AttributeError: + ec = self.createexecutioncontext() + self._ec_during_translation = ec + return ec + else: + ec = self.threadlocals.get_ec() + if ec is None: + self.threadlocals.enter_thread(self) + ec = self.threadlocals.get_ec() return ec - # normal case follows. The 'thread' module installs a real - # thread-local object in self.threadlocals, so this builds - # and caches a new ec in each thread. - ec = self.threadlocals.getvalue() - if ec is None: - ec = self.createexecutioncontext() - self.threadlocals.setvalue(ec) - return ec + else: + # translated case follows. self.threadlocals is either from + # 'pypy.interpreter.miscutils' or 'pypy.module.thread.threadlocals'. + # the result is assumed to be non-null: enter_thread() was called. 
+ return self.threadlocals.get_ec() def _freeze_(self): return True diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -11,11 +11,11 @@ """ _value = None - def getvalue(self): + def get_ec(self): return self._value - def setvalue(self, value): - self._value = value + def enter_thread(self, space): + self._value = space.createexecutioncontext() def signals_enabled(self): return True diff --git a/pypy/module/thread/__init__.py b/pypy/module/thread/__init__.py --- a/pypy/module/thread/__init__.py +++ b/pypy/module/thread/__init__.py @@ -26,10 +26,11 @@ "NOT_RPYTHON: patches space.threadlocals to use real threadlocals" from pypy.module.thread import gil MixedModule.__init__(self, space, *args) - prev = space.threadlocals.getvalue() + prev_ec = space.threadlocals.get_ec() space.threadlocals = gil.GILThreadLocals() space.threadlocals.initialize(space) - space.threadlocals.setvalue(prev) + if prev_ec is not None: + space.threadlocals._set_ec(prev_ec) from pypy.module.posix.interp_posix import add_fork_hook from pypy.module.thread.os_thread import reinit_threads diff --git a/pypy/module/thread/os_thread.py b/pypy/module/thread/os_thread.py --- a/pypy/module/thread/os_thread.py +++ b/pypy/module/thread/os_thread.py @@ -126,6 +126,8 @@ release = staticmethod(release) def run(space, w_callable, args): + # add the ExecutionContext to space.threadlocals + space.threadlocals.enter_thread(space) try: space.call_args(w_callable, args) except OperationError, e: diff --git a/pypy/module/thread/test/test_gil.py b/pypy/module/thread/test/test_gil.py --- a/pypy/module/thread/test/test_gil.py +++ b/pypy/module/thread/test/test_gil.py @@ -64,13 +64,14 @@ except Exception, e: assert 0 thread.gc_thread_die() + my_gil_threadlocals = gil.GILThreadLocals() def f(): state.data = [] state.datalen1 = 0 state.datalen2 = 0 state.datalen3 = 0 state.datalen4 = 0 - state.threadlocals = 
gil.GILThreadLocals() + state.threadlocals = my_gil_threadlocals state.threadlocals.setup_threads(space) subident = thread.start_new_thread(bootstrap, ()) mainident = thread.get_ident() diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -1,4 +1,5 @@ from rpython.rlib import rthread +from rpython.rlib.objectmodel import we_are_translated from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import ExecutionContext @@ -13,53 +14,62 @@ os_thread.bootstrap().""" def __init__(self): + "NOT_RPYTHON" self._valuedict = {} # {thread_ident: ExecutionContext()} self._cleanup_() + self.raw_thread_local = rthread.ThreadLocalReference(ExecutionContext) def _cleanup_(self): self._valuedict.clear() self._mainthreadident = 0 - self._mostrecentkey = 0 # fast minicaching for the common case - self._mostrecentvalue = None # fast minicaching for the common case - def getvalue(self): + def enter_thread(self, space): + "Notification that the current thread is about to start running." + self._set_ec(space.createexecutioncontext()) + + def _set_ec(self, ec): ident = rthread.get_ident() - if ident == self._mostrecentkey: - result = self._mostrecentvalue - else: - value = self._valuedict.get(ident, None) - # slow path: update the minicache - self._mostrecentkey = ident - self._mostrecentvalue = value - result = value - return result + if self._mainthreadident == 0 or self._mainthreadident == ident: + ec._signals_enabled = 1 # the main thread is enabled + self._mainthreadident = ident + self._valuedict[ident] = ec + # This logic relies on hacks and _make_sure_does_not_move(). + # It only works because we keep the 'ec' alive in '_valuedict' too. 
+ self.raw_thread_local.set(ec) - def setvalue(self, value): - ident = rthread.get_ident() - if value is not None: - if self._mainthreadident == 0: - value._signals_enabled = 1 # the main thread is enabled - self._mainthreadident = ident - self._valuedict[ident] = value - else: + def leave_thread(self, space): + "Notification that the current thread is about to stop." + from pypy.module.thread.os_local import thread_is_stopping + ec = self.get_ec() + if ec is not None: try: - del self._valuedict[ident] - except KeyError: - pass - # update the minicache to prevent it from containing an outdated value - self._mostrecentkey = ident - self._mostrecentvalue = value + thread_is_stopping(ec) + finally: + self.raw_thread_local.set(None) + ident = rthread.get_ident() + try: + del self._valuedict[ident] + except KeyError: + pass + + def get_ec(self): + ec = self.raw_thread_local.get() + if not we_are_translated(): + assert ec is self._valuedict.get(rthread.get_ident(), None) + return ec def signals_enabled(self): - ec = self.getvalue() + ec = self.get_ec() return ec is not None and ec._signals_enabled def enable_signals(self, space): - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None ec._signals_enabled += 1 def disable_signals(self, space): - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None new = ec._signals_enabled - 1 if new < 0: raise wrap_thread_error(space, @@ -69,22 +79,15 @@ def getallvalues(self): return self._valuedict - def leave_thread(self, space): - "Notification that the current thread is about to stop." 
- from pypy.module.thread.os_local import thread_is_stopping - ec = self.getvalue() - if ec is not None: - try: - thread_is_stopping(ec) - finally: - self.setvalue(None) - def reinit_threads(self, space): "Called in the child process after a fork()" ident = rthread.get_ident() - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None + old_sig = ec._signals_enabled if ident != self._mainthreadident: - ec._signals_enabled += 1 + old_sig += 1 self._cleanup_() self._mainthreadident = ident - self.setvalue(ec) + self._set_ec(ec) + ec._signals_enabled = old_sig diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -314,6 +314,9 @@ t = TranslationContext(config=config) self.t = t # for debugging ann = t.buildannotator() + def _do_startup(): + self.threadlocals.enter_thread(self) + ann.build_types(_do_startup, [], complete_now=False) if func is not None: ann.build_types(func, argtypes, complete_now=False) if seeobj_w: diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -22,6 +22,12 @@ IS_64_BITS = sys.maxint > 2147483647 +SUPPORT__THREAD = ( # whether the particular C compiler supports __thread + sys.platform.startswith("linux")) # Linux works + # OS/X doesn't work, because we still target 10.5/10.6 and the + # minimum required version is 10.7. Windows doesn't work. Please + # add other platforms here if it works on them. 
+ MAINDIR = os.path.dirname(os.path.dirname(__file__)) CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) @@ -156,7 +162,8 @@ # portability options BoolOption("no__thread", "don't use __thread for implementing TLS", - default=False, cmdline="--no__thread", negation=False), + default=not SUPPORT__THREAD, cmdline="--no__thread", + negation=False), IntOption("make_jobs", "Specify -j argument to make for compilation" " (C backend only)", cmdline="--make-jobs", default=detect_number_of_processors()), diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -4,6 +4,8 @@ from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote from rpython.rlib import jit_hooks +from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.rthread import ThreadLocalReference from rpython.jit.backend.detect_cpu import getcpuclass from rpython.jit.backend.test.support import CCompiledMixin from rpython.jit.codewriter.policy import StopAtXPolicy @@ -21,6 +23,7 @@ # - profiler # - full optimizer # - floats neg and abs + # - threadlocalref_get class Frame(object): _virtualizable_ = ['i'] @@ -28,6 +31,10 @@ def __init__(self, i): self.i = i + class Foo(object): + pass + t = ThreadLocalReference(Foo) + @dont_look_inside def myabs(x): return abs(x) @@ -56,6 +63,7 @@ k = myabs(j) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError + if t.get().nine != 9: raise ValueError return chr(total % 253) # from rpython.rtyper.lltypesystem import lltype, rffi @@ -78,8 +86,12 @@ return res # def main(i, j): + foo = Foo() + foo.nine = -(i + j) + t.set(foo) a_char = f(i, j) a_float = libffi_stuff(i, j) + keepalive_until_here(foo) return ord(a_char) * 10 + int(a_float) expected = main(40, -49) res = self.meta_interp(main, [40, -49]) diff --git 
a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2351,10 +2351,29 @@ assert isinstance(reg, RegLoc) self.mc.MOV_rr(reg.value, ebp.value) + def threadlocalref_get(self, op, resloc): + # this function is only called on Linux + from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr + from rpython.jit.backend.x86 import stmtlocal + assert isinstance(resloc, RegLoc) + effectinfo = op.getdescr().get_extra_info() + assert effectinfo.extradescrs is not None + ed = effectinfo.extradescrs[0] + assert isinstance(ed, ThreadLocalRefDescr) + addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) + addr0 = stmtlocal.threadlocal_base() + addr = addr1 - addr0 + assert rx86.fits_in_32bits(addr) + mc = self.mc + mc.writechar(stmtlocal.SEGMENT_TL) # prefix + mc.MOV_rj(resloc.value, addr) + + genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST genop_list = [Assembler386.not_implemented_op] * rop._LAST genop_llong_list = {} genop_math_list = {} +genop_tlref_list = {} genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST for name, value in Assembler386.__dict__.iteritems(): diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -2,7 +2,7 @@ """ Register allocation scheme. 
""" -import os +import os, sys from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.descr import (ArrayDescr, CallDescr, unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr) @@ -692,6 +692,15 @@ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1)) self.perform_math(op, [loc0], loc0) + TLREF_SUPPORT = sys.platform.startswith('linux') + + def _consider_threadlocalref_get(self, op): + if self.TLREF_SUPPORT: + resloc = self.force_allocate_reg(op.result) + self.assembler.threadlocalref_get(op, resloc) + else: + self._consider_call(op) + def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): # we need to save registers on the stack: # @@ -769,6 +778,8 @@ return if oopspecindex == EffectInfo.OS_MATH_SQRT: return self._consider_math_sqrt(op) + if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: + return self._consider_threadlocalref_get(op) self._consider_call(op) def consider_call_may_force(self, op, guard_op): diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/stmtlocal.py @@ -0,0 +1,32 @@ +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.jit.backend.x86.arch import WORD + +SEGMENT_FS = '\x64' +SEGMENT_GS = '\x65' + +if WORD == 4: + SEGMENT_TL = SEGMENT_GS + _instruction = "movl %%gs:0, %0" +else: + SEGMENT_TL = SEGMENT_FS + _instruction = "movq %%fs:0, %0" + +eci = ExternalCompilationInfo(post_include_bits=[''' +#define RPY_STM_JIT 1 +static long pypy__threadlocal_base(void) +{ + /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */ + long result; + asm("%s" : "=r"(result)); + return result; +} +''' % _instruction]) + + +threadlocal_base = rffi.llexternal( + 'pypy__threadlocal_base', + [], lltype.Signed, + compilation_info=eci, + _nowrapper=True, + ) #transactionsafe=True) diff --git 
a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -22,6 +22,7 @@ OS_STR2UNICODE = 2 # "str.str2unicode" OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array OS_DICT_LOOKUP = 4 # ll_dict_lookup + OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get # OS_STR_CONCAT = 22 # "stroruni.concat" OS_STR_SLICE = 23 # "stroruni.slice" diff --git a/rpython/jit/codewriter/jitcode.py b/rpython/jit/codewriter/jitcode.py --- a/rpython/jit/codewriter/jitcode.py +++ b/rpython/jit/codewriter/jitcode.py @@ -117,6 +117,26 @@ raise NotImplementedError +class ThreadLocalRefDescr(AbstractDescr): + # A special descr used as the extradescr in a call to a + # threadlocalref_get function. If the backend supports it, + # it can use this 'get_tlref_addr()' to get the address *in the + # current thread* of the thread-local variable. If, on the current + # platform, the "__thread" variables are implemented as an offset + # from some base register (e.g. %fs on x86-64), then the backend will + # immediately substract the current value of the base register. + # This gives an offset from the base register, and this can be + # written down in an assembler instruction to load the "__thread" + # variable from anywhere. 
+ + def __init__(self, opaque_id): + from rpython.rtyper.lltypesystem.lloperation import llop + from rpython.rtyper.lltypesystem import llmemory + def get_tlref_addr(): + return llop.threadlocalref_getaddr(llmemory.Address, opaque_id) + self.get_tlref_addr = get_tlref_addr + + class LiveVarsInfo(object): def __init__(self, live_i, live_r, live_f): self.live_i = live_i diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -390,11 +390,15 @@ lst.append(v) def handle_residual_call(self, op, extraargs=[], may_call_jitcodes=False, - oopspecindex=EffectInfo.OS_NONE): + oopspecindex=EffectInfo.OS_NONE, + extraeffect=None, + extradescr=None): """A direct_call turns into the operation 'residual_call_xxx' if it is calling a function that we don't want to JIT. The initial args of 'residual_call_xxx' are the function to call, and its calldescr.""" - calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex) + calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex, + extraeffect=extraeffect, + extradescr=extradescr) op1 = self.rewrite_call(op, 'residual_call', [op.args[0]] + extraargs, calldescr=calldescr) if may_call_jitcodes or self.callcontrol.calldescr_canraise(calldescr): @@ -1903,6 +1907,18 @@ None) return [op0, op1] + def rewrite_op_threadlocalref_get(self, op): + from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr + opaqueid = op.args[0].value + op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [], + extra=(opaqueid,), + extrakey=opaqueid._obj) + extradescr = ThreadLocalRefDescr(opaqueid) + return self.handle_residual_call(op1, + oopspecindex=EffectInfo.OS_THREADLOCALREF_GET, + extraeffect=EffectInfo.EF_LOOPINVARIANT, + extradescr=[extradescr]) + # ____________________________________________________________ class NotSupported(Exception): diff --git a/rpython/jit/codewriter/support.py 
b/rpython/jit/codewriter/support.py --- a/rpython/jit/codewriter/support.py +++ b/rpython/jit/codewriter/support.py @@ -712,6 +712,11 @@ build_ll_1_raw_free_no_track_allocation = ( build_raw_free_builder(track_allocation=False)) + def build_ll_0_threadlocalref_getter(opaqueid): + def _ll_0_threadlocalref_getter(): + return llop.threadlocalref_get(rclass.OBJECTPTR, opaqueid) + return _ll_0_threadlocalref_getter + def _ll_1_weakref_create(obj): return llop.weakref_create(llmemory.WeakRefPtr, obj) diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -147,6 +147,7 @@ EI.OS_UNIEQ_LENGTHOK: ([PUNICODE, PUNICODE], INT), EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR), EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void), + EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR), } argtypes = argtypes[oopspecindex] assert argtypes[0] == [v.concretetype for v in op.args[1:]] @@ -157,6 +158,8 @@ assert extraeffect == EI.EF_CAN_RAISE elif oopspecindex == EI.OS_RAW_FREE: assert extraeffect == EI.EF_CANNOT_RAISE + elif oopspecindex == EI.OS_THREADLOCALREF_GET: + assert extraeffect == EI.EF_LOOPINVARIANT else: assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE return 'calldescr-%d' % oopspecindex @@ -1300,6 +1303,23 @@ assert op1.result is None assert op2 is None +def test_threadlocalref_get(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rlib.rthread import ThreadLocalReference + OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET + class Foo: pass + t = ThreadLocalReference(Foo) + v2 = varoftype(rclass.OBJECTPTR) + c_opaqueid = const(t.opaque_id) + op = SpaceOperation('threadlocalref_get', [c_opaqueid], v2) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op0 = tr.rewrite_operation(op) + assert op0.opname == 'residual_call_r_r' + assert op0.args[0].value == 'threadlocalref_getter' # 
pseudo-function as str + assert op0.args[1] == ListOfKind("ref", []) + assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET + assert op0.result == v2 + def test_unknown_operation(): op = SpaceOperation('foobar', [], varoftype(lltype.Void)) tr = Transformer() diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py new file mode 100644 --- /dev/null +++ b/rpython/jit/metainterp/test/test_threadlocal.py @@ -0,0 +1,30 @@ +import py +from rpython.jit.metainterp.test.support import LLJitMixin +from rpython.rlib.rthread import ThreadLocalReference +from rpython.rlib.jit import dont_look_inside + + +class ThreadLocalTest(object): + + def test_threadlocalref_get(self): + class Foo: + pass + t = ThreadLocalReference(Foo) + x = Foo() + + @dont_look_inside + def setup(): + t.set(x) + + def f(): + setup() + if t.get() is x: + return 42 + return -666 + + res = self.interp_operations(f, []) + assert res == 42 + + +class TestLLtype(ThreadLocalTest, LLJitMixin): + pass diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -272,3 +272,65 @@ llop.gc_thread_after_fork(lltype.Void, result_of_fork, opaqueaddr) else: assert opaqueaddr == llmemory.NULL + +# ____________________________________________________________ +# +# Thread-locals. Only for references that change "not too often" -- +# for now, the JIT compiles get() as a loop-invariant, so basically +# don't change them. +# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR! +# We use _make_sure_does_not_move() to make sure the pointer will not move. 
+ +ecitl = ExternalCompilationInfo( + includes = ['src/threadlocal.h'], + separate_module_files = [translator_c_dir / 'src' / 'threadlocal.c']) +ensure_threadlocal = rffi.llexternal_use_eci(ecitl) + +class ThreadLocalReference(object): + _COUNT = 1 + OPAQUEID = lltype.OpaqueType("ThreadLocalRef", + hints={"threadlocalref": True, + "external": "C", + "c_name": "RPyThreadStaticTLS"}) + + def __init__(self, Cls): + "NOT_RPYTHON: must be prebuilt" + import thread + self.Cls = Cls + self.local = thread._local() # <- NOT_RPYTHON + unique_id = ThreadLocalReference._COUNT + ThreadLocalReference._COUNT += 1 + opaque_id = lltype.opaqueptr(ThreadLocalReference.OPAQUEID, + 'tlref%d' % unique_id) + self.opaque_id = opaque_id + + def get(): + if we_are_translated(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance + ptr = llop.threadlocalref_get(rclass.OBJECTPTR, opaque_id) + return cast_base_ptr_to_instance(Cls, ptr) + else: + return getattr(self.local, 'value', None) + + @jit.dont_look_inside + def set(value): + assert isinstance(value, Cls) or value is None + if we_are_translated(): + from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr + from rpython.rlib.rgc import _make_sure_does_not_move + from rpython.rlib.objectmodel import running_on_llinterp + ptr = cast_instance_to_base_ptr(value) + if not running_on_llinterp: + gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr) + _make_sure_does_not_move(gcref) + llop.threadlocalref_set(lltype.Void, opaque_id, ptr) + ensure_threadlocal() + else: + self.local.value = value + + self.get = get + self.set = set + + def _freeze_(self): + return True diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py --- a/rpython/rlib/test/test_rthread.py +++ b/rpython/rlib/test/test_rthread.py @@ -1,4 +1,4 @@ -import gc +import gc, time from rpython.rlib.rthread import * from rpython.translator.c.test.test_boehm import AbstractGCTestClass from 
rpython.rtyper.lltypesystem import lltype, rffi @@ -29,6 +29,23 @@ else: py.test.fail("Did not raise") +def test_tlref_untranslated(): + class FooBar(object): + pass + t = ThreadLocalReference(FooBar) + results = [] + def subthread(): + x = FooBar() + results.append(t.get() is None) + t.set(x) + results.append(t.get() is x) + time.sleep(0.2) + results.append(t.get() is x) + for i in range(5): + start_new_thread(subthread, ()) + time.sleep(0.5) + assert results == [True] * 15 + class AbstractThreadTests(AbstractGCTestClass): use_threads = True @@ -198,6 +215,20 @@ res = fn() assert res >= 0.95 + def test_tlref(self): + class FooBar(object): + pass + t = ThreadLocalReference(FooBar) + def f(): + x1 = FooBar() + t.set(x1) + import gc; gc.collect() + assert t.get() is x1 + return 42 + fn = self.getcompiled(f, []) + res = fn() + assert res == 42 + #class TestRunDirectly(AbstractThreadTests): # def getcompiled(self, f, argtypes): # return f @@ -208,4 +239,4 @@ gcpolicy = 'boehm' class TestUsingFramework(AbstractThreadTests): - gcpolicy = 'generation' + gcpolicy = 'minimark' diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -919,6 +919,20 @@ def op_stack_current(self): return 0 + def op_threadlocalref_set(self, key, value): + try: + d = self.llinterpreter.tlrefsdict + except AttributeError: + d = self.llinterpreter.tlrefsdict = {} + d[key._obj] = value + + def op_threadlocalref_get(self, key): + d = self.llinterpreter.tlrefsdict + return d[key._obj] + + def op_threadlocalref_getaddr(self, key): + raise NotImplementedError("threadlocalref_getaddr") + # __________________________________________________________ # operations on addresses diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -541,6 +541,10 @@ 'getslice': 
LLOp(canraise=(Exception,)), 'check_and_clear_exc': LLOp(), + 'threadlocalref_get': LLOp(sideeffects=False), + 'threadlocalref_getaddr': LLOp(sideeffects=False), + 'threadlocalref_set': LLOp(), + # __________ debugging __________ 'debug_view': LLOp(), 'debug_print': LLOp(canrun=True), diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py --- a/rpython/translator/c/node.py +++ b/rpython/translator/c/node.py @@ -959,12 +959,30 @@ args.append('0') yield 'RPyOpaque_SETUP_%s(%s);' % (T.tag, ', '.join(args)) +class ThreadLocalRefOpaqueNode(ContainerNode): + nodekind = 'tlrefopaque' + + def basename(self): + return self.obj._name + + def enum_dependencies(self): + return [] + + def initializationexpr(self, decoration=''): + return ['0'] + + def startupcode(self): + p = self.getptrname() + yield 'RPyThreadStaticTLS_Create(%s);' % (p,) + def opaquenode_factory(db, T, obj): if T == RuntimeTypeInfo: return db.gcpolicy.rtti_node_factory()(db, T, obj) if T.hints.get("render_structure", False): return ExtType_OpaqueNode(db, T, obj) + if T.hints.get("threadlocalref", False): + return ThreadLocalRefOpaqueNode(db, T, obj) raise Exception("don't know about %r" % (T,)) diff --git a/rpython/translator/c/src/g_prerequisite.h b/rpython/translator/c/src/g_prerequisite.h --- a/rpython/translator/c/src/g_prerequisite.h +++ b/rpython/translator/c/src/g_prerequisite.h @@ -23,3 +23,6 @@ # define RPY_LENGTH0 1 /* array decl [0] are bad */ # define RPY_DUMMY_VARLENGTH /* nothing */ #endif + + +#include "src/threadlocal.h" diff --git a/rpython/translator/c/src/stack.c b/rpython/translator/c/src/stack.c --- a/rpython/translator/c/src/stack.c +++ b/rpython/translator/c/src/stack.c @@ -32,12 +32,7 @@ /* XXX We assume that initialization is performed early, when there is still only one thread running. This allows us to ignore race conditions here */ - char *errmsg = RPyThreadStaticTLS_Create(&end_tls_key); - if (errmsg) { - /* XXX should we exit the process? 
*/ - fprintf(stderr, "Internal PyPy error: %s\n", errmsg); - return 1; - } + RPyThreadStaticTLS_Create(&end_tls_key); } baseptr = (char *) RPyThreadStaticTLS_Get(end_tls_key); diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c --- a/rpython/translator/c/src/threadlocal.c +++ b/rpython/translator/c/src/threadlocal.c @@ -1,24 +1,28 @@ +#include +#include #include "src/threadlocal.h" #ifdef _WIN32 -char *RPyThreadTLS_Create(RPyThreadTLS *result) +void RPyThreadTLS_Create(RPyThreadTLS *result) { *result = TlsAlloc(); - if (*result == TLS_OUT_OF_INDEXES) - return "out of thread-local storage indexes"; - else - return NULL; + if (*result == TLS_OUT_OF_INDEXES) { + fprintf(stderr, "Internal RPython error: " + "out of thread-local storage indexes"); + abort(); + } } #else -char *RPyThreadTLS_Create(RPyThreadTLS *result) +void RPyThreadTLS_Create(RPyThreadTLS *result) { - if (pthread_key_create(result, NULL) != 0) - return "out of thread-local storage keys"; - else - return NULL; + if (pthread_key_create(result, NULL) != 0) { + fprintf(stderr, "Internal RPython error: " + "out of thread-local storage keys"); + abort(); + } } #endif diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h --- a/rpython/translator/c/src/threadlocal.h +++ b/rpython/translator/c/src/threadlocal.h @@ -1,4 +1,7 @@ /* Thread-local storage */ +#ifndef _SRC_THREADLOCAL_H +#define _SRC_THREADLOCAL_H + #ifdef _WIN32 @@ -22,9 +25,10 @@ #ifdef USE___THREAD #define RPyThreadStaticTLS __thread void * -#define RPyThreadStaticTLS_Create(tls) NULL +#define RPyThreadStaticTLS_Create(tls) (void)0 #define RPyThreadStaticTLS_Get(tls) tls #define RPyThreadStaticTLS_Set(tls, value) tls = value +#define OP_THREADLOCALREF_GETADDR(tlref, ptr) ptr = tlref #endif @@ -34,7 +38,13 @@ #define RPyThreadStaticTLS_Create(key) RPyThreadTLS_Create(key) #define RPyThreadStaticTLS_Get(key) RPyThreadTLS_Get(key) #define RPyThreadStaticTLS_Set(key, 
value) RPyThreadTLS_Set(key, value) -char *RPyThreadTLS_Create(RPyThreadTLS *result); +void RPyThreadTLS_Create(RPyThreadTLS *result); #endif + +#define OP_THREADLOCALREF_SET(tlref, ptr, _) RPyThreadStaticTLS_Set(*tlref, ptr) +#define OP_THREADLOCALREF_GET(tlref, ptr) ptr = RPyThreadStaticTLS_Get(*tlref) + + +#endif /* _SRC_THREADLOCAL_H */ From noreply at buildbot.pypy.org Mon Jun 23 20:01:28 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 20:01:28 +0200 (CEST) Subject: [pypy-commit] pypy default: document merge Message-ID: <20140623180128.B5B341D2DC0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72172:ffd8d893e36b Date: 2014-06-23 20:00 +0200 http://bitbucket.org/pypy/pypy/changeset/ffd8d893e36b/ Log: document merge diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -22,3 +22,11 @@ conditional_calls). I would expect the net result to be a slight slow-down on some simple benchmarks and a speed-up on bigger programs. + +.. branch: ec-threadlocal +Change the executioncontext's lookup to be done by reading a thread- +local variable (which is implemented in C using '__thread' if +possible, and pthread_getspecific() otherwise). On Linux x86 and +x86-64, the JIT backend has a special optimization that lets it emit +directly a single MOV from a %gs- or %fs-based address. It seems +actually to give a good boost in performance. 
From noreply at buildbot.pypy.org Mon Jun 23 20:36:22 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 20:36:22 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: simplify and assert till ztranslation passes Message-ID: <20140623183622.59AEB1C023B@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72173:839449360021 Date: 2014-06-23 21:33 +0300 http://bitbucket.org/pypy/pypy/changeset/839449360021/ Log: simplify and assert till ztranslation passes diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -7,7 +7,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rtyper.lltypesystem import lltype, rffi from pypy.module.micronumpy import support, constants as NPY -from pypy.module.micronumpy.base import W_NDimArray +from pypy.module.micronumpy.base import W_NDimArray, convert_to_array from pypy.module.micronumpy.iterators import PureShapeIter, AxisIter, \ AllButAxisIter from pypy.interpreter.argument import Arguments @@ -82,67 +82,89 @@ obj_state = obj_iter.next(obj_state) return out -setslice_driver = jit.JitDriver(name='numpy_setslice', - greens = ['shapelen', 'dtype'], - reds = 'auto') - call_many_to_one_driver = jit.JitDriver( name='numpy_call_many_to_one', - greens=['shapelen', 'func', 'res_dtype'], + greens=['shapelen', 'nin', 'func', 'res_dtype'], reds='auto') -def call_many_to_one(space, shape, func, res_dtype, w_in, out): +def call_many_to_one(space, shape, func, res_dtype, in_args, out): # out must hav been built. 
func needs no calc_type, is usually an # external ufunc - iters_and_states = [list(i.create_iter(shape)) for i in w_in] + nin = len(in_args) + in_iters = [None] * nin + in_states = [None] * nin + for i in range(nin): + assert isinstance(in_args[i], W_NDimArray) + in_iter, in_state = in_args[i].create_iter(shape) + in_iters[i] = in_iter + in_states[i] = in_state shapelen = len(shape) + assert isinstance(out, W_NDimArray) out_iter, out_state = out.create_iter(shape) + vals = [None] * nin while not out_iter.done(out_state): call_many_to_one_driver.jit_merge_point(shapelen=shapelen, func=func, - res_dtype=res_dtype) - vals = [i_s[0].getitem(i_s[1]) for i_s in iters_and_states] - arglist = space.wrap(vals) + res_dtype=res_dtype, nin=nin) + for i in range(nin): + vals[i] = in_iters[i].getitem(in_states[i]) + arglist = space.newlist(vals) out_val = space.call_args(func, Arguments.frompacked(space, arglist)) out_iter.setitem(out_state, res_dtype.coerce(space, out_val)) - for i in range(len(iters_and_states)): - iters_and_states[i][1] = iters_and_states[i][0].next(iters_and_states[i][1]) + for i in range(nin): + in_states[i] = in_iters[i].next(in_states[i]) out_state = out_iter.next(out_state) return out call_many_to_many_driver = jit.JitDriver( name='numpy_call_many_to_many', - greens=['shapelen', 'func', 'res_dtype'], + greens=['shapelen', 'nin', 'nout', 'func', 'res_dtype'], reds='auto') -def call_many_to_many(space, shape, func, res_dtype, w_in, w_out): +def call_many_to_many(space, shape, func, res_dtype, in_args, out_args): # out must hav been built. 
func needs no calc_type, is usually an # external ufunc - in_iters_and_states = [list(i.create_iter(shape)) for i in w_in] + nin = len(in_args) + in_iters = [None] * nin + in_states = [None] * nin + nout = len(out_args) + out_iters = [None] * nout + out_states = [None] * nout + for i in range(nin): + assert isinstance(in_args[i], W_NDimArray) + in_iter, in_state = in_args[i].create_iter(shape) + in_iters[i] = in_iter + in_states[i] = in_state + for i in range(nout): + assert isinstance(out_args[i], W_NDimArray) + out_iter, out_state = out_args[i].create_iter(shape) + out_iters[i] = out_iter + out_states[i] = out_state shapelen = len(shape) - out_iters_and_states = [list(i.create_iter(shape)) for i in w_out] + vals = [None] * nin # what does the function return? - while not out_iters_and_states[0][0].done(out_iters_and_states[0][1]): + while not out_iters[0].done(out_states[0]): call_many_to_many_driver.jit_merge_point(shapelen=shapelen, func=func, - res_dtype=res_dtype) - vals = [i_s[0].getitem(i_s[1]) for i_s in in_iters_and_states] - arglist = space.wrap(vals) + res_dtype=res_dtype, nin=nin, nout=nout) + for i in range(nin): + vals[i] = in_iters[i].getitem(in_states[i]) + arglist = space.newlist(vals) out_vals = space.call_args(func, Arguments.frompacked(space, arglist)) # XXX bad form - out_vals should be a list or tuple of boxes. 
# but func can return anything, - if not isinstance(out_vals,(list, tuple)): - out_iter, out_state = out_iters_and_states[0] - out_iter.setitem(out_state, res_dtype.coerce(space, out_vals)) - out_iters_and_states[0][1] = out_iters_and_states[0][0].next(out_iters_and_states[0][1]) + if not isinstance(out_vals, list) and not isinstance(out_vals, tuple): + out_iters[0].setitem(out_states[0], res_dtype.coerce(space, out_vals)) + out_states[0] = out_iters[0].next(out_states[0]) else: - for i in range(len(out_iters_and_states)): - out_iter, out_state = out_iters_and_states[i] - out_iter.setitem(out_state, out_vals[i].convert_to(space, res_dtype)) - out_iters_and_states[i][1] = out_iters_and_states[i][0].next(out_iters_and_states[i][1]) - for i in range(len(in_iters_and_states)): - in_iters_and_states[i][1] = in_iters_and_states[i][0].next(in_iters_and_states[i][1]) - return space.wrap(tuple(w_out)) + for i in range(len(out_vals)): + out_iters[i].setitem(out_states[i], res_dtype.coerce(space, out_vals[i])) + out_states[i] = out_iters[i].next(out_states[i]) + for i in range(nin): + in_states[i] = in_iters[i].next(in_states[i]) + return space.newtuple([convert_to_array(space, o) for o in out_args]) - +setslice_driver = jit.JitDriver(name='numpy_setslice', + greens = ['shapelen', 'dtype'], + reds = 'auto') def setslice(space, shape, target, source): # note that unlike everything else, target and source here are diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -228,7 +228,8 @@ raises (ValueError, int_func22, arange(10)) res = int_func12(arange(10)) assert len(res) == 2 - assert isinstance(res, tuple) + # XXX makes ztranslation unhappy + # assert isinstance(res, tuple) assert (res[0] == arange(10)).all() def test_from_cffi_func(self): diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- 
a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -21,8 +21,9 @@ class W_Ufunc(W_Root): _immutable_fields_ = [ - "name", "promote_to_largest", "promote_to_float", "promote_bools", - "identity", "int_only", "allow_bool", "allow_complex", "complex_to_float" + "name", "promote_to_largest", "promote_to_float", "promote_bools", "nin", + "identity", "int_only", "allow_bool", "allow_complex", + "complex_to_float", "nargs", "nout", "signature" ] def __init__(self, name, promote_to_largest, promote_to_float, promote_bools, @@ -475,10 +476,9 @@ If dtypes == 'match', only one argument is provided and the output dtypes will match the input dtype (not cpython numpy compatible) ''' - _immutable_fields_ = ["funcs", "signature", "nin", "nout", "nargs", - "dtypes", "data"] + _immutable_fields_ = ["funcs", "dtypes", "data"] - def __init__(self, space, funcs, name, identity, nin, nout, dtypes, signature): + def __init__(self, space, funcs, name, identity, nin, nout, dtypes, signature, match_dtypes=False): # XXX make sure funcs, signature, dtypes, nin, nout are consistent # These don't matter, we use the signature and dtypes for determining @@ -492,8 +492,9 @@ self.dtypes = dtypes self.nin = nin self.nout = nout + self.match_dtypes = match_dtypes self.nargs = nin + max(nout, 1) # ufuncs can always be called with an out=<> kwarg - if dtypes[0] != 'match' and (len(dtypes) % len(funcs) != 0 or + if not match_dtypes and (len(dtypes) % len(funcs) != 0 or len(dtypes) / len(funcs) != self.nargs): raise oefmt(space.w_ValueError, "generic ufunc with %d functions, %d arguments, but %d dtypes", @@ -509,7 +510,7 @@ out = None inargs = [] if len(args_w) < self.nin: - raise oefmt(space.ValueError, + raise oefmt(space.w_ValueError, '%s called with too few input args, expected at least %d got %d', self.name, self.nin, len(args_w)) for i in range(self.nin): @@ -528,6 +529,7 @@ self.alloc_outargs(space, index, inargs, outargs) # XXX handle inner-loop indexing new_shape = 
inargs[0].get_shape() + assert isinstance(outargs[0], W_NDimArray) res_dtype = outargs[0].get_dtype() if len(outargs) < 2: return loop.call_many_to_one(space, new_shape, self.funcs[index], @@ -549,7 +551,6 @@ if outargs[i] is None: outargs[i] = W_NDimArray.from_shape(space, temp_shape, dtype, order) - def prep_call(self, space, index, inargs, outargs): # Use the index and signature to determine # dims and steps for function call @@ -953,40 +954,33 @@ raise oefmt(space.w_TypeError, 'func must be callable') func = [w_func] + match_dtypes = False if space.is_none(w_dtypes) and not signature: raise oefmt(space.w_NotImplementedError, 'object dtype requested but not implemented') elif (space.isinstance_w(w_dtypes, space.w_tuple) or space.isinstance_w(w_dtypes, space.w_list)): - dtypes = space.listview(w_dtypes) - if space.str_w(dtypes[0]) == 'match': - dtypes = ['match',] + _dtypes = space.listview(w_dtypes) + if space.str_w(_dtypes[0]) == 'match': + dtypes = [] + match_dtypes = True else: + dtypes = [None]*len(_dtypes) for i in range(len(dtypes)): - dtypes[i] = descriptor.decode_w_dtype(space, dtypes[i]) + dtypes[i] = descriptor.decode_w_dtype(space, _dtypes[i]) else: raise oefmt(space.w_ValueError, 'dtypes must be None or a list of dtypes') if space.is_none(w_identity): - identity = None - elif space.isinstance_w(w_identity, space.int_w): - identity = space.int_w(w_identity) - else: - raise oefmt(space.w_ValueError, - 'identity must be 0, 1, or None') - if nin==1 and nout==1 and dtypes[0] == 'match': - w_ret = W_Ufunc1(wrap_ext_func(space, func[0]), name) - elif nin==2 and nout==1 and dtypes[0] == 'match': - w_ret = W_Ufunc2(wrap_ext_func(space, func[0]), name) - else: - w_ret = W_UfuncGeneric(space, func, name, identity, nin, nout, dtypes, signature) + identity = None + else: + identity = \ + descriptor.get_dtype_cache(space).w_longdtype.box(w_identity) + + w_ret = W_UfuncGeneric(space, func, name, identity, nin, nout, dtypes, signature, + match_dtypes=match_dtypes) 
if doc: w_ret.w_doc = space.wrap(doc) return w_ret -def wrap_ext_func(space, func): - def _func(calc_dtype, w_left, w_right): - arglist = space.wrap([w_left, w_right]) - return space.call_args(func, Arguments.frompacked(space, arglist)) - return _func From noreply at buildbot.pypy.org Mon Jun 23 20:36:23 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 20:36:23 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: test now passes Message-ID: <20140623183623.ACEDD1C023B@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72174:3a39208a51ad Date: 2014-06-23 21:35 +0300 http://bitbucket.org/pypy/pypy/changeset/3a39208a51ad/ Log: test now passes diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -228,8 +228,7 @@ raises (ValueError, int_func22, arange(10)) res = int_func12(arange(10)) assert len(res) == 2 - # XXX makes ztranslation unhappy - # assert isinstance(res, tuple) + assert isinstance(res, tuple) assert (res[0] == arange(10)).all() def test_from_cffi_func(self): From noreply at buildbot.pypy.org Mon Jun 23 20:55:11 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 20:55:11 +0200 (CEST) Subject: [pypy-commit] pypy default: Fixes that should have been done in the ec-threadlocal branch Message-ID: <20140623185511.542F01C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72175:45430a184cd3 Date: 2014-06-23 18:52 +0000 http://bitbucket.org/pypy/pypy/changeset/45430a184cd3/ Log: Fixes that should have been done in the ec-threadlocal branch diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py --- a/pypy/module/pypyjit/test_pypy_c/test_call.py +++ b/pypy/module/pypyjit/test_pypy_c/test_call.py @@ -71,13 +71,13 @@ "getfield_gc", "guard_value", "guard_not_invalidated"] ops = 
entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL') - assert log.opnames(ops) == ["guard_not_invalidated"] + assert log.opnames(ops) == [] # ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL') assert log.opnames(ops) == [] # assert entry_bridge.match_by_id('call', """ - p38 = call(ConstClass(getexecutioncontext), descr=) + p38 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=) p39 = getfield_gc(p38, descr=) i40 = force_token() p41 = getfield_gc(p38, descr=) @@ -435,7 +435,7 @@ p26 = getfield_gc(p7, descr=) guard_value(p26, ConstPtr(ptr27), descr=...) guard_not_invalidated(descr=...) - p29 = call(ConstClass(getexecutioncontext), descr=) + p29 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=) p30 = getfield_gc(p29, descr=) p31 = force_token() p32 = getfield_gc(p29, descr=) @@ -448,7 +448,6 @@ i39 = getfield_gc_pure(p37, descr=) i40 = int_add_ovf(i22, i39) guard_no_overflow(descr=...) - guard_not_invalidated(descr=...) --TICK-- """) From noreply at buildbot.pypy.org Mon Jun 23 21:27:01 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 21:27:01 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: backed out changeset 8c81f5d58b5c, disable it all instead Message-ID: <20140623192701.9518D1D2DC0@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72176:748cfaeaea89 Date: 2014-06-23 21:44 +0300 http://bitbucket.org/pypy/pypy/changeset/748cfaeaea89/ Log: backed out changeset 8c81f5d58b5c, disable it all instead diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -312,7 +312,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode, handle = rffi.VOIDP): + def __init__(self, space, name, mode): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ 
-320,7 +320,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode, handle=handle) + self.cdll = libffi.CDLL(name, mode) except DLOpenError, e: raise wrap_dlopenerror(space, e, self.name) diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -244,8 +244,10 @@ handle = space.fromcache(State).get_pythonapi_handle() # Make a dll object with it - from pypy.module._rawffi.alt.interp_funcptr import W_CDLL - return space.wrap(W_CDLL(space, "python api", -1, handle=handle)) + from pypy.module._rawffi.interp_rawffi import W_CDLL + from rpython.rlib.clibffi import RawCDLL + cdll = RawCDLL(handle) + return space.wrap(W_CDLL(space, "python api", cdll)) def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -11,7 +11,7 @@ from rpython.rlib.clibffi import FUNCFLAG_CDECL, FUNCFLAG_STDCALL, \ AbstractFuncPtr, push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT from rpython.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal -from rpython.rlib.rdynload import DLLHANDLE, _WIN32 +from rpython.rlib.rdynload import DLLHANDLE import os @@ -413,12 +413,9 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1, handle=rffi.VOIDP): + def __init__(self, libname, mode=-1): """Load the library, or raises DLOpenError.""" self.lib = rffi.cast(DLLHANDLE, 0) - if handle is not rffi.VOIDP : - self.lib = rffi.cast(DLLHANDLE, handle) - return with rffi.scoped_str2charp(libname) as ll_libname: self.lib = dlopen(ll_libname, mode) diff --git a/rpython/rlib/test/test_libffi.py b/rpython/rlib/test/test_libffi.py --- a/rpython/rlib/test/test_libffi.py +++ b/rpython/rlib/test/test_libffi.py @@ -186,24 +186,6 @@ chain.arg(10) sleep.call(chain, lltype.Void, is_struct=False) - def test_dll_create(self): - if os.name == 'nt': - 
import sys - if not isinstance(sys.dllhandle, int): - py.test.skip('Run with cpython, not pypy') - dll = CDLL(None, handle=sys.dllhandle) - else: - dll = CDLL(None) - try: - # The pythonapi of the translating python - dll.getaddressindll('Py_OptimizeFlag') - except KeyError: - try: - dll.getaddressindll('PyPy_OptimizeFlag') - except KeyError: - assert False, 'could not find function in pythonapi' - - class TestLibffiCall(BaseFfiTest): """ Test various kind of calls through libffi. From noreply at buildbot.pypy.org Mon Jun 23 21:27:02 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 21:27:02 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: remove dllhandle from sys module on windows, test and document Message-ID: <20140623192702.D5E301D2DC0@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72177:6476e0537293 Date: 2014-06-23 22:04 +0300 http://bitbucket.org/pypy/pypy/changeset/6476e0537293/ Log: remove dllhandle from sys module on windows, test and document diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -65,7 +65,7 @@ signal struct symbol - sys + sys (without sys.dllhandle on windows) termios thread time diff --git a/pypy/doc/ctypes-implementation.rst b/pypy/doc/ctypes-implementation.rst --- a/pypy/doc/ctypes-implementation.rst +++ b/pypy/doc/ctypes-implementation.rst @@ -73,10 +73,7 @@ current implementation: * ``ctypes.pythonapi`` lets you access the CPython C API - emulation layer. It does not work on PyPy at the moment, we are missing a - ``getfunc`` method for CDLL. Work was begun - to refactor the rpython implementation of _rawffi (in - pypy/modules/_rawffi/alt) but that project has stalled. + emulation layer. It does not work on PyPy. 
Note that even if it worked, our implementation would not do anything sensible about the GIL and the functions will be named with an extra diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py --- a/pypy/module/sys/__init__.py +++ b/pypy/module/sys/__init__.py @@ -108,12 +108,6 @@ # don't get the filesystemencoding at translation time assert self.filesystemencoding is None - else: - if _WIN: - from pypy.module.sys import vm - w_handle = vm.get_dllhandle(space) - space.setitem(self.w_dict, space.wrap("dllhandle"), w_handle) - def getmodule(self, name): space = self.space w_modules = self.get('modules') diff --git a/pypy/module/sys/test/test_sysmodule.py b/pypy/module/sys/test/test_sysmodule.py --- a/pypy/module/sys/test/test_sysmodule.py +++ b/pypy/module/sys/test/test_sysmodule.py @@ -391,7 +391,8 @@ import sys if hasattr(sys, "getwindowsversion"): v = sys.getwindowsversion() - assert isinstance(v, tuple) + if '__pypy__' in sys.builtin_module_names: + assert isinstance(v, tuple) assert len(v) == 5 assert isinstance(v[0], int) assert isinstance(v[1], int) @@ -419,6 +420,14 @@ if hasattr(sys, "winver"): assert sys.winver == sys.version[:3] + def test_no_dllhandle(self): + import sys + if '__pypy__' in sys.builtin_module_names: + assert not hasattr(sys, 'dllhandle') + elif sys.platform == 'win32': + # only on cpython win32 + assert hasattr(sys, 'dllhandle') + def test_dlopenflags(self): import sys if hasattr(sys, "setdlopenflags"): @@ -486,7 +495,8 @@ assert isinstance(sys.version, basestring) assert isinstance(sys.warnoptions, list) vi = sys.version_info - assert isinstance(vi, tuple) + if '__pypy__' in sys.builtin_module_names: + assert isinstance(vi, tuple) assert len(vi) == 5 assert isinstance(vi[0], int) assert isinstance(vi[1], int) @@ -512,6 +522,8 @@ def test_pypy_attributes(self): import sys + if '__pypy__' not in sys.builtin_module_names: + skip("only on PyPy") assert isinstance(sys.pypy_objspaceclass, str) vi = sys.pypy_version_info assert 
isinstance(vi, tuple) @@ -528,10 +540,14 @@ def test_subversion(self): import sys + if '__pypy__' not in sys.builtin_module_names: + skip("only on PyPy") assert sys.subversion == ('PyPy', '', '') def test__mercurial(self): import sys, re + if '__pypy__' not in sys.builtin_module_names: + skip("only on PyPy") project, hgtag, hgid = sys._mercurial assert project == 'PyPy' # the tag or branch may be anything, including the empty string diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -229,26 +229,6 @@ ]) return space.call_function(w_windows_version_info, raw_version) - at jit.dont_look_inside -def get_dllhandle(space): - if not space.config.objspace.usemodules.cpyext: - return space.wrap(0) - if not space.config.objspace.usemodules._rawffi: - return space.wrap(0) - - return _get_dllhandle(space) - -def _get_dllhandle(space): - # Retrieve cpyext api handle - from pypy.module.cpyext.api import State - handle = space.fromcache(State).get_pythonapi_handle() - - # Make a dll object with it - from pypy.module._rawffi.interp_rawffi import W_CDLL - from rpython.rlib.clibffi import RawCDLL - cdll = RawCDLL(handle) - return space.wrap(W_CDLL(space, "python api", cdll)) - def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" if w_default is None: From noreply at buildbot.pypy.org Mon Jun 23 21:27:04 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 21:27:04 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: remove from pythonapi support from cpyext Message-ID: <20140623192704.1A71D1D2DC0@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72178:750dd6fe24ef Date: 2014-06-23 22:09 +0300 http://bitbucket.org/pypy/pypy/changeset/750dd6fe24ef/ Log: remove from pythonapi support from cpyext diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -809,8 
+809,6 @@ import ctypes bridge = ctypes.CDLL(str(modulename), mode=ctypes.RTLD_GLOBAL) - space.fromcache(State).install_dll(eci) - # populate static data for name, (typ, expr) in GLOBALS.iteritems(): from pypy.module import cpyext @@ -1004,23 +1002,6 @@ separate_module_sources = [code, struct_source] - if sys.platform == 'win32': - get_pythonapi_source = ''' - #include - HANDLE pypy_get_pythonapi_handle() { - MEMORY_BASIC_INFORMATION mi; - memset(&mi, 0, sizeof(mi)); - - if( !VirtualQueryEx(GetCurrentProcess(), &pypy_get_pythonapi_handle, - &mi, sizeof(mi)) ) - return 0; - - return (HMODULE)mi.AllocationBase; - } - ''' - separate_module_sources.append(get_pythonapi_source) - export_symbols_eci.append('pypy_get_pythonapi_handle') - eci = ExternalCompilationInfo( include_dirs=include_dirs, separate_module_files=[source_dir / "varargwrapper.c", @@ -1065,8 +1046,6 @@ eci = build_eci(False, export_symbols, code) - space.fromcache(State).install_dll(eci) - run_bootstrap_functions(space) setup_va_functions(eci) diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -62,14 +62,6 @@ else: api.setup_library(self.space) - def install_dll(self, eci): - """NOT_RPYTHON - Called when the dll has been compiled""" - if sys.platform == 'win32': - self.get_pythonapi_handle = rffi.llexternal( - 'pypy_get_pythonapi_handle', [], DLLHANDLE, - compilation_info=eci) - def startup(self, space): "This function is called when the program really starts" diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -201,17 +201,6 @@ raises(ImportError, cpyext.load_module, "missing.file", "foo") raises(ImportError, cpyext.load_module, self.libc, "invalid.function") - def test_dllhandle(self): - import sys - if sys.platform != "win32" or sys.version_info < (2, 6): - skip("Windows Python >= 
2.6 only") - assert sys.dllhandle - assert sys.dllhandle.getaddressindll('cpyexttestErr_NewException') - import ctypes - PyUnicode_GetDefaultEncoding = ctypes.pythonapi.cpyexttestUnicode_GetDefaultEncoding - PyUnicode_GetDefaultEncoding.restype = ctypes.c_char_p - assert PyUnicode_GetDefaultEncoding() == 'ascii' - class AppTestCpythonExtensionBase(LeakCheckingTest): def setup_class(cls): From noreply at buildbot.pypy.org Mon Jun 23 21:27:05 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 21:27:05 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: remove PyDLL and pythonapi from ctypes Message-ID: <20140623192705.4D4A41D2DC0@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72179:0aca52160aff Date: 2014-06-23 22:23 +0300 http://bitbucket.org/pypy/pypy/changeset/0aca52160aff/ Log: remove PyDLL and pythonapi from ctypes diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -389,12 +389,13 @@ func.__name__ = name_or_ordinal return func -class PyDLL(CDLL): - """This class represents the Python library itself. It allows to - access Python API functions. The GIL is not released, and - Python exceptions are handled correctly. - """ - _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI +# Not in PyPy +#class PyDLL(CDLL): +# """This class represents the Python library itself. It allows to +# access Python API functions. The GIL is not released, and +# Python exceptions are handled correctly. 
+# """ +# _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI if _os.name in ("nt", "ce"): @@ -447,15 +448,8 @@ return self._dlltype(name) cdll = LibraryLoader(CDLL) -pydll = LibraryLoader(PyDLL) - -if _os.name in ("nt", "ce"): - pythonapi = PyDLL("python dll", None, _sys.dllhandle) -elif _sys.platform == "cygwin": - pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2]) -else: - pythonapi = PyDLL(None) - +# not on PyPy +#pydll = LibraryLoader(PyDLL) if _os.name in ("nt", "ce"): windll = LibraryLoader(WinDLL) diff --git a/lib-python/2.7/ctypes/test/test_values.py b/lib-python/2.7/ctypes/test/test_values.py --- a/lib-python/2.7/ctypes/test/test_values.py +++ b/lib-python/2.7/ctypes/test/test_values.py @@ -4,6 +4,7 @@ import unittest from ctypes import * +from ctypes.test import xfail import _ctypes_test @@ -23,7 +24,8 @@ class Win_ValuesTestCase(unittest.TestCase): """This test only works when python itself is a dll/shared library""" - + + @xfail def test_optimizeflag(self): # This test accesses the Py_OptimizeFlag intger, which is # exported by the Python dll. @@ -40,6 +42,7 @@ else: self.assertEqual(opt, 2) + @xfail def test_frozentable(self): # Python exports a PyImport_FrozenModules symbol. This is a # pointer to an array of struct _frozen entries. 
The end of the @@ -75,6 +78,7 @@ from ctypes import _pointer_type_cache del _pointer_type_cache[struct_frozen] + @xfail def test_undefined(self): self.assertRaises(ValueError, c_int.in_dll, pydll, "Undefined_Symbol") From noreply at buildbot.pypy.org Mon Jun 23 21:27:07 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 21:27:07 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: merge default into branch Message-ID: <20140623192707.782601D2DC0@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72180:83ee399f97bb Date: 2014-06-23 22:25 +0300 http://bitbucket.org/pypy/pypy/changeset/83ee399f97bb/ Log: merge default into branch diff too long, truncating to 2000 out of 2132 lines diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst --- a/pypy/doc/extradoc.rst +++ b/pypy/doc/extradoc.rst @@ -8,6 +8,9 @@ *Articles about PyPy published so far, most recent first:* (bibtex_ file) +* `A Way Forward in Parallelising Dynamic Languages`_, + R. Meier, A. Rigo + * `Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`_, C.F. Bolz, A. Cuni, M. Fijalkowski, M. Leuschel, S. Pedroni, A. Rigo @@ -71,6 +74,7 @@ .. _bibtex: https://bitbucket.org/pypy/extradoc/raw/tip/talk/bibtex.bib +.. _`A Way Forward in Parallelising Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2014/position-paper.pdf .. _`Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2011/jit-hints.pdf .. _`Allocation Removal by Partial Evaluation in a Tracing JIT`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/pepm2011/bolz-allocation-removal.pdf .. 
_`Towards a Jitting VM for Prolog Execution`: http://www.stups.uni-duesseldorf.de/mediawiki/images/a/a7/Pub-BoLeSch2010.pdf @@ -93,6 +97,11 @@ Talks and Presentations ---------------------------------- +*This part is no longer updated.* The complete list is here__ (in +alphabetical order). + +.. __: https://bitbucket.org/pypy/extradoc/src/extradoc/talk/ + Talks in 2010 +++++++++++++ diff --git a/pypy/doc/release-pypy3-2.3.1.rst b/pypy/doc/release-pypy3-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-pypy3-2.3.1.rst @@ -0,0 +1,69 @@ +===================== +PyPy3 2.3.1 - Fulcrum +===================== + +We're pleased to announce the first stable release of PyPy3. PyPy3 +targets Python 3 (3.2.5) compatibility. + +We would like to thank all of the people who donated_ to the `py3k proposal`_ +for supporting the work that went into this. + +You can download the PyPy3 2.3.1 release here: + + http://pypy.org/download.html#pypy3-2-3-1 + +Highlights +========== + +* The first stable release of PyPy3: support for Python 3! + +* The stdlib has been updated to Python 3.2.5 + +* Additional support for the u'unicode' syntax (`PEP 414`_) from Python 3.3 + +* Updates from the default branch, such as incremental GC and various JIT + improvements + +* Resolved some notable JIT performance regressions from PyPy2: + + - Re-enabled the previously disabled collection (list/dict/set) strategies + + - Resolved performance of iteration over range objects + + - Resolved handling of Python 3's exception __context__ unnecessarily forcing + frame object overhead + +.. _`PEP 414`: http://legacy.python.org/dev/peps/pep-0414/ + +What is PyPy? +============== + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7.6 or 3.2.5. It's fast due to its integrated tracing JIT compiler. 
+ +This release supports x86 machines running Linux 32/64, Mac OS X 64, Windows, +and OpenBSD, +as well as newer ARM hardware (ARMv6 or ARMv7, with VFPv3) running Linux. + +While we support 32 bit python on Windows, work on the native Windows 64 +bit python is still stalling, we would welcome a volunteer +to `handle that`_. + +.. _`handle that`: http://doc.pypy.org/en/latest/windows.html#what-is-missing-for-a-full-64-bit-translation + +How to use PyPy? +================= + +We suggest using PyPy from a `virtualenv`_. Once you have a virtualenv +installed, you can follow instructions from `pypy documentation`_ on how +to proceed. This document also covers other `installation schemes`_. + +.. _donated: http://morepypy.blogspot.com/2012/01/py3k-and-numpy-first-stage-thanks-to.html +.. _`py3k proposal`: http://pypy.org/py3donate.html +.. _`pypy documentation`: http://doc.pypy.org/en/latest/getting-started.html#installing-using-virtualenv +.. _`virtualenv`: http://www.virtualenv.org/en/latest/ +.. _`installation schemes`: http://doc.pypy.org/en/latest/getting-started.html#installing-pypy + + +Cheers, +the PyPy team diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -22,3 +22,11 @@ conditional_calls). I would expect the net result to be a slight slow-down on some simple benchmarks and a speed-up on bigger programs. + +.. branch: ec-threadlocal +Change the executioncontext's lookup to be done by reading a thread- +local variable (which is implemented in C using '__thread' if +possible, and pthread_getspecific() otherwise). On Linux x86 and +x86-64, the JIT backend has a special optimization that lets it emit +directly a single MOV from a %gs- or %fs-based address. It seems +actually to give a good boost in performance. 
diff --git a/pypy/doc/whatsnew-pypy3-2.3.1.rst b/pypy/doc/whatsnew-pypy3-2.3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-pypy3-2.3.1.rst @@ -0,0 +1,6 @@ +========================= +What's new in PyPy3 2.3.1 +========================= + +.. this is a revision shortly after pypy3-release-2.3.x +.. startrev: 0137d8e6657d diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -30,8 +30,6 @@ if w_dict is not None: # for tests w_entry_point = space.getitem(w_dict, space.wrap('entry_point')) w_run_toplevel = space.getitem(w_dict, space.wrap('run_toplevel')) - w_call_finish_gateway = space.wrap(gateway.interp2app(call_finish)) - w_call_startup_gateway = space.wrap(gateway.interp2app(call_startup)) withjit = space.config.objspace.usemodules.pypyjit def entry_point(argv): @@ -53,7 +51,7 @@ argv = argv[:1] + argv[3:] try: try: - space.call_function(w_run_toplevel, w_call_startup_gateway) + space.startup() w_executable = space.wrap(argv[0]) w_argv = space.newlist([space.wrap(s) for s in argv[1:]]) w_exitcode = space.call_function(w_entry_point, w_executable, w_argv) @@ -69,7 +67,7 @@ return 1 finally: try: - space.call_function(w_run_toplevel, w_call_finish_gateway) + space.finish() except OperationError, e: debug("OperationError:") debug(" operror-type: " + e.w_type.getname(space)) @@ -184,11 +182,6 @@ 'pypy_thread_attach': pypy_thread_attach, 'pypy_setup_home': pypy_setup_home} -def call_finish(space): - space.finish() - -def call_startup(space): - space.startup() # _____ Define and setup target ___ diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -395,6 +395,7 @@ def startup(self): # To be called before using the space + self.threadlocals.enter_thread(self) # Initialize already imported builtin modules from pypy.interpreter.module 
import Module @@ -639,30 +640,33 @@ """NOT_RPYTHON: Abstract method that should put some minimal content into the w_builtins.""" - @jit.loop_invariant def getexecutioncontext(self): "Return what we consider to be the active execution context." # Important: the annotator must not see a prebuilt ExecutionContext: # you should not see frames while you translate # so we make sure that the threadlocals never *have* an # ExecutionContext during translation. - if self.config.translating and not we_are_translated(): - assert self.threadlocals.getvalue() is None, ( - "threadlocals got an ExecutionContext during translation!") - try: - return self._ec_during_translation - except AttributeError: - ec = self.createexecutioncontext() - self._ec_during_translation = ec + if not we_are_translated(): + if self.config.translating: + assert self.threadlocals.get_ec() is None, ( + "threadlocals got an ExecutionContext during translation!") + try: + return self._ec_during_translation + except AttributeError: + ec = self.createexecutioncontext() + self._ec_during_translation = ec + return ec + else: + ec = self.threadlocals.get_ec() + if ec is None: + self.threadlocals.enter_thread(self) + ec = self.threadlocals.get_ec() return ec - # normal case follows. The 'thread' module installs a real - # thread-local object in self.threadlocals, so this builds - # and caches a new ec in each thread. - ec = self.threadlocals.getvalue() - if ec is None: - ec = self.createexecutioncontext() - self.threadlocals.setvalue(ec) - return ec + else: + # translated case follows. self.threadlocals is either from + # 'pypy.interpreter.miscutils' or 'pypy.module.thread.threadlocals'. + # the result is assumed to be non-null: enter_thread() was called. 
+ return self.threadlocals.get_ec() def _freeze_(self): return True diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -11,11 +11,11 @@ """ _value = None - def getvalue(self): + def get_ec(self): return self._value - def setvalue(self, value): - self._value = value + def enter_thread(self, space): + self._value = space.createexecutioncontext() def signals_enabled(self): return True diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -508,7 +508,10 @@ argshapes = unpack_argshapes(space, w_args) resshape = unpack_resshape(space, w_res) ffi_args = [shape.get_basic_ffi_type() for shape in argshapes] - ffi_res = resshape.get_basic_ffi_type() + if resshape is not None: + ffi_res = resshape.get_basic_ffi_type() + else: + ffi_res = ffi_type_void try: ptr = RawFuncPtr('???', ffi_args, ffi_res, rffi.cast(rffi.VOIDP, addr), flags) diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py --- a/pypy/module/_rawffi/test/test__rawffi.py +++ b/pypy/module/_rawffi/test/test__rawffi.py @@ -353,6 +353,11 @@ assert ptr[0] == rawcall.buffer ptr.free() + def test_raw_callable_returning_void(self): + import _rawffi + _rawffi.FuncPtr(0, [], None) + # assert did not crash + def test_short_addition(self): import _rawffi lib = _rawffi.CDLL(self.lib_name) diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py --- a/pypy/module/pypyjit/test_pypy_c/test_call.py +++ b/pypy/module/pypyjit/test_pypy_c/test_call.py @@ -71,13 +71,13 @@ "getfield_gc", "guard_value", "guard_not_invalidated"] ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL') - assert log.opnames(ops) == ["guard_not_invalidated"] + assert log.opnames(ops) == [] # ops = entry_bridge.ops_by_id('call', 
opcode='LOAD_GLOBAL') assert log.opnames(ops) == [] # assert entry_bridge.match_by_id('call', """ - p38 = call(ConstClass(getexecutioncontext), descr=) + p38 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=) p39 = getfield_gc(p38, descr=) i40 = force_token() p41 = getfield_gc(p38, descr=) @@ -435,7 +435,7 @@ p26 = getfield_gc(p7, descr=) guard_value(p26, ConstPtr(ptr27), descr=...) guard_not_invalidated(descr=...) - p29 = call(ConstClass(getexecutioncontext), descr=) + p29 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=) p30 = getfield_gc(p29, descr=) p31 = force_token() p32 = getfield_gc(p29, descr=) @@ -448,7 +448,6 @@ i39 = getfield_gc_pure(p37, descr=) i40 = int_add_ovf(i22, i39) guard_no_overflow(descr=...) - guard_not_invalidated(descr=...) --TICK-- """) diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -101,64 +101,38 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) - # NB: since the stringbuilder2-perf branch we get more operations than - # before, but a lot less branches that might fail randomly. assert loop.match(""" - i100 = int_gt(i95, 0) - guard_true(i100, descr=...) + i79 = int_gt(i74, 0) + guard_true(i79, descr=...) guard_not_invalidated(descr=...) - p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=) + p80 = call(ConstClass(ll_int2dec__Signed), i74, descr=) guard_no_exception(descr=...) - i102 = strlen(p101) - i103 = int_is_true(i102) - guard_true(i103, descr=...) - i104 = strgetitem(p101, 0) - i105 = int_eq(i104, 45) - guard_false(i105, descr=...) 
- i106 = int_neg(i102) - i107 = int_gt(i102, 23) - p108 = new(descr=) - p110 = newstr(23) + i85 = strlen(p80) + p86 = new(descr=) + p88 = newstr(23) setfield_gc(..., descr=) setfield_gc(..., descr=) setfield_gc(..., descr=) - cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=) guard_no_exception(descr=...) - i111 = getfield_gc(p108, descr=) - i112 = int_sub(i102, i111) - i113 = getfield_gc(p108, descr=) - p114 = getfield_gc(p108, descr=) - copystrcontent(p101, p114, i111, i113, i112) - i115 = int_add(i113, i112) - i116 = getfield_gc(p108, descr=) - setfield_gc(p108, i115, descr=) - i117 = int_eq(i115, i116) - cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=) + i89 = getfield_gc(p86, descr=) + i90 = getfield_gc(p86, descr=) + i91 = int_eq(i89, i90) + cond_call(i91, ConstClass(ll_grow_by__stringbuilderPtr_Signed), p86, 1, descr=) guard_no_exception(descr=...) - i118 = getfield_gc(p108, descr=) - i119 = int_add(i118, 1) - p120 = getfield_gc(p108, descr=) - strsetitem(p120, i118, 32) - i121 = getfield_gc(p108, descr=) - i122 = int_sub(i121, i119) - setfield_gc(..., descr=) - setfield_gc(..., descr=) - i123 = int_gt(i102, i122) - cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=) + i92 = getfield_gc(p86, descr=) + i93 = int_add(i92, 1) + p94 = getfield_gc(p86, descr=) + strsetitem(p94, i92, 32) + setfield_gc(p86, i93, descr=) + call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=) guard_no_exception(descr=...) 
- i124 = getfield_gc(p108, descr=) - i125 = int_sub(i102, i124) - i126 = getfield_gc(p108, descr=) - p127 = getfield_gc(p108, descr=) - copystrcontent(p101, p127, i124, i126, i125) - i128 = int_add(i126, i125) - setfield_gc(p108, i128, descr=) - p135 = call(..., descr=) # ll_build guard_no_exception(descr=...) - i136 = strlen(p135) - i137 = int_add_ovf(i92, i136) + i96 = strlen(p95) + i97 = int_add_ovf(i71, i96) guard_no_overflow(descr=...) - i138 = int_sub(i95, 1) + i98 = int_sub(i74, 1) --TICK-- jump(..., descr=...) """) diff --git a/pypy/module/thread/__init__.py b/pypy/module/thread/__init__.py --- a/pypy/module/thread/__init__.py +++ b/pypy/module/thread/__init__.py @@ -26,10 +26,11 @@ "NOT_RPYTHON: patches space.threadlocals to use real threadlocals" from pypy.module.thread import gil MixedModule.__init__(self, space, *args) - prev = space.threadlocals.getvalue() + prev_ec = space.threadlocals.get_ec() space.threadlocals = gil.GILThreadLocals() space.threadlocals.initialize(space) - space.threadlocals.setvalue(prev) + if prev_ec is not None: + space.threadlocals._set_ec(prev_ec) from pypy.module.posix.interp_posix import add_fork_hook from pypy.module.thread.os_thread import reinit_threads diff --git a/pypy/module/thread/os_thread.py b/pypy/module/thread/os_thread.py --- a/pypy/module/thread/os_thread.py +++ b/pypy/module/thread/os_thread.py @@ -126,6 +126,8 @@ release = staticmethod(release) def run(space, w_callable, args): + # add the ExecutionContext to space.threadlocals + space.threadlocals.enter_thread(space) try: space.call_args(w_callable, args) except OperationError, e: diff --git a/pypy/module/thread/test/test_gil.py b/pypy/module/thread/test/test_gil.py --- a/pypy/module/thread/test/test_gil.py +++ b/pypy/module/thread/test/test_gil.py @@ -64,13 +64,14 @@ except Exception, e: assert 0 thread.gc_thread_die() + my_gil_threadlocals = gil.GILThreadLocals() def f(): state.data = [] state.datalen1 = 0 state.datalen2 = 0 state.datalen3 = 0 state.datalen4 
= 0 - state.threadlocals = gil.GILThreadLocals() + state.threadlocals = my_gil_threadlocals state.threadlocals.setup_threads(space) subident = thread.start_new_thread(bootstrap, ()) mainident = thread.get_ident() diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -1,4 +1,5 @@ from rpython.rlib import rthread +from rpython.rlib.objectmodel import we_are_translated from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import ExecutionContext @@ -13,53 +14,62 @@ os_thread.bootstrap().""" def __init__(self): + "NOT_RPYTHON" self._valuedict = {} # {thread_ident: ExecutionContext()} self._cleanup_() + self.raw_thread_local = rthread.ThreadLocalReference(ExecutionContext) def _cleanup_(self): self._valuedict.clear() self._mainthreadident = 0 - self._mostrecentkey = 0 # fast minicaching for the common case - self._mostrecentvalue = None # fast minicaching for the common case - def getvalue(self): + def enter_thread(self, space): + "Notification that the current thread is about to start running." + self._set_ec(space.createexecutioncontext()) + + def _set_ec(self, ec): ident = rthread.get_ident() - if ident == self._mostrecentkey: - result = self._mostrecentvalue - else: - value = self._valuedict.get(ident, None) - # slow path: update the minicache - self._mostrecentkey = ident - self._mostrecentvalue = value - result = value - return result + if self._mainthreadident == 0 or self._mainthreadident == ident: + ec._signals_enabled = 1 # the main thread is enabled + self._mainthreadident = ident + self._valuedict[ident] = ec + # This logic relies on hacks and _make_sure_does_not_move(). + # It only works because we keep the 'ec' alive in '_valuedict' too. 
+ self.raw_thread_local.set(ec) - def setvalue(self, value): - ident = rthread.get_ident() - if value is not None: - if self._mainthreadident == 0: - value._signals_enabled = 1 # the main thread is enabled - self._mainthreadident = ident - self._valuedict[ident] = value - else: + def leave_thread(self, space): + "Notification that the current thread is about to stop." + from pypy.module.thread.os_local import thread_is_stopping + ec = self.get_ec() + if ec is not None: try: - del self._valuedict[ident] - except KeyError: - pass - # update the minicache to prevent it from containing an outdated value - self._mostrecentkey = ident - self._mostrecentvalue = value + thread_is_stopping(ec) + finally: + self.raw_thread_local.set(None) + ident = rthread.get_ident() + try: + del self._valuedict[ident] + except KeyError: + pass + + def get_ec(self): + ec = self.raw_thread_local.get() + if not we_are_translated(): + assert ec is self._valuedict.get(rthread.get_ident(), None) + return ec def signals_enabled(self): - ec = self.getvalue() + ec = self.get_ec() return ec is not None and ec._signals_enabled def enable_signals(self, space): - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None ec._signals_enabled += 1 def disable_signals(self, space): - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None new = ec._signals_enabled - 1 if new < 0: raise wrap_thread_error(space, @@ -69,22 +79,15 @@ def getallvalues(self): return self._valuedict - def leave_thread(self, space): - "Notification that the current thread is about to stop." 
- from pypy.module.thread.os_local import thread_is_stopping - ec = self.getvalue() - if ec is not None: - try: - thread_is_stopping(ec) - finally: - self.setvalue(None) - def reinit_threads(self, space): "Called in the child process after a fork()" ident = rthread.get_ident() - ec = self.getvalue() + ec = self.get_ec() + assert ec is not None + old_sig = ec._signals_enabled if ident != self._mainthreadident: - ec._signals_enabled += 1 + old_sig += 1 self._cleanup_() self._mainthreadident = ident - self.setvalue(ec) + self._set_ec(ec) + ec._signals_enabled = old_sig diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -314,6 +314,9 @@ t = TranslationContext(config=config) self.t = t # for debugging ann = t.buildannotator() + def _do_startup(): + self.threadlocals.enter_thread(self) + ann.build_types(_do_startup, [], complete_now=False) if func is not None: ann.build_types(func, argtypes, complete_now=False) if seeobj_w: diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -379,6 +379,19 @@ std_wp._annspecialcase_ = 'specialize:argtype(1)' def std_wp_number(self, r, prefix=''): + result = self.result + if len(prefix) == 0 and len(r) >= self.width: + # this is strictly a fast path: no prefix, and no padding + # needed. It is more efficient code both in the non-jit + # case (less testing stuff) and in the jit case (uses only + # result.append(), and no startswith() if not f_sign and + # not f_blank). 
+ if self.f_sign and not r.startswith('-'): + result.append(const('+')) + elif self.f_blank and not r.startswith('-'): + result.append(const(' ')) + result.append(const(r)) + return # add a '+' or ' ' sign if necessary sign = r.startswith('-') if not sign: @@ -391,7 +404,6 @@ # do the padding requested by self.width and the flags, # without building yet another RPython string but directly # by pushing the pad character into self.result - result = self.result padding = self.width - len(r) - len(prefix) if padding <= 0: padding = 0 diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -22,6 +22,12 @@ IS_64_BITS = sys.maxint > 2147483647 +SUPPORT__THREAD = ( # whether the particular C compiler supports __thread + sys.platform.startswith("linux")) # Linux works + # OS/X doesn't work, because we still target 10.5/10.6 and the + # minimum required version is 10.7. Windows doesn't work. Please + # add other platforms here if it works on them. 
+ MAINDIR = os.path.dirname(os.path.dirname(__file__)) CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) @@ -156,7 +162,8 @@ # portability options BoolOption("no__thread", "don't use __thread for implementing TLS", - default=False, cmdline="--no__thread", negation=False), + default=not SUPPORT__THREAD, cmdline="--no__thread", + negation=False), IntOption("make_jobs", "Specify -j argument to make for compilation" " (C backend only)", cmdline="--make-jobs", default=detect_number_of_processors()), diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -4,6 +4,8 @@ from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote from rpython.rlib import jit_hooks +from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.rthread import ThreadLocalReference from rpython.jit.backend.detect_cpu import getcpuclass from rpython.jit.backend.test.support import CCompiledMixin from rpython.jit.codewriter.policy import StopAtXPolicy @@ -21,6 +23,7 @@ # - profiler # - full optimizer # - floats neg and abs + # - threadlocalref_get class Frame(object): _virtualizable_ = ['i'] @@ -28,6 +31,10 @@ def __init__(self, i): self.i = i + class Foo(object): + pass + t = ThreadLocalReference(Foo) + @dont_look_inside def myabs(x): return abs(x) @@ -56,6 +63,7 @@ k = myabs(j) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError + if t.get().nine != 9: raise ValueError return chr(total % 253) # from rpython.rtyper.lltypesystem import lltype, rffi @@ -78,8 +86,12 @@ return res # def main(i, j): + foo = Foo() + foo.nine = -(i + j) + t.set(foo) a_char = f(i, j) a_float = libffi_stuff(i, j) + keepalive_until_here(foo) return ord(a_char) * 10 + int(a_float) expected = main(40, -49) res = self.meta_interp(main, [40, -49]) diff --git 
a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2351,10 +2351,29 @@ assert isinstance(reg, RegLoc) self.mc.MOV_rr(reg.value, ebp.value) + def threadlocalref_get(self, op, resloc): + # this function is only called on Linux + from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr + from rpython.jit.backend.x86 import stmtlocal + assert isinstance(resloc, RegLoc) + effectinfo = op.getdescr().get_extra_info() + assert effectinfo.extradescrs is not None + ed = effectinfo.extradescrs[0] + assert isinstance(ed, ThreadLocalRefDescr) + addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) + addr0 = stmtlocal.threadlocal_base() + addr = addr1 - addr0 + assert rx86.fits_in_32bits(addr) + mc = self.mc + mc.writechar(stmtlocal.SEGMENT_TL) # prefix + mc.MOV_rj(resloc.value, addr) + + genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST genop_list = [Assembler386.not_implemented_op] * rop._LAST genop_llong_list = {} genop_math_list = {} +genop_tlref_list = {} genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST for name, value in Assembler386.__dict__.iteritems(): diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -2,7 +2,7 @@ """ Register allocation scheme. 
""" -import os +import os, sys from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.descr import (ArrayDescr, CallDescr, unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr) @@ -692,6 +692,15 @@ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1)) self.perform_math(op, [loc0], loc0) + TLREF_SUPPORT = sys.platform.startswith('linux') + + def _consider_threadlocalref_get(self, op): + if self.TLREF_SUPPORT: + resloc = self.force_allocate_reg(op.result) + self.assembler.threadlocalref_get(op, resloc) + else: + self._consider_call(op) + def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): # we need to save registers on the stack: # @@ -769,6 +778,8 @@ return if oopspecindex == EffectInfo.OS_MATH_SQRT: return self._consider_math_sqrt(op) + if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: + return self._consider_threadlocalref_get(op) self._consider_call(op) def consider_call_may_force(self, op, guard_op): diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/stmtlocal.py @@ -0,0 +1,32 @@ +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.jit.backend.x86.arch import WORD + +SEGMENT_FS = '\x64' +SEGMENT_GS = '\x65' + +if WORD == 4: + SEGMENT_TL = SEGMENT_GS + _instruction = "movl %%gs:0, %0" +else: + SEGMENT_TL = SEGMENT_FS + _instruction = "movq %%fs:0, %0" + +eci = ExternalCompilationInfo(post_include_bits=[''' +#define RPY_STM_JIT 1 +static long pypy__threadlocal_base(void) +{ + /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */ + long result; + asm("%s" : "=r"(result)); + return result; +} +''' % _instruction]) + + +threadlocal_base = rffi.llexternal( + 'pypy__threadlocal_base', + [], lltype.Signed, + compilation_info=eci, + _nowrapper=True, + ) #transactionsafe=True) diff --git 
a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -22,6 +22,7 @@ OS_STR2UNICODE = 2 # "str.str2unicode" OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array OS_DICT_LOOKUP = 4 # ll_dict_lookup + OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get # OS_STR_CONCAT = 22 # "stroruni.concat" OS_STR_SLICE = 23 # "stroruni.slice" diff --git a/rpython/jit/codewriter/jitcode.py b/rpython/jit/codewriter/jitcode.py --- a/rpython/jit/codewriter/jitcode.py +++ b/rpython/jit/codewriter/jitcode.py @@ -117,6 +117,26 @@ raise NotImplementedError +class ThreadLocalRefDescr(AbstractDescr): + # A special descr used as the extradescr in a call to a + # threadlocalref_get function. If the backend supports it, + # it can use this 'get_tlref_addr()' to get the address *in the + # current thread* of the thread-local variable. If, on the current + # platform, the "__thread" variables are implemented as an offset + # from some base register (e.g. %fs on x86-64), then the backend will + # immediately subtract the current value of the base register. + # This gives an offset from the base register, and this can be + # written down in an assembler instruction to load the "__thread" + # variable from anywhere. 
+ + def __init__(self, opaque_id): + from rpython.rtyper.lltypesystem.lloperation import llop + from rpython.rtyper.lltypesystem import llmemory + def get_tlref_addr(): + return llop.threadlocalref_getaddr(llmemory.Address, opaque_id) + self.get_tlref_addr = get_tlref_addr + + class LiveVarsInfo(object): def __init__(self, live_i, live_r, live_f): self.live_i = live_i diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -390,11 +390,15 @@ lst.append(v) def handle_residual_call(self, op, extraargs=[], may_call_jitcodes=False, - oopspecindex=EffectInfo.OS_NONE): + oopspecindex=EffectInfo.OS_NONE, + extraeffect=None, + extradescr=None): """A direct_call turns into the operation 'residual_call_xxx' if it is calling a function that we don't want to JIT. The initial args of 'residual_call_xxx' are the function to call, and its calldescr.""" - calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex) + calldescr = self.callcontrol.getcalldescr(op, oopspecindex=oopspecindex, + extraeffect=extraeffect, + extradescr=extradescr) op1 = self.rewrite_call(op, 'residual_call', [op.args[0]] + extraargs, calldescr=calldescr) if may_call_jitcodes or self.callcontrol.calldescr_canraise(calldescr): @@ -1903,6 +1907,18 @@ None) return [op0, op1] + def rewrite_op_threadlocalref_get(self, op): + from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr + opaqueid = op.args[0].value + op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [], + extra=(opaqueid,), + extrakey=opaqueid._obj) + extradescr = ThreadLocalRefDescr(opaqueid) + return self.handle_residual_call(op1, + oopspecindex=EffectInfo.OS_THREADLOCALREF_GET, + extraeffect=EffectInfo.EF_LOOPINVARIANT, + extradescr=[extradescr]) + # ____________________________________________________________ class NotSupported(Exception): diff --git a/rpython/jit/codewriter/support.py 
b/rpython/jit/codewriter/support.py --- a/rpython/jit/codewriter/support.py +++ b/rpython/jit/codewriter/support.py @@ -712,6 +712,11 @@ build_ll_1_raw_free_no_track_allocation = ( build_raw_free_builder(track_allocation=False)) + def build_ll_0_threadlocalref_getter(opaqueid): + def _ll_0_threadlocalref_getter(): + return llop.threadlocalref_get(rclass.OBJECTPTR, opaqueid) + return _ll_0_threadlocalref_getter + def _ll_1_weakref_create(obj): return llop.weakref_create(llmemory.WeakRefPtr, obj) diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -147,6 +147,7 @@ EI.OS_UNIEQ_LENGTHOK: ([PUNICODE, PUNICODE], INT), EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR), EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void), + EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR), } argtypes = argtypes[oopspecindex] assert argtypes[0] == [v.concretetype for v in op.args[1:]] @@ -157,6 +158,8 @@ assert extraeffect == EI.EF_CAN_RAISE elif oopspecindex == EI.OS_RAW_FREE: assert extraeffect == EI.EF_CANNOT_RAISE + elif oopspecindex == EI.OS_THREADLOCALREF_GET: + assert extraeffect == EI.EF_LOOPINVARIANT else: assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE return 'calldescr-%d' % oopspecindex @@ -1300,6 +1303,23 @@ assert op1.result is None assert op2 is None +def test_threadlocalref_get(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rlib.rthread import ThreadLocalReference + OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET + class Foo: pass + t = ThreadLocalReference(Foo) + v2 = varoftype(rclass.OBJECTPTR) + c_opaqueid = const(t.opaque_id) + op = SpaceOperation('threadlocalref_get', [c_opaqueid], v2) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op0 = tr.rewrite_operation(op) + assert op0.opname == 'residual_call_r_r' + assert op0.args[0].value == 'threadlocalref_getter' # 
pseudo-function as str + assert op0.args[1] == ListOfKind("ref", []) + assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET + assert op0.result == v2 + def test_unknown_operation(): op = SpaceOperation('foobar', [], varoftype(lltype.Void)) tr = Transformer() diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py --- a/rpython/jit/metainterp/test/test_string.py +++ b/rpython/jit/metainterp/test/test_string.py @@ -688,7 +688,9 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + self.check_resops(call=6, # (ll_append_res0, ll_append_0_2, ll_build) + # * 2 unroll + cond_call=0) def test_stringbuilder_append_len2_2(self): jitdriver = JitDriver(reds=['n', 'str1'], greens=[]) @@ -708,7 +710,8 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2) # (ll_shrink_array) * 2 unroll + self.check_resops(call=4, # (ll_append_res0, ll_build) * 2 unroll + cond_call=0) def test_stringbuilder_append_slice_1(self): jitdriver = JitDriver(reds=['n'], greens=[]) @@ -724,8 +727,8 @@ return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=2, # (ll_shrink_array) * 2 unroll - copyunicodecontent=4) + self.check_resops(call=6, cond_call=0, + copyunicodecontent=0) def test_stringbuilder_append_slice_2(self): jitdriver = JitDriver(reds=['n'], greens=[]) @@ -751,12 +754,14 @@ while n > 0: jitdriver.jit_merge_point(n=n) sb = UnicodeBuilder() - sb.append_multiple_char(u"x", 3) + sb.append_multiple_char(u"x", 5) s = sb.build() - if len(s) != 3: raise ValueError + if len(s) != 5: raise ValueError if s[0] != u"x": raise ValueError if s[1] != u"x": raise ValueError if s[2] != u"x": raise ValueError + if s[3] != u"x": raise ValueError + if s[4] != u"x": raise ValueError n -= 1 return n res = self.meta_interp(f, [10], backendopt=True) @@ -770,19 +775,17 @@ while n > 0: 
jitdriver.jit_merge_point(n=n) sb = UnicodeBuilder() - sb.append_multiple_char(u"x", 5) + sb.append_multiple_char(u"x", 35) s = sb.build() - if len(s) != 5: raise ValueError - if s[0] != u"x": raise ValueError - if s[1] != u"x": raise ValueError - if s[2] != u"x": raise ValueError - if s[3] != u"x": raise ValueError - if s[4] != u"x": raise ValueError + if len(s) != 35: raise ValueError + for c in s: + if c != u"x": + raise ValueError n -= 1 return n res = self.meta_interp(f, [10], backendopt=True) assert res == 0 - self.check_resops(call=4) # (append, build) * 2 unroll + self.check_resops(call=4) # (_ll_append_multiple_char, build) * 2 def test_stringbuilder_bug1(self): jitdriver = JitDriver(reds=['n', 's1'], greens=[]) diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py new file mode 100644 --- /dev/null +++ b/rpython/jit/metainterp/test/test_threadlocal.py @@ -0,0 +1,30 @@ +import py +from rpython.jit.metainterp.test.support import LLJitMixin +from rpython.rlib.rthread import ThreadLocalReference +from rpython.rlib.jit import dont_look_inside + + +class ThreadLocalTest(object): + + def test_threadlocalref_get(self): + class Foo: + pass + t = ThreadLocalReference(Foo) + x = Foo() + + @dont_look_inside + def setup(): + t.set(x) + + def f(): + setup() + if t.get() is x: + return 42 + return -666 + + res = self.interp_operations(f, []) + assert res == 42 + + +class TestLLtype(ThreadLocalTest, LLJitMixin): + pass diff --git a/rpython/rlib/rfile.py b/rpython/rlib/rfile.py --- a/rpython/rlib/rfile.py +++ b/rpython/rlib/rfile.py @@ -35,7 +35,7 @@ FILE = lltype.Struct('FILE') # opaque type maybe c_open = llexternal('fopen', [rffi.CCHARP, rffi.CCHARP], lltype.Ptr(FILE)) -c_close = llexternal('fclose', [lltype.Ptr(FILE)], rffi.INT) +c_close = llexternal('fclose', [lltype.Ptr(FILE)], rffi.INT, releasegil=False) c_fwrite = llexternal('fwrite', [rffi.CCHARP, rffi.SIZE_T, rffi.SIZE_T, lltype.Ptr(FILE)], rffi.SIZE_T) 
c_fread = llexternal('fread', [rffi.CCHARP, rffi.SIZE_T, rffi.SIZE_T, @@ -57,7 +57,7 @@ rffi.CCHARP) c_popen = llexternal('popen', [rffi.CCHARP, rffi.CCHARP], lltype.Ptr(FILE)) -c_pclose = llexternal('pclose', [lltype.Ptr(FILE)], rffi.INT) +c_pclose = llexternal('pclose', [lltype.Ptr(FILE)], rffi.INT, releasegil=False) BASE_BUF_SIZE = 4096 BASE_LINE_SIZE = 100 diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -272,3 +272,65 @@ llop.gc_thread_after_fork(lltype.Void, result_of_fork, opaqueaddr) else: assert opaqueaddr == llmemory.NULL + +# ____________________________________________________________ +# +# Thread-locals. Only for references that change "not too often" -- +# for now, the JIT compiles get() as a loop-invariant, so basically +# don't change them. +# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR! +# We use _make_sure_does_not_move() to make sure the pointer will not move. + +ecitl = ExternalCompilationInfo( + includes = ['src/threadlocal.h'], + separate_module_files = [translator_c_dir / 'src' / 'threadlocal.c']) +ensure_threadlocal = rffi.llexternal_use_eci(ecitl) + +class ThreadLocalReference(object): + _COUNT = 1 + OPAQUEID = lltype.OpaqueType("ThreadLocalRef", + hints={"threadlocalref": True, + "external": "C", + "c_name": "RPyThreadStaticTLS"}) + + def __init__(self, Cls): + "NOT_RPYTHON: must be prebuilt" + import thread + self.Cls = Cls + self.local = thread._local() # <- NOT_RPYTHON + unique_id = ThreadLocalReference._COUNT + ThreadLocalReference._COUNT += 1 + opaque_id = lltype.opaqueptr(ThreadLocalReference.OPAQUEID, + 'tlref%d' % unique_id) + self.opaque_id = opaque_id + + def get(): + if we_are_translated(): + from rpython.rtyper.lltypesystem import rclass + from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance + ptr = llop.threadlocalref_get(rclass.OBJECTPTR, opaque_id) + return cast_base_ptr_to_instance(Cls, ptr) + else: + return 
getattr(self.local, 'value', None) + + @jit.dont_look_inside + def set(value): + assert isinstance(value, Cls) or value is None + if we_are_translated(): + from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr + from rpython.rlib.rgc import _make_sure_does_not_move + from rpython.rlib.objectmodel import running_on_llinterp + ptr = cast_instance_to_base_ptr(value) + if not running_on_llinterp: + gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr) + _make_sure_does_not_move(gcref) + llop.threadlocalref_set(lltype.Void, opaque_id, ptr) + ensure_threadlocal() + else: + self.local.value = value + + self.get = get + self.set = set + + def _freeze_(self): + return True diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py --- a/rpython/rlib/streamio.py +++ b/rpython/rlib/streamio.py @@ -37,7 +37,7 @@ import os, sys, errno from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_longlong, intmask -from rpython.rlib import rposix +from rpython.rlib import rposix, nonconst from rpython.rlib.rstring import StringBuilder from os import O_RDONLY, O_WRONLY, O_RDWR, O_CREAT, O_TRUNC, O_APPEND @@ -159,6 +159,8 @@ stream = TextInputFilter(stream) elif not binary and os.linesep == '\r\n': stream = TextCRLFFilter(stream) + if nonconst.NonConstant(False): + stream.flush_buffers() # annotation workaround for untranslated tests return stream diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -239,6 +239,7 @@ res = res and split('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] res = res and split(' a\ta\na b') == ['a', 'a', 'a', 'b'] res = res and split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d'] + res = res and split('abcd,efghi', ',') == ['abcd', 'efghi'] res = res and split(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd'] res = res and split(u'endcase test', u'test') == [u'endcase ', u''] res = res and 
rsplit('a|b|c|d', '|', 2) == ['a|b', 'c', 'd'] diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py --- a/rpython/rlib/test/test_rthread.py +++ b/rpython/rlib/test/test_rthread.py @@ -1,4 +1,4 @@ -import gc +import gc, time from rpython.rlib.rthread import * from rpython.translator.c.test.test_boehm import AbstractGCTestClass from rpython.rtyper.lltypesystem import lltype, rffi @@ -29,6 +29,23 @@ else: py.test.fail("Did not raise") +def test_tlref_untranslated(): + class FooBar(object): + pass + t = ThreadLocalReference(FooBar) + results = [] + def subthread(): + x = FooBar() + results.append(t.get() is None) + t.set(x) + results.append(t.get() is x) + time.sleep(0.2) + results.append(t.get() is x) + for i in range(5): + start_new_thread(subthread, ()) + time.sleep(0.5) + assert results == [True] * 15 + class AbstractThreadTests(AbstractGCTestClass): use_threads = True @@ -198,6 +215,20 @@ res = fn() assert res >= 0.95 + def test_tlref(self): + class FooBar(object): + pass + t = ThreadLocalReference(FooBar) + def f(): + x1 = FooBar() + t.set(x1) + import gc; gc.collect() + assert t.get() is x1 + return 42 + fn = self.getcompiled(f, []) + res = fn() + assert res == 42 + #class TestRunDirectly(AbstractThreadTests): # def getcompiled(self, f, argtypes): # return f @@ -208,4 +239,4 @@ gcpolicy = 'boehm' class TestUsingFramework(AbstractThreadTests): - gcpolicy = 'generation' + gcpolicy = 'minimark' diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -919,6 +919,20 @@ def op_stack_current(self): return 0 + def op_threadlocalref_set(self, key, value): + try: + d = self.llinterpreter.tlrefsdict + except AttributeError: + d = self.llinterpreter.tlrefsdict = {} + d[key._obj] = value + + def op_threadlocalref_get(self, key): + d = self.llinterpreter.tlrefsdict + return d[key._obj] + + def op_threadlocalref_getaddr(self, key): + raise 
NotImplementedError("threadlocalref_getaddr") + # __________________________________________________________ # operations on addresses diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -541,6 +541,10 @@ 'getslice': LLOp(canraise=(Exception,)), 'check_and_clear_exc': LLOp(), + 'threadlocalref_get': LLOp(sideeffects=False), + 'threadlocalref_getaddr': LLOp(sideeffects=False), + 'threadlocalref_set': LLOp(), + # __________ debugging __________ 'debug_view': LLOp(), 'debug_print': LLOp(canrun=True), diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py --- a/rpython/rtyper/lltypesystem/rbuilder.py +++ b/rpython/rtyper/lltypesystem/rbuilder.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask from rpython.rlib.debug import ll_assert +from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.rptr import PtrRepr from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr @@ -34,62 +35,15 @@ # ------------------------------------------------------------ +def dont_inline(func): + func._dont_inline_ = True + return func + def always_inline(func): func._always_inline_ = True return func -def new_grow_funcs(name, mallocfn): - - @enforceargs(None, int) - def stringbuilder_grow(ll_builder, needed): - try: - needed = ovfcheck(needed + ll_builder.total_size) - needed = ovfcheck(needed + 63) & ~63 - total_size = ll_builder.total_size + needed - except OverflowError: - raise MemoryError - # - new_string = mallocfn(needed) - # - PIECE = lltype.typeOf(ll_builder.extra_pieces).TO - old_piece = lltype.malloc(PIECE) - old_piece.buf = ll_builder.current_buf - old_piece.prev_piece = ll_builder.extra_pieces - ll_assert(bool(old_piece.buf), 
"no buf??") - ll_builder.current_buf = new_string - ll_builder.current_pos = 0 - ll_builder.current_end = needed - ll_builder.total_size = total_size - ll_builder.extra_pieces = old_piece - - def stringbuilder_append_overflow(ll_builder, ll_str, size): - # First, the part that still fits in the current piece - part1 = ll_builder.current_end - ll_builder.current_pos - start = ll_builder.skip - ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, - start, ll_builder.current_pos, - part1) - ll_builder.skip += part1 - stringbuilder_grow(ll_builder, size - part1) - - def stringbuilder_append_overflow_2(ll_builder, char0): - # Overflow when writing two chars. There are two cases depending - # on whether one char still fits or not. - if ll_builder.current_pos < ll_builder.current_end: - ll_builder.current_buf.chars[ll_builder.current_pos] = char0 - ll_builder.skip = 1 - stringbuilder_grow(ll_builder, 2) - - return (func_with_new_name(stringbuilder_grow, '%s_grow' % name), - func_with_new_name(stringbuilder_append_overflow, - '%s_append_overflow' % name), - func_with_new_name(stringbuilder_append_overflow_2, - '%s_append_overflow_2' % name)) - -stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr) -unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode) - STRINGPIECE = lltype.GcStruct('stringpiece', ('buf', lltype.Ptr(STR)), ('prev_piece', lltype.Ptr(lltype.GcForwardReference()))) @@ -100,12 +54,8 @@ ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('skip', lltype.Signed), ('extra_pieces', lltype.Ptr(STRINGPIECE)), adtmeths={ - 'grow': staticAdtMethod(stringbuilder_grows[0]), - 'append_overflow': staticAdtMethod(stringbuilder_grows[1]), - 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string), 'mallocfn': staticAdtMethod(rstr.mallocstr), @@ 
-122,18 +72,330 @@ ('current_pos', lltype.Signed), ('current_end', lltype.Signed), ('total_size', lltype.Signed), - ('skip', lltype.Signed), ('extra_pieces', lltype.Ptr(UNICODEPIECE)), adtmeths={ - 'grow': staticAdtMethod(unicodebuilder_grows[0]), - 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]), - 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]), 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents), 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode), 'mallocfn': staticAdtMethod(rstr.mallocunicode), } ) +# ------------------------------------------------------------ +# The generic piece of code to append a string (or a slice of it) +# to a builder; it is inlined inside various functions below + + at always_inline +def _ll_append(ll_builder, ll_str, start, size): + pos = ll_builder.current_pos + end = ll_builder.current_end + if (end - pos) < size: + ll_grow_and_append(ll_builder, ll_str, start, size) + else: + ll_builder.current_pos = pos + size + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, pos, size) + +# ------------------------------------------------------------ +# Logic to grow a builder (by adding a new string to it) + + at dont_inline + at enforceargs(None, int) +def ll_grow_by(ll_builder, needed): + try: + needed = ovfcheck(needed + ll_builder.total_size) + needed = ovfcheck(needed + 63) & ~63 + total_size = ll_builder.total_size + needed + except OverflowError: + raise MemoryError + # + new_string = ll_builder.mallocfn(needed) + # + PIECE = lltype.typeOf(ll_builder.extra_pieces).TO + old_piece = lltype.malloc(PIECE) + old_piece.buf = ll_builder.current_buf + old_piece.prev_piece = ll_builder.extra_pieces + ll_assert(bool(old_piece.buf), "no buf??") + ll_builder.current_buf = new_string + ll_builder.current_pos = 0 + ll_builder.current_end = needed + ll_builder.total_size = total_size + ll_builder.extra_pieces = old_piece + + at dont_inline +def ll_grow_and_append(ll_builder, 
ll_str, start, size): + # First, the part that still fits in the current piece + part1 = ll_builder.current_end - ll_builder.current_pos + ll_assert(part1 < size, "part1 >= size") + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, ll_builder.current_pos, + part1) + start += part1 + size -= part1 + # Allocate the new piece + ll_grow_by(ll_builder, size) + ll_assert(ll_builder.current_pos == 0, "current_pos must be 0 after grow()") + # Finally, the second part of the string + ll_builder.current_pos = size + ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, + start, 0, size) + +# ------------------------------------------------------------ +# builder.append() + + at always_inline +def ll_append(ll_builder, ll_str): + if jit.we_are_jitted(): + ll_jit_append(ll_builder, ll_str) + else: + # no-jit case: inline the logic of _ll_append() in the caller + _ll_append(ll_builder, ll_str, 0, len(ll_str.chars)) + + at dont_inline +def ll_jit_append(ll_builder, ll_str): + # jit case: first try special cases for known small lengths + if ll_jit_try_append_slice(ll_builder, ll_str, 0, len(ll_str.chars)): + return + # fall-back to do a residual call to ll_append_res0 + ll_append_res0(ll_builder, ll_str) + + at jit.dont_look_inside +def ll_append_res0(ll_builder, ll_str): + _ll_append(ll_builder, ll_str, 0, len(ll_str.chars)) + +# ------------------------------------------------------------ +# builder.append_char() + + at always_inline +def ll_append_char(ll_builder, char): + jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, + ll_grow_by, ll_builder, 1) + pos = ll_builder.current_pos + ll_builder.current_pos = pos + 1 + ll_builder.current_buf.chars[pos] = char + +# ------------------------------------------------------------ +# builder.append_slice() + + at always_inline +def ll_append_slice(ll_builder, ll_str, start, end): + if jit.we_are_jitted(): + ll_jit_append_slice(ll_builder, ll_str, start, end) + else: + # no-jit case: 
inline the logic of _ll_append() in the caller + _ll_append(ll_builder, ll_str, start, end - start) + + at dont_inline +def ll_jit_append_slice(ll_builder, ll_str, start, end): + # jit case: first try special cases for known small lengths + if ll_jit_try_append_slice(ll_builder, ll_str, start, end - start): + return + # fall-back to do a residual call to ll_append_res_slice + ll_append_res_slice(ll_builder, ll_str, start, end) + + at jit.dont_look_inside +def ll_append_res_slice(ll_builder, ll_str, start, end): + _ll_append(ll_builder, ll_str, start, end - start) + +# ------------------------------------------------------------ +# Special-casing for the JIT: appending strings (or slices) of +# a known length up to MAX_N. These functions all contain an +# inlined copy of _ll_append(), but with a known small N, gcc +# will compile the copy_string_contents() efficiently. + +MAX_N = 10 + +def make_func_for_size(N): + @jit.dont_look_inside + def ll_append_0(ll_builder, ll_str): + _ll_append(ll_builder, ll_str, 0, N) + ll_append_0 = func_with_new_name(ll_append_0, "ll_append_0_%d" % N) + # + @jit.dont_look_inside + def ll_append_start(ll_builder, ll_str, start): + _ll_append(ll_builder, ll_str, start, N) + ll_append_start = func_with_new_name(ll_append_start, + "ll_append_start_%d" % N) + return ll_append_0, ll_append_start, N + +unroll_func_for_size = unrolling_iterable([make_func_for_size(_n) + for _n in range(2, MAX_N + 1)]) + + at jit.unroll_safe +def ll_jit_try_append_slice(ll_builder, ll_str, start, size): + if jit.isconstant(size): + if size == 0: + return True + # a special case: if the builder's pos and end are still contants + # (typically if the builder is still virtual), and if 'size' fits, + # then we don't need any reallocation and can just set the + # characters in the buffer, in a way that won't force anything. 
+ if (jit.isconstant(ll_builder.current_pos) and + jit.isconstant(ll_builder.current_end) and + size <= (ll_builder.current_end - ll_builder.current_pos) and + size <= 16): + pos = ll_builder.current_pos + buf = ll_builder.current_buf + stop = pos + size + ll_builder.current_pos = stop + while pos < stop: + buf.chars[pos] = ll_str.chars[start] + pos += 1 + start += 1 + return True + # turn appends of length 1 into ll_append_char(). + if size == 1: + ll_append_char(ll_builder, ll_str.chars[start]) + return True + # turn appends of length 2 to 10 into residual calls to + # specialized functions, for the lengths 2 to 10, where + # gcc will optimize the known-length copy_string_contents() + # as much as possible. + for func0, funcstart, for_size in unroll_func_for_size: + if size == for_size: + if jit.isconstant(start) and start == 0: + func0(ll_builder, ll_str) + else: + funcstart(ll_builder, ll_str, start) + return True + return False # use the fall-back path + +# ------------------------------------------------------------ +# builder.append_multiple_char() + + at always_inline +def ll_append_multiple_char(ll_builder, char, times): + if jit.we_are_jitted(): + if ll_jit_try_append_multiple_char(ll_builder, char, times): + return + _ll_append_multiple_char(ll_builder, char, times) + + at jit.dont_look_inside +def _ll_append_multiple_char(ll_builder, char, times): + part1 = ll_builder.current_end - ll_builder.current_pos + if times > part1: + times -= part1 + buf = ll_builder.current_buf + for i in xrange(ll_builder.current_pos, ll_builder.current_end): + buf.chars[i] = char + ll_grow_by(ll_builder, times) + # + buf = ll_builder.current_buf + pos = ll_builder.current_pos + end = pos + times + ll_builder.current_pos = end + for i in xrange(pos, end): + buf.chars[i] = char + + at jit.unroll_safe +def ll_jit_try_append_multiple_char(ll_builder, char, size): + if jit.isconstant(size): + if size == 0: + return True + # a special case: if the builder's pos and end are still 
contants + # (typically if the builder is still virtual), and if 'size' fits, + # then we don't need any reallocation and can just set the + # characters in the buffer, in a way that won't force anything. + if (jit.isconstant(ll_builder.current_pos) and + jit.isconstant(ll_builder.current_end) and + size <= (ll_builder.current_end - ll_builder.current_pos) and + size <= 16): + pos = ll_builder.current_pos + buf = ll_builder.current_buf + stop = pos + size + ll_builder.current_pos = stop + while pos < stop: + buf.chars[pos] = char + pos += 1 + return True + if size == 1: + ll_append_char(ll_builder, char) + return True + return False # use the fall-back path + +# ------------------------------------------------------------ +# builder.append_charpsize() + + at jit.dont_look_inside +def ll_append_charpsize(ll_builder, charp, size): + part1 = ll_builder.current_end - ll_builder.current_pos + if size > part1: + # First, the part that still fits + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, + ll_builder.current_pos, part1) + charp = rffi.ptradd(charp, part1) + size -= part1 + ll_grow_by(ll_builder, size) + # + pos = ll_builder.current_pos + ll_builder.current_pos = pos + size + ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) + +# ------------------------------------------------------------ +# builder.getlength() + + at always_inline +def ll_getlength(ll_builder): + num_chars_missing_from_last_piece = ( + ll_builder.current_end - ll_builder.current_pos) + return ll_builder.total_size - num_chars_missing_from_last_piece + +# ------------------------------------------------------------ +# builder.build() + + at jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) +def ll_build(ll_builder): + # NB. 
usually the JIT doesn't look inside this function; it does + # so only in the simplest example where it could virtualize everything + if ll_builder.extra_pieces: + ll_fold_pieces(ll_builder) + elif ll_builder.current_pos != ll_builder.total_size: + ll_shrink_final(ll_builder) + return ll_builder.current_buf + +def ll_shrink_final(ll_builder): + final_size = ll_builder.current_pos + ll_assert(final_size <= ll_builder.total_size, + "final_size > ll_builder.total_size?") + buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) + ll_builder.current_buf = buf + ll_builder.current_end = final_size + ll_builder.total_size = final_size + +def ll_fold_pieces(ll_builder): + final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) + ll_assert(final_size >= 0, "negative final_size") + extra = ll_builder.extra_pieces + ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) + # + result = ll_builder.mallocfn(final_size) + piece = ll_builder.current_buf + piece_lgt = ll_builder.current_pos + ll_assert(ll_builder.current_end == len(piece.chars), + "bogus last piece_lgt") + ll_builder.total_size = final_size + ll_builder.current_buf = result + ll_builder.current_pos = final_size + ll_builder.current_end = final_size + + dst = final_size + while True: + dst -= piece_lgt + ll_assert(dst >= 0, "rbuilder build: overflow") + ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) + if not extra: + break + piece = extra.buf + piece_lgt = len(piece.chars) + extra = extra.prev_piece + ll_assert(dst == 0, "rbuilder build: underflow") + +# ------------------------------------------------------------ +# bool(builder) + +def ll_bool(ll_builder): + return ll_builder != nullptr(lltype.typeOf(ll_builder).TO) + +# ------------------------------------------------------------ class BaseStringBuilderRepr(AbstractStringBuilderRepr): def empty(self): @@ -145,211 +407,24 @@ # Negative values are mapped to 1280. 
init_size = intmask(min(r_uint(init_size), r_uint(1280))) ll_builder = lltype.malloc(cls.lowleveltype.TO) - ll_builder.current_buf = cls.mallocfn(init_size) + ll_builder.current_buf = ll_builder.mallocfn(init_size) ll_builder.current_pos = 0 ll_builder.current_end = init_size ll_builder.total_size = init_size return ll_builder - @staticmethod - @always_inline - def ll_append(ll_builder, ll_str): - BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str, - 0, len(ll_str.chars)) - - @staticmethod - @always_inline - def ll_append_char(ll_builder, char): - jit.conditional_call(ll_builder.current_pos == ll_builder.current_end, - ll_builder.grow, ll_builder, 1) - pos = ll_builder.current_pos - ll_builder.current_pos = pos + 1 - ll_builder.current_buf.chars[pos] = char - - @staticmethod - def ll_append_char_2(ll_builder, char0, char1): - # this is only used by the JIT, when appending a small, known-length - # string. Unlike two consecutive ll_append_char(), it can do that - # with only one conditional_call. - ll_builder.skip = 2 - jit.conditional_call( - ll_builder.current_end - ll_builder.current_pos < 2, - ll_builder.append_overflow_2, ll_builder, char0) - pos = ll_builder.current_pos - buf = ll_builder.current_buf - buf.chars[pos] = char0 - pos += ll_builder.skip - ll_builder.current_pos = pos - buf.chars[pos - 1] = char1 - # NB. this usually writes into buf.chars[current_pos] and - # buf.chars[current_pos+1], except if we had an overflow right - # in the middle of the two chars. In that case, 'skip' is set to - # 1 and only one char is written: the 'char1' overrides the 'char0'. 
- - @staticmethod - @always_inline - def ll_append_slice(ll_builder, ll_str, start, end): - size = end - start - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_slice( - ll_builder, ll_str, start, size): - return - ll_builder.skip = start - jit.conditional_call( - size > ll_builder.current_end - ll_builder.current_pos, - ll_builder.append_overflow, ll_builder, ll_str, size) - start = ll_builder.skip - size = end - start - pos = ll_builder.current_pos - ll_builder.copy_string_contents(ll_str, ll_builder.current_buf, - start, pos, size) - ll_builder.current_pos = pos + size - - @staticmethod - def _ll_jit_try_append_slice(ll_builder, ll_str, start, size): - if jit.isconstant(size): - if size == 0: - return True - if size == 1: - BaseStringBuilderRepr.ll_append_char(ll_builder, - ll_str.chars[start]) - return True - if size == 2: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, - ll_str.chars[start], - ll_str.chars[start + 1]) - return True - return False # use the fall-back path - - @staticmethod - @always_inline - def ll_append_multiple_char(ll_builder, char, times): - if jit.we_are_jitted(): - if BaseStringBuilderRepr._ll_jit_try_append_multiple_char( - ll_builder, char, times): - return - BaseStringBuilderRepr._ll_append_multiple_char(ll_builder, char, times) - - @staticmethod - @jit.dont_look_inside - def _ll_append_multiple_char(ll_builder, char, times): - part1 = ll_builder.current_end - ll_builder.current_pos - if times > part1: - times -= part1 - buf = ll_builder.current_buf - for i in xrange(ll_builder.current_pos, ll_builder.current_end): - buf.chars[i] = char - ll_builder.grow(ll_builder, times) - # - buf = ll_builder.current_buf - pos = ll_builder.current_pos - end = pos + times - ll_builder.current_pos = end - for i in xrange(pos, end): - buf.chars[i] = char - - @staticmethod - def _ll_jit_try_append_multiple_char(ll_builder, char, size): - if jit.isconstant(size): - if size == 0: - return True - if size == 1: - 
BaseStringBuilderRepr.ll_append_char(ll_builder, char) - return True - if size == 2: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - if size == 3: - BaseStringBuilderRepr.ll_append_char(ll_builder, char) - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - if size == 4: - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char) - return True - return False # use the fall-back path - - @staticmethod - @jit.dont_look_inside - def ll_append_charpsize(ll_builder, charp, size): - part1 = ll_builder.current_end - ll_builder.current_pos - if size > part1: - # First, the part that still fits - ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, - ll_builder.current_pos, part1) - charp = rffi.ptradd(charp, part1) - size -= part1 - ll_builder.grow(ll_builder, size) - # - pos = ll_builder.current_pos - ll_builder.current_pos = pos + size - ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size) - - @staticmethod - @always_inline - def ll_getlength(ll_builder): - num_chars_missing_from_last_piece = ( - ll_builder.current_end - ll_builder.current_pos) - return ll_builder.total_size - num_chars_missing_from_last_piece - - @staticmethod - @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder)) - def ll_build(ll_builder): - # NB. 
usually the JIT doesn't look inside this function; it does - # so only in the simplest example where it could virtualize everything - if ll_builder.extra_pieces: - BaseStringBuilderRepr._ll_fold_pieces(ll_builder) - elif ll_builder.current_pos != ll_builder.total_size: - BaseStringBuilderRepr._ll_shrink_final(ll_builder) - return ll_builder.current_buf - - @staticmethod - def _ll_shrink_final(ll_builder): - final_size = ll_builder.current_pos - ll_assert(final_size <= ll_builder.total_size, - "final_size > ll_builder.total_size?") - buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size) - ll_builder.current_buf = buf - ll_builder.current_end = final_size - ll_builder.total_size = final_size - - @staticmethod - def _ll_fold_pieces(ll_builder): - final_size = BaseStringBuilderRepr.ll_getlength(ll_builder) - ll_assert(final_size >= 0, "negative final_size") - extra = ll_builder.extra_pieces - ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO) - # - result = ll_builder.mallocfn(final_size) - piece = ll_builder.current_buf - piece_lgt = ll_builder.current_pos - ll_assert(ll_builder.current_end == len(piece.chars), - "bogus last piece_lgt") - ll_builder.total_size = final_size - ll_builder.current_buf = result - ll_builder.current_pos = final_size - ll_builder.current_end = final_size - - dst = final_size - while True: - dst -= piece_lgt - ll_assert(dst >= 0, "rbuilder build: overflow") - ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt) - if not extra: - break - piece = extra.buf - piece_lgt = len(piece.chars) - extra = extra.prev_piece - ll_assert(dst == 0, "rbuilder build: underflow") - - @classmethod - def ll_bool(cls, ll_builder): - return ll_builder != nullptr(cls.lowleveltype.TO) + ll_append = staticmethod(ll_append) + ll_append_char = staticmethod(ll_append_char) + ll_append_slice = staticmethod(ll_append_slice) + ll_append_multiple_char = staticmethod(ll_append_multiple_char) + ll_append_charpsize = 
staticmethod(ll_append_charpsize) + ll_getlength = staticmethod(ll_getlength) + ll_build = staticmethod(ll_build) + ll_bool = staticmethod(ll_bool) class StringBuilderRepr(BaseStringBuilderRepr): lowleveltype = lltype.Ptr(STRINGBUILDER) basetp = STR - mallocfn = staticmethod(rstr.mallocstr) string_repr = string_repr char_repr = char_repr raw_ptr_repr = PtrRepr( @@ -359,7 +434,6 @@ class UnicodeBuilderRepr(BaseStringBuilderRepr): lowleveltype = lltype.Ptr(UNICODEBUILDER) basetp = UNICODE - mallocfn = staticmethod(rstr.mallocunicode) string_repr = unicode_repr char_repr = unichar_repr raw_ptr_repr = PtrRepr( diff --git a/rpython/rtyper/module/ll_os_environ.py b/rpython/rtyper/module/ll_os_environ.py --- a/rpython/rtyper/module/ll_os_environ.py +++ b/rpython/rtyper/module/ll_os_environ.py @@ -60,7 +60,7 @@ # ____________________________________________________________ # Access to the 'environ' external variable - +prefix = '' if sys.platform.startswith('darwin'): CCHARPPP = rffi.CArrayPtr(rffi.CCHARPP) _os_NSGetEnviron = rffi.llexternal( @@ -77,6 +77,7 @@ rffi.CCHARPP, '_environ', eci) get__wenviron, _set__wenviron = rffi.CExternVariable( CWCHARPP, '_wenviron', eci, c_type='wchar_t **') + prefix = '_' else: os_get_environ, _os_set_environ = rffi.CExternVariable( rffi.CCHARPP, 'environ', ExternalCompilationInfo()) @@ -117,7 +118,7 @@ os_getenv = rffi.llexternal('getenv', [rffi.CCHARP], rffi.CCHARP, releasegil=False) -os_putenv = rffi.llexternal('putenv', [rffi.CCHARP], rffi.INT) +os_putenv = rffi.llexternal(prefix + 'putenv', [rffi.CCHARP], rffi.INT) if _WIN32: _wgetenv = rffi.llexternal('_wgetenv', [rffi.CWCHARP], rffi.CWCHARP, compilation_info=eci, releasegil=False) diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py --- a/rpython/rtyper/test/test_rbuilder.py +++ b/rpython/rtyper/test/test_rbuilder.py @@ -28,9 +28,13 @@ def test_simple(self): sb = StringBuilderRepr.ll_new(3) + assert StringBuilderRepr.ll_getlength(sb) == 0 
StringBuilderRepr.ll_append_char(sb, 'x') + assert StringBuilderRepr.ll_getlength(sb) == 1 StringBuilderRepr.ll_append(sb, llstr("abc")) + assert StringBuilderRepr.ll_getlength(sb) == 4 StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5) + assert StringBuilderRepr.ll_getlength(sb) == 7 StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3) assert StringBuilderRepr.ll_getlength(sb) == 10 s = StringBuilderRepr.ll_build(sb) diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py --- a/rpython/translator/c/node.py +++ b/rpython/translator/c/node.py @@ -959,12 +959,30 @@ args.append('0') yield 'RPyOpaque_SETUP_%s(%s);' % (T.tag, ', '.join(args)) +class ThreadLocalRefOpaqueNode(ContainerNode): + nodekind = 'tlrefopaque' From noreply at buildbot.pypy.org Mon Jun 23 22:09:27 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 23 Jun 2014 22:09:27 +0200 (CEST) Subject: [pypy-commit] pypy ufuncapi: cpyext now returns a ufunc, but calling the returned function segfaults Message-ID: <20140623200927.E072E1C30A6@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: ufuncapi Changeset: r72181:f7a99cbe1f1e Date: 2014-06-23 23:09 +0300 http://bitbucket.org/pypy/pypy/changeset/f7a99cbe1f1e/ Log: cpyext now returns a ufunc, but calling the returned function segfaults diff --git a/pypy/module/cpyext/include/numpy/__ufunc_api.h b/pypy/module/cpyext/include/numpy/__ufunc_api.h new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/include/numpy/__ufunc_api.h @@ -0,0 +1,328 @@ + +#ifdef _UMATHMODULE + +#ifdef NPY_ENABLE_SEPARATE_COMPILATION +extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type; +#else +NPY_NO_EXPORT PyTypeObject PyUFunc_Type; +#endif + +#ifdef NPY_ENABLE_SEPARATE_COMPILATION + extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type; +#else + NPY_NO_EXPORT PyTypeObject PyUFunc_Type; +#endif + +NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndData \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int); +NPY_NO_EXPORT int 
PyUFunc_RegisterLoopForType \ + (PyUFuncObject *, int, PyUFuncGenericFunction, int *, void *); +NPY_NO_EXPORT int PyUFunc_GenericFunction \ + (PyUFuncObject *, PyObject *, PyObject *, PyArrayObject **); +NPY_NO_EXPORT void PyUFunc_f_f_As_d_d \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_d_d \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_f_f \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_g_g \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_F_F_As_D_D \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_F_F \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_D_D \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_G_G \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_O_O \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_ff_f_As_dd_d \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_ff_f \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_dd_d \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_gg_g \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_FF_F_As_DD_D \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_DD_D \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_FF_F \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_GG_G \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_OO_O \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_O_O_method \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_OO_O_method \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_On_Om \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT int PyUFunc_GetPyValues \ + (char *, int *, int *, PyObject **); 
+NPY_NO_EXPORT int PyUFunc_checkfperr \ + (int, PyObject *, int *); +NPY_NO_EXPORT void PyUFunc_clearfperr \ + (void); +NPY_NO_EXPORT int PyUFunc_getfperr \ + (void); +NPY_NO_EXPORT int PyUFunc_handlefperr \ + (int, PyObject *, int, int *); +NPY_NO_EXPORT int PyUFunc_ReplaceLoopBySignature \ + (PyUFuncObject *, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *); +NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndDataAndSignature \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int, const char *); +NPY_NO_EXPORT int PyUFunc_SetUsesArraysAsData \ + (void **, size_t); +NPY_NO_EXPORT void PyUFunc_e_e \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_e_e_As_f_f \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_e_e_As_d_d \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_ee_e \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_ee_e_As_ff_f \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT void PyUFunc_ee_e_As_dd_d \ + (char **, npy_intp *, npy_intp *, void *); +NPY_NO_EXPORT int PyUFunc_DefaultTypeResolver \ + (PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyObject *, PyArray_Descr **); +NPY_NO_EXPORT int PyUFunc_ValidateCasting \ + (PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyArray_Descr **); +NPY_NO_EXPORT int PyUFunc_RegisterLoopForDescr \ + (PyUFuncObject *, PyArray_Descr *, PyUFuncGenericFunction, PyArray_Descr **, void *); + +#else + +#if defined(PY_UFUNC_UNIQUE_SYMBOL) +#define PyUFunc_API PY_UFUNC_UNIQUE_SYMBOL +#endif + +#if defined(NO_IMPORT) || defined(NO_IMPORT_UFUNC) +extern void **PyUFunc_API; +#else +#if defined(PY_UFUNC_UNIQUE_SYMBOL) +void **PyUFunc_API; +#else +static void **PyUFunc_API=NULL; +#endif +#endif + +#define PyUFunc_Type (*(PyTypeObject *)PyUFunc_API[0]) +#define PyUFunc_FromFuncAndData \ + (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int)) 
\ + PyUFunc_API[1]) +#define PyUFunc_RegisterLoopForType \ + (*(int (*)(PyUFuncObject *, int, PyUFuncGenericFunction, int *, void *)) \ + PyUFunc_API[2]) +#define PyUFunc_GenericFunction \ + (*(int (*)(PyUFuncObject *, PyObject *, PyObject *, PyArrayObject **)) \ + PyUFunc_API[3]) +#define PyUFunc_f_f_As_d_d \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[4]) +#define PyUFunc_d_d \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[5]) +#define PyUFunc_f_f \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[6]) +#define PyUFunc_g_g \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[7]) +#define PyUFunc_F_F_As_D_D \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[8]) +#define PyUFunc_F_F \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[9]) +#define PyUFunc_D_D \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[10]) +#define PyUFunc_G_G \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[11]) +#define PyUFunc_O_O \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[12]) +#define PyUFunc_ff_f_As_dd_d \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[13]) +#define PyUFunc_ff_f \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[14]) +#define PyUFunc_dd_d \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[15]) +#define PyUFunc_gg_g \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[16]) +#define PyUFunc_FF_F_As_DD_D \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[17]) +#define PyUFunc_DD_D \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[18]) +#define PyUFunc_FF_F \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[19]) +#define PyUFunc_GG_G \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + 
PyUFunc_API[20]) +#define PyUFunc_OO_O \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[21]) +#define PyUFunc_O_O_method \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[22]) +#define PyUFunc_OO_O_method \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[23]) +#define PyUFunc_On_Om \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[24]) +#define PyUFunc_GetPyValues \ + (*(int (*)(char *, int *, int *, PyObject **)) \ + PyUFunc_API[25]) +#define PyUFunc_checkfperr \ + (*(int (*)(int, PyObject *, int *)) \ + PyUFunc_API[26]) +#define PyUFunc_clearfperr \ + (*(void (*)(void)) \ + PyUFunc_API[27]) +#define PyUFunc_getfperr \ + (*(int (*)(void)) \ + PyUFunc_API[28]) +#define PyUFunc_handlefperr \ + (*(int (*)(int, PyObject *, int, int *)) \ + PyUFunc_API[29]) +#define PyUFunc_ReplaceLoopBySignature \ + (*(int (*)(PyUFuncObject *, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)) \ + PyUFunc_API[30]) +#define PyUFunc_FromFuncAndDataAndSignature \ + (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int, const char *)) \ + PyUFunc_API[31]) +#define PyUFunc_SetUsesArraysAsData \ + (*(int (*)(void **, size_t)) \ + PyUFunc_API[32]) +#define PyUFunc_e_e \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[33]) +#define PyUFunc_e_e_As_f_f \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[34]) +#define PyUFunc_e_e_As_d_d \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[35]) +#define PyUFunc_ee_e \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[36]) +#define PyUFunc_ee_e_As_ff_f \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[37]) +#define PyUFunc_ee_e_As_dd_d \ + (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \ + PyUFunc_API[38]) +#define PyUFunc_DefaultTypeResolver \ + (*(int (*)(PyUFuncObject *, 
NPY_CASTING, PyArrayObject **, PyObject *, PyArray_Descr **)) \ + PyUFunc_API[39]) +#define PyUFunc_ValidateCasting \ + (*(int (*)(PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyArray_Descr **)) \ + PyUFunc_API[40]) +#define PyUFunc_RegisterLoopForDescr \ + (*(int (*)(PyUFuncObject *, PyArray_Descr *, PyUFuncGenericFunction, PyArray_Descr **, void *)) \ + PyUFunc_API[41]) + +static int +_import_umath(void) +{ + PyObject *numpy = PyImport_ImportModule("numpy.core.umath"); + PyObject *c_api = NULL; + + if (numpy == NULL) { + PyErr_SetString(PyExc_ImportError, "numpy.core.umath failed to import"); + return -1; + } + c_api = PyObject_GetAttrString(numpy, "_UFUNC_API"); + Py_DECREF(numpy); + if (c_api == NULL) { + PyErr_SetString(PyExc_AttributeError, "_UFUNC_API not found"); + return -1; + } + +#if PY_VERSION_HEX >= 0x03000000 + if (!PyCapsule_CheckExact(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCapsule object"); + Py_DECREF(c_api); + return -1; + } + PyUFunc_API = (void **)PyCapsule_GetPointer(c_api, NULL); +#else + if (!PyCObject_Check(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCObject object"); + Py_DECREF(c_api); + return -1; + } + PyUFunc_API = (void **)PyCObject_AsVoidPtr(c_api); +#endif + Py_DECREF(c_api); + if (PyUFunc_API == NULL) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is NULL pointer"); + return -1; + } + return 0; +} + +#if PY_VERSION_HEX >= 0x03000000 +#define NUMPY_IMPORT_UMATH_RETVAL NULL +#else +#define NUMPY_IMPORT_UMATH_RETVAL +#endif + +#define import_umath() \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + return NUMPY_IMPORT_UMATH_RETVAL;\ + }\ + } while(0) + +#define import_umath1(ret) \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + return ret;\ + }\ + } while(0) + +#define 
import_umath2(ret, msg) \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError, msg);\ + return ret;\ + }\ + } while(0) + +#define import_ufunc() \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + }\ + } while(0) + +#endif diff --git a/pypy/module/cpyext/include/numpy/ndarraytypes.h b/pypy/module/cpyext/include/numpy/ndarraytypes.h --- a/pypy/module/cpyext/include/numpy/ndarraytypes.h +++ b/pypy/module/cpyext/include/numpy/ndarraytypes.h @@ -38,6 +38,8 @@ #define NPY_NO_DEPRECATED_API NPY_1_8_API_VERSION #undef NPY_1_8_API_VERSION +#define NPY_ENABLE_SEPARATE_COMPILATION 1 +#define NPY_VISIBILITY_HIDDEN #ifdef NPY_ENABLE_SEPARATE_COMPILATION #define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN diff --git a/pypy/module/cpyext/ndarrayobject.py b/pypy/module/cpyext/ndarrayobject.py --- a/pypy/module/cpyext/ndarrayobject.py +++ b/pypy/module/cpyext/ndarrayobject.py @@ -265,9 +265,9 @@ funcs_w = [None] * ntypes dtypes_w = [None] * ntypes * (nin + nout) for i in range(ntypes): - funcs_w[i] = funcs[i] - print 'function',i,'is',funcs[i], hex(rffi.cast(lltype.Signed, funcs[i])) + funcs_w[i] = space.wrap(funcs[i]) + #print 'function',i,'is',funcs[i], hex(rffi.cast(lltype.Signed, funcs[i])) for i in range(ntypes*(nin+nout)): dtypes_w[i] = get_dtype_cache(space).dtypes_by_num[ord(types[i])] - return ufuncs.ufunc_from_func_and_data_and_signature(funcs_w, data, dtypes_w, - nin, nout, identity, name, doc, check_return, signature) + return ufuncs.frompyfunc(space, space.newlist(funcs_w), nin, nout, dtypes_w, + signature, identity, name, doc) From noreply at buildbot.pypy.org Mon Jun 23 22:47:32 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 23 Jun 2014 22:47:32 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix a latent memory leak: in a cffi callback invoked in an Message-ID: 
<20140623204732.AA4D11D32C8@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72182:1befc86bfa19 Date: 2014-06-23 22:46 +0200 http://bitbucket.org/pypy/pypy/changeset/1befc86bfa19/ Log: Fix a latent memory leak: in a cffi callback invoked in an "unexpected" thread, always unregister newly registered ExecutionContext. Fixes cffi_tests/test_verify's test_callback_in_thread. diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -17,6 +17,9 @@ def enter_thread(self, space): self._value = space.createexecutioncontext() + def try_enter_thread(self, space): + return False + def signals_enabled(self): return True diff --git a/pypy/module/_cffi_backend/ccallback.py b/pypy/module/_cffi_backend/ccallback.py --- a/pypy/module/_cffi_backend/ccallback.py +++ b/pypy/module/_cffi_backend/ccallback.py @@ -183,9 +183,12 @@ misc._raw_memclear(ll_res, SIZE_OF_FFI_ARG) return # + must_leave = False ec = None + space = callback.space try: - ec = cerrno.get_errno_container(callback.space) + must_leave = space.threadlocals.try_enter_thread(space) + ec = cerrno.get_errno_container(space) cerrno.save_errno_into(ec, e) extra_line = '' try: @@ -206,5 +209,7 @@ except OSError: pass callback.write_error_return_value(ll_res) + if must_leave: + space.threadlocals.leave_thread(space) if ec is not None: cerrno.restore_errno_from(ec) diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -27,6 +27,12 @@ "Notification that the current thread is about to start running." 
self._set_ec(space.createexecutioncontext()) + def try_enter_thread(self, space): + if rthread.get_ident() in self._valuedict: + return False + self.enter_thread(space) + return True + def _set_ec(self, ec): ident = rthread.get_ident() if self._mainthreadident == 0 or self._mainthreadident == ident: From noreply at buildbot.pypy.org Tue Jun 24 09:46:46 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 09:46:46 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix issue #1801 Message-ID: <20140624074646.D982C1C333B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72183:1b8316ceb9e6 Date: 2014-06-24 09:45 +0200 http://bitbucket.org/pypy/pypy/changeset/1b8316ceb9e6/ Log: Fix issue #1801 diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -895,7 +895,7 @@ "use unwrap_spec(...=WrappedDefault(default))" % ( self._code.identifier, name, defaultval)) defs_w.append(None) - else: + elif name != '__args__' and name != 'args_w': defs_w.append(space.wrap(defaultval)) if self._code._unwrap_spec: UNDEFINED = object() diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -726,6 +726,22 @@ never_called py.test.raises(AssertionError, space.wrap, gateway.interp2app_temp(g)) + def test_unwrap_spec_default_applevel_bug2(self): + space = self.space + def g(space, w_x, w_y=None, __args__=None): + return w_x + w_g = space.wrap(gateway.interp2app_temp(g)) + w_42 = space.call_function(w_g, space.wrap(42)) + assert space.int_w(w_42) == 42 + py.test.raises(gateway.OperationError, space.call_function, w_g) + # + def g(space, w_x, w_y=None, args_w=None): + return w_x + w_g = space.wrap(gateway.interp2app_temp(g)) + w_42 = space.call_function(w_g, space.wrap(42)) + assert space.int_w(w_42) == 42 + py.test.raises(gateway.OperationError, 
space.call_function, w_g) + def test_interp2app_doc(self): space = self.space def f(space, w_x): diff --git a/pypy/module/_weakref/test/test_weakref.py b/pypy/module/_weakref/test/test_weakref.py --- a/pypy/module/_weakref/test/test_weakref.py +++ b/pypy/module/_weakref/test/test_weakref.py @@ -15,6 +15,10 @@ gc.collect() assert ref() is None + def test_missing_arg(self): + import _weakref + raises(TypeError, _weakref.ref) + def test_callback(self): import _weakref, gc class A(object): From noreply at buildbot.pypy.org Tue Jun 24 10:51:02 2014 From: noreply at buildbot.pypy.org (complx) Date: Tue, 24 Jun 2014 10:51:02 +0200 (CEST) Subject: [pypy-commit] cffi complx/indexrst-edited-online-with-bitbucket-1403535394739: index.rst edited online with Bitbucket Message-ID: <20140624085102.76DC81D24C0@cobra.cs.uni-duesseldorf.de> Author: Kurt Jaeger Branch: complx/indexrst-edited-online-with-bitbucket-1403535394739 Changeset: r1520:4aa308c25af2 Date: 2014-06-23 14:56 +0000 http://bitbucket.org/cffi/cffi/changeset/4aa308c25af2/ Log: index.rst edited online with Bitbucket diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -60,7 +60,7 @@ left. It supports CPython 2.6; 2.7; 3.x (tested with 3.2 and 3.3); -and is distrubuted with PyPy 2.0 beta2 or later. +and is distributed with PyPy 2.0 beta2 or later. Its speed is comparable to ctypes on CPython (a bit faster but a higher warm-up time). 
It is already faster than ctypes on PyPy (1.5x-2x), but not yet From noreply at buildbot.pypy.org Tue Jun 24 10:51:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 10:51:03 +0200 (CEST) Subject: [pypy-commit] cffi release-0.8: Merged in complx/cffi/complx/indexrst-edited-online-with-bitbucket-1403535394739 (pull request #42) Message-ID: <20140624085103.BB4931D24C0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: release-0.8 Changeset: r1521:08a4b8145f90 Date: 2014-06-24 10:51 +0200 http://bitbucket.org/cffi/cffi/changeset/08a4b8145f90/ Log: Merged in complx/cffi/complx/indexrst-edited-online-with-bitbucket-1403535394739 (pull request #42) index.rst edited online with Bitbucket diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -60,7 +60,7 @@ left. It supports CPython 2.6; 2.7; 3.x (tested with 3.2 and 3.3); -and is distrubuted with PyPy 2.0 beta2 or later. +and is distributed with PyPy 2.0 beta2 or later. Its speed is comparable to ctypes on CPython (a bit faster but a higher warm-up time). It is already faster than ctypes on PyPy (1.5x-2x), but not yet From noreply at buildbot.pypy.org Tue Jun 24 10:51:43 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 10:51:43 +0200 (CEST) Subject: [pypy-commit] cffi default: Transplant 08a4b8145f90 Message-ID: <20140624085143.B94DE1D24C0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1522:5722a31bd60a Date: 2014-06-24 10:51 +0200 http://bitbucket.org/cffi/cffi/changeset/5722a31bd60a/ Log: Transplant 08a4b8145f90 diff --git a/doc/source/index.rst b/doc/source/index.rst --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -63,7 +63,7 @@ left. It supports CPython 2.6; 2.7; 3.x (tested with 3.2 and 3.3); -and is distrubuted with PyPy 2.0 beta2 or later. +and is distributed with PyPy 2.0 beta2 or later. Its speed is comparable to ctypes on CPython (a bit faster but a higher warm-up time). 
It is already faster than ctypes on PyPy (1.5x-2x), but not yet From noreply at buildbot.pypy.org Tue Jun 24 12:13:45 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 12:13:45 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Change rpy_fastgil (invert the 0 and 1 values) to be more consistent Message-ID: <20140624101345.409721C05B7@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72184:206fd7ee9cc1 Date: 2014-06-24 11:38 +0200 http://bitbucket.org/pypy/pypy/changeset/206fd7ee9cc1/ Log: Change rpy_fastgil (invert the 0 and 1 values) to be more consistent with the standards of __sync_lock_test_and_set(). diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -28,13 +28,13 @@ void RPyGilYieldThread(void); void RPyGilAcquire(void); -extern void *rpy_fastgil; +extern Signed rpy_fastgil; static inline void RPyGilRelease(void) { - assert(rpy_fastgil == NULL); - rpy_fastgil = (void *)1; + assert(RPY_FASTGIL_HELD(rpy_fastgil)); + rpy_fastgil = 0; } -static inline void *RPyFetchFastGil(void) { +static inline Signed *RPyFetchFastGil(void) { return &rpy_fastgil; } diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -484,35 +484,48 @@ /* Idea: - - "The GIL" is a composite concept. "The GIL is locked" means - that the global variable 'rpy_fastgil' is zero *and* the - 'mutex_gil' is acquired. Conversely, "the GIL is unlocked" means - that rpy_fastgil != 0 *or* mutex_gil is released. It should never - be the case that these two conditions are true at the same time. + - "The GIL" is a composite concept. There are two locks, and "the + GIL is locked" when both are locked. + + - The first lock is a simple global variable 'rpy_fastgil'. 
With + shadowstack, we use the most portable definition: 0 means unlocked + and != 0 means locked. With asmgcc, 0 means unlocked but only 1 + means locked. A different value means unlocked too, but the value + is used by the JIT to contain the stack top for stack root scanning. + + - The second lock is a regular mutex. In the fast path, it is never + unlocked. Remember that "the GIL is unlocked" means that either + the first or the second lock is unlocked. It should never be the + case that both are unlocked at the same time. - Let's call "thread 1" the thread with the GIL. Whenever it does an - external function call, it sets 'rpy_fastgil' to a non-null value. + external function call, it sets 'rpy_fastgil' to 0 (unlocked). This is the cheapest way to release the GIL. When it returns from - the function call, this thread attempts to atomically reset - rpy_fastgil to zero. In the common case where it works, thread 1 + the function call, this thread attempts to atomically change + 'rpy_fastgil' to 1. In the common case where it works, thread 1 has got the GIL back and so continues to run. - - But "thread 2" is eagerly waiting for thread 1 to become blocked in - some long-running call. About every millisecond it checks if - 'rpy_fastgil' is non-null, by atomically resetting it to zero. - If it was non-null, it means that the GIL was not actually locked, - and thread 2 has now got the GIL. + - Say "thread 2" is eagerly waiting for thread 1 to become blocked in + some long-running call. Regularly, it checks if 'rpy_fastgil' is 0 + and tries to atomically change it to 1. If it succeeds, it means + that the GIL was not previously locked. Thread 2 has now got the GIL. - - If there are more threads, they are really sleeping, waiting on the - 'mutex_gil_stealer' held by thread 2. + - If there are more than 2 threads, the rest is really sleeping by + waiting on the 'mutex_gil_stealer' held by thread 2. 
- An additional mechanism is used for when thread 1 wants to explicitly yield the GIL to thread 2: it does so by releasing 'mutex_gil' (which is otherwise not released) but keeping the - value of 'rpy_fastgil' to zero. + value of 'rpy_fastgil' to 1. */ -void *rpy_fastgil = NULL; +#ifdef PYPY_USE_ASMGCC +# define RPY_FASTGIL_LOCKED(x) (x == 1) +#else +# define RPY_FASTGIL_LOCKED(x) (x != 0) +#endif + +Signed rpy_fastgil = 1; static pthread_mutex_t mutex_gil_stealer; static pthread_mutex_t mutex_gil; static pthread_once_t mutex_gil_once = PTHREAD_ONCE_INIT; @@ -530,26 +543,6 @@ pthread_once(&mutex_gil_once, &init_mutex_gil); } -static inline void *atomic_xchg(void **ptr, void *value) -{ - void *result; -#if defined(__amd64__) - asm volatile ("xchgq %0, %2 /* automatically locked */" - : "=r"(result) : "0"(value), "m"(*ptr) : "memory"); -#elif defined(__i386__) - asm volatile ("xchgl %0, %2 /* automatically locked */" - : "=r"(result) : "0"(value), "m"(*ptr) : "memory"); -#else - /* requires gcc >= 4.1 */ - while (1) { - result = *ptr; - if (__sync_bool_compare_and_swap(ptr, result, value)) - break; - } -#endif - return result; -} - static inline void timespec_add(struct timespec *t, long incr) { long nsec = t->tv_nsec + incr; @@ -565,12 +558,11 @@ { /* Acquires the GIL. Note: this function saves and restores 'errno'. */ - void *old_fastgil = atomic_xchg(&rpy_fastgil, NULL); + Signed old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1); - if (old_fastgil != NULL) { - /* If we get a non-NULL value, it means that no other thread had the - GIL, and the exchange was successful. 'mutex_gil' should still - be locked at this point. + if (!RPY_FASTGIL_LOCKED(old_fastgil)) { + /* The fastgil was not previously locked: success. + 'mutex_gil' should still be locked at this point. */ } else { @@ -602,29 +594,29 @@ /* We arrive here if 'mutex_gil' was recently released and we just relocked it. 
*/ - assert(rpy_fastgil == NULL); - old_fastgil = (void *)1; + old_fastgil = 0; break; } /* Busy-looping here. Try to look again if 'rpy_fastgil' is - non-NULL. + released. */ - if (rpy_fastgil != NULL) { - old_fastgil = atomic_xchg(&rpy_fastgil, NULL); - if (old_fastgil != NULL) { - /* yes, got a non-NULL value! */ + if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) { + old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1); + if (!RPY_FASTGIL_LOCKED(old_fastgil)) + /* yes, got a non-held value! Now we hold it. */ break; } } /* Otherwise, loop back. */ } + ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil_stealer)); errno = old_errno; } #ifdef PYPY_USE_ASMGCC - if (old_fastgil != (void *)1) { + if (old_fastgil != 0) { /* this case only occurs from the JIT compiler */ struct pypy_ASM_FRAMEDATA_HEAD0 *new = (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil; @@ -636,8 +628,9 @@ next->as_prev = new; } #else - assert(old_fastgil == (void *)1); + assert(old_fastgil == 0); #endif + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); return; } @@ -654,7 +647,7 @@ void RPyGilYieldThread(void) { - assert(rpy_fastgil == NULL); + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); /* Explicitly release the 'mutex_gil'. */ @@ -662,7 +655,7 @@ ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil)); /* Now nobody has got the GIL, because 'mutex_gil' is released (but - rpy_fastgil is still zero). Call RPyGilAcquire(). It will + rpy_fastgil is still locked). Call RPyGilAcquire(). It will enqueue ourselves at the end of the 'mutex_gil_stealer' queue. If there is no other waiting thread, it will fall through both its pthread_mutex_lock() and pthread_mutex_timedlock() now. 
From noreply at buildbot.pypy.org Tue Jun 24 12:13:46 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 12:13:46 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: in-progress Message-ID: <20140624101346.84DD21C05B7@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72185:eaa2cbefc0e6 Date: 2014-06-24 12:03 +0200 http://bitbucket.org/pypy/pypy/changeset/eaa2cbefc0e6/ Log: in-progress diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -307,11 +307,21 @@ @staticmethod @rgc.no_collect def _reacquire_gil_asmgcc(css, old_rpy_fastgil): - # Only called if rpy_fastgil was reset to a different value - # by another thread or by a callback. See description in - # transator/c/src/thread_pthread.c. - if not old_rpy_fastgil: - # first reacquire the GIL + # Before doing an external call, 'rpy_fastgil' is initialized to + # be equal to css. This function is called if we find out after + # the call that it is no longer equal to css. See description + # in transator/c/src/thread_pthread.c. + + if old_rpy_fastgil == 0: + # this case occurs if some other thread stole the GIL but + # released it again. What occurred here is that we changed + # 'rpy_fastgil' from 0 to 1, thus successfully requiring the + # GIL. + pass + + elif old_rpy_fastgil == 1: + # 'rpy_fastgil' was (and still is) locked by someone else. + # We need to wait for the regular mutex. 
after = rffi.aroundstate.after if after: after() @@ -326,6 +336,7 @@ oth.prev = asmgcroot.gcrootanchor asmgcroot.gcrootanchor.next = oth next.prev = oth + # similar to trackgcroot.py:pypy_asm_stackwalk, second part: # detach the 'css' from the chained list from rpython.memory.gctransform import asmgcroot @@ -339,14 +350,15 @@ @rgc.no_collect def _reacquire_gil_shadowstack(): # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode, - # rpy_fastgil contains only 0 or 1, and this must only be called when - # the old value stored in rpy_fastgil was 0. + # 'rpy_fastgil' contains only zero or non-zero, and this is only + # called when the old value stored in 'rpy_fastgil' was non-zero + # (i.e. still locked, must wait with the regular mutex) after = rffi.aroundstate.after if after: after() _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void)) - _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, rffi.CCHARP], + _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed], lltype.Void)) def _build_release_gil(self, gcrootmap): diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py --- a/rpython/jit/backend/x86/callbuilder.py +++ b/rpython/jit/backend/x86/callbuilder.py @@ -98,9 +98,10 @@ from rpython.jit.backend.x86.assembler import heap # if not self.asm._is_asmgcc(): - # the helper takes no argument + # shadowstack: change 'rpy_fastgil' to 0 (it should be + # non-zero right now). self.change_extra_stack_depth = False - css_value = imm(1) + css_value = imm(0) else: from rpython.memory.gctransform import asmgcroot # build a 'css' structure on the stack: 2 words for the linkage, @@ -134,14 +135,14 @@ from rpython.jit.backend.x86.assembler import heap from rpython.jit.backend.x86 import rx86 # - # check if we need to call the reopenstack() function + # check if we need to call the reacqgil() function or not # (to acquiring the GIL, remove the asmgcc head from # the chained list, etc.) 
mc = self.mc restore_edx = False if not self.asm._is_asmgcc(): css = 0 - css_value = imm(1) + css_value = imm(0) old_value = ecx else: from rpython.memory.gctransform import asmgcroot @@ -158,7 +159,7 @@ old_value = esi mc.LEA_rs(css_value.value, css) # - mc.XOR_rr(old_value.value, old_value.value) + mc.MOV(old_value, imm(1)) if rx86.fits_in_32bits(fastgil): mc.XCHG_rj(old_value.value, fastgil) else: @@ -168,15 +169,15 @@ mc.J_il8(rx86.Conditions['E'], 0) je_location = mc.get_relative_pos() # - # Yes, we need to call the reopenstack() function - self.save_result_value_reacq(restore_edx) + # Yes, we need to call the reacqgil() function + self.save_result_value_reacq() if self.asm._is_asmgcc(): if IS_X86_32: mc.MOV_sr(4, old_value.value) mc.MOV_sr(0, css_value.value) # on X86_64, they are already in the right registers mc.CALL(imm(self.asm.reacqgil_addr)) - self.restore_result_value_reacq(restore_edx) + self.restore_result_value_reacq() # # patch the JE above offset = mc.get_relative_pos() - je_location @@ -198,11 +199,11 @@ #else: # for shadowstack, done for us by _reload_frame_if_necessary() - def save_result_value_reacq(self, restore_edx): + def save_result_value_reacq(self): """Overridden in CallBuilder32 and CallBuilder64""" raise NotImplementedError - def restore_result_value_reacq(self, restore_edx): + def restore_result_value_reacq(self): """Overridden in CallBuilder32 and CallBuilder64""" raise NotImplementedError @@ -273,7 +274,7 @@ else: CallBuilderX86.load_result(self) - def save_result_value_reacq(self, restore_edx): + def save_result_value_reacq(self): # Temporarily save the result value into [ESP+8]. We use "+8" # in order to leave the two initial words free, in case it's needed. 
# Also note that in this 32-bit case, a long long return value is @@ -285,8 +286,7 @@ # a float or a long long return if self.restype == 'L': self.mc.MOV_sr(8, eax.value) # long long - if not restore_edx: - self.mc.MOV_sr(12, edx.value) + #self.mc.MOV_sr(12, edx.value) -- already done by the caller else: self.mc.FSTPL_s(8) # float return else: @@ -297,7 +297,7 @@ assert self.ressize <= WORD self.mc.MOV_sr(8, eax.value) - def restore_result_value_reacq(self, restore_edx): + def restore_result_value_reacq(self): # Opposite of save_result_value_reacq() if self.ressize == 0: # void return return @@ -305,8 +305,7 @@ # a float or a long long return if self.restype == 'L': self.mc.MOV_rs(eax.value, 8) # long long - if not restore_edx: - self.mc.MOV_rs(edx.value, 12) + #self.mc.MOV_rs(edx.value, 12) -- will be done by the caller else: self.mc.FLDL_s(8) # float return else: @@ -430,7 +429,7 @@ else: CallBuilderX86.load_result(self) - def save_result_value_reacq(self, restore_edx): + def save_result_value_reacq(self): # Temporarily save the result value into [ESP]. 
if self.ressize == 0: # void return return @@ -448,7 +447,7 @@ assert self.restype == INT self.mc.MOV_sr(0, eax.value) - def restore_result_value_reacq(self, restore_edx): + def restore_result_value_reacq(self): # Opposite of save_result_value_reacq() if self.ressize == 0: # void return return From noreply at buildbot.pypy.org Tue Jun 24 12:13:47 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 12:13:47 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: translation fixes Message-ID: <20140624101347.B301A1C05B7@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72186:1c5642278f0f Date: 2014-06-24 12:13 +0200 http://bitbucket.org/pypy/pypy/changeset/1c5642278f0f/ Log: translation fixes diff --git a/rpython/rlib/rgil.py b/rpython/rlib/rgil.py --- a/rpython/rlib/rgil.py +++ b/rpython/rlib/rgil.py @@ -1,6 +1,7 @@ import py from rpython.conftest import cdir from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.rtyper.lltypesystem import lltype, llmemory, rffi # these functions manipulate directly the GIL, whose definition does not # escape the C code itself @@ -13,6 +14,8 @@ export_symbols = ['RPyGilYieldThread', 'RPyGilRelease', 'RPyGilAcquire', 'RPyFetchFastGil']) +llexternal = rffi.llexternal + gil_yield_thread = llexternal('RPyGilYieldThread', [], lltype.Void, _nowrapper=True, sandboxsafe=True, diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -28,13 +28,19 @@ void RPyGilYieldThread(void); void RPyGilAcquire(void); -extern Signed rpy_fastgil; +#ifdef PYPY_USE_ASMGCC +# define RPY_FASTGIL_LOCKED(x) (x == 1) +#else +# define RPY_FASTGIL_LOCKED(x) (x != 0) +#endif + +extern long rpy_fastgil; static inline void RPyGilRelease(void) { - assert(RPY_FASTGIL_HELD(rpy_fastgil)); + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); rpy_fastgil = 0; } -static inline Signed *RPyFetchFastGil(void) { 
+static inline long *RPyFetchFastGil(void) { return &rpy_fastgil; } diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -519,13 +519,7 @@ value of 'rpy_fastgil' to 1. */ -#ifdef PYPY_USE_ASMGCC -# define RPY_FASTGIL_LOCKED(x) (x == 1) -#else -# define RPY_FASTGIL_LOCKED(x) (x != 0) -#endif - -Signed rpy_fastgil = 1; +long rpy_fastgil = 1; static pthread_mutex_t mutex_gil_stealer; static pthread_mutex_t mutex_gil; static pthread_once_t mutex_gil_once = PTHREAD_ONCE_INIT; @@ -558,7 +552,7 @@ { /* Acquires the GIL. Note: this function saves and restores 'errno'. */ - Signed old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1); + long old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1); if (!RPY_FASTGIL_LOCKED(old_fastgil)) { /* The fastgil was not previously locked: success. @@ -606,7 +600,6 @@ if (!RPY_FASTGIL_LOCKED(old_fastgil)) /* yes, got a non-held value! Now we hold it. */ break; - } } /* Otherwise, loop back. 
*/ } From noreply at buildbot.pypy.org Tue Jun 24 12:28:00 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 12:28:00 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Translation fix Message-ID: <20140624102800.B41961C333B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72187:9fe8251182d4 Date: 2014-06-24 12:27 +0200 http://bitbucket.org/pypy/pypy/changeset/9fe8251182d4/ Log: Translation fix diff --git a/rpython/jit/backend/llsupport/callbuilder.py b/rpython/jit/backend/llsupport/callbuilder.py --- a/rpython/jit/backend/llsupport/callbuilder.py +++ b/rpython/jit/backend/llsupport/callbuilder.py @@ -1,5 +1,6 @@ from rpython.rlib.clibffi import FFI_DEFAULT_ABI from rpython.rlib import rgil +from rpython.rtyper.lltypesystem import lltype, rffi class AbstractCallBuilder(object): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -584,11 +584,13 @@ rffi.aroundstate.after = after rffi.aroundstate.enter_callback = after rffi.aroundstate.leave_callback = before - # the 'aroundstate' contains regular function and not ll pointers to them, - # but let's call llhelper() anyway to force their annotation - from rpython.rtyper.annlowlevel import llhelper - llhelper(rffi.AroundFnPtr, before) - llhelper(rffi.AroundFnPtr, after) + # force the annotation of before() and after() + from rpython.rlib.nonconst import NonConstant + if NonConstant(0): + if before: + before() + if after: + after() def is_in_callback(): from rpython.rtyper.lltypesystem import rffi From noreply at buildbot.pypy.org Tue Jun 24 15:42:04 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:04 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Fix typos Message-ID: <20140624134204.8A8AC1C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72188:a8a8d8d6c823 Date: 2014-06-24 13:55 +0200 
http://bitbucket.org/pypy/pypy/changeset/a8a8d8d6c823/ Log: Fix typos diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -310,7 +310,7 @@ # Before doing an external call, 'rpy_fastgil' is initialized to # be equal to css. This function is called if we find out after # the call that it is no longer equal to css. See description - # in transator/c/src/thread_pthread.c. + # in translator/c/src/thread_pthread.c. if old_rpy_fastgil == 0: # this case occurs if some other thread stole the GIL but diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -154,11 +154,13 @@ argnames = ', '.join(['a%d' % i for i in range(len(args))]) source = py.code.Source(""" - def call_external_function(%(argnames)s): - aroundstate.before() + def call_external_function(%(argnames)s)e + before = aroundstate.before + if before: before() # NB. it is essential that no exception checking occurs here! res = funcptr(%(argnames)s) - aroundstate.after() + after = aroundstate.after + if after: after() return res """ % locals()) miniglobals = {'aroundstate': aroundstate, @@ -293,7 +295,7 @@ aroundstate.leave_callback() # here we don't hold the GIL any more. As in the wrapper() produced # by llexternal, it is essential that no exception checking occurs - # after the call to leave_calback(). + # after the call to leave_callback(). 
return result """ % locals()) miniglobals = locals().copy() From noreply at buildbot.pypy.org Tue Jun 24 15:42:05 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:05 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Partial revert of some previous changes in this branch: trying to Message-ID: <20140624134205.BA4CF1C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72189:b349a2e4c0b5 Date: 2014-06-24 14:14 +0200 http://bitbucket.org/pypy/pypy/changeset/b349a2e4c0b5/ Log: Partial revert of some previous changes in this branch: trying to minimize the changes per branch diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -121,21 +121,25 @@ @entrypoint('main', [rffi.CCHARP], c_name='pypy_execute_source') def pypy_execute_source(ll_source): - rffi.aroundstate.enter_callback() + after = rffi.aroundstate.after + if after: after() source = rffi.charp2str(ll_source) res = _pypy_execute_source(source) - rffi.aroundstate.leave_callback() + before = rffi.aroundstate.before + if before: before() return rffi.cast(rffi.INT, res) @entrypoint('main', [rffi.CCHARP, lltype.Signed], c_name='pypy_execute_source_ptr') def pypy_execute_source_ptr(ll_source, ll_ptr): - rffi.aroundstate.enter_callback() + after = rffi.aroundstate.after + if after: after() source = rffi.charp2str(ll_source) space.setitem(w_globals, space.wrap('c_argument'), space.wrap(ll_ptr)) res = _pypy_execute_source(source) - rffi.aroundstate.leave_callback() + before = rffi.aroundstate.before + if before: before() return rffi.cast(rffi.INT, res) @entrypoint('main', [], c_name='pypy_init_threads') @@ -143,7 +147,8 @@ if not space.config.objspace.usemodules.thread: return os_thread.setup_threads(space) - rffi.aroundstate.leave_callback() + before = rffi.aroundstate.before + if before: before() @entrypoint('main', [], c_name='pypy_thread_attach') def 
pypy_thread_attach(): @@ -154,7 +159,8 @@ rthread.gc_thread_start() os_thread.bootstrapper.nbthreads += 1 os_thread.bootstrapper.release() - rffi.aroundstate.leave_callback() + before = rffi.aroundstate.before + if before: before() w_globals = space.newdict() space.setitem(w_globals, space.wrap('__builtins__'), diff --git a/pypy/module/cpyext/pystate.py b/pypy/module/cpyext/pystate.py --- a/pypy/module/cpyext/pystate.py +++ b/pypy/module/cpyext/pystate.py @@ -29,7 +29,8 @@ state = space.fromcache(InterpreterState) tstate = state.swap_thread_state( space, lltype.nullptr(PyThreadState.TO)) - rffi.aroundstate.leave_callback() + if rffi.aroundstate.before: + rffi.aroundstate.before() return tstate @cpython_api([PyThreadState], lltype.Void) @@ -39,7 +40,8 @@ NULL. If the lock has been created, the current thread must not have acquired it, otherwise deadlock ensues. (This function is available even when thread support is disabled at compile time.)""" - rffi.aroundstate.enter_callback() + if rffi.aroundstate.after: + rffi.aroundstate.after() state = space.fromcache(InterpreterState) state.swap_thread_state(space, tstate) @@ -186,7 +188,9 @@ tstate, which should not be NULL. The lock must have been created earlier. If this thread already has the lock, deadlock ensues. This function is not available when thread support is disabled at compile time.""" - rffi.aroundstate.enter_callback() + if rffi.aroundstate.after: + # After external call is before entering Python + rffi.aroundstate.after() @cpython_api([PyThreadState], lltype.Void) def PyEval_ReleaseThread(space, tstate): @@ -196,7 +200,9 @@ that it represents the current thread state --- if it isn't, a fatal error is reported. 
This function is not available when thread support is disabled at compile time.""" - rffi.aroundstate.leave_callback() + if rffi.aroundstate.before: + # Before external call is after running Python + rffi.aroundstate.before() PyGILState_STATE = rffi.COpaquePtr('PyGILState_STATE', typedef='PyGILState_STATE', @@ -204,12 +210,16 @@ @cpython_api([], PyGILState_STATE, error=CANNOT_FAIL) def PyGILState_Ensure(space): - rffi.aroundstate.enter_callback() + if rffi.aroundstate.after: + # After external call is before entering Python + rffi.aroundstate.after() return 0 @cpython_api([PyGILState_STATE], lltype.Void) def PyGILState_Release(space, state): - rffi.aroundstate.leave_callback() + if rffi.aroundstate.before: + # Before external call is after running Python + rffi.aroundstate.before() @cpython_api([], PyInterpreterState, error=CANNOT_FAIL) def PyInterpreterState_Head(space): @@ -233,12 +243,12 @@ raise NoThreads # PyThreadState_Get will allocate a new execution context, # we need to protect gc and other globals with the GIL. - rffi.aroundstate.enter_callback() + rffi.aroundstate.after() try: rthread.gc_thread_start() return PyThreadState_Get(space) finally: - rffi.aroundstate.leave_callback() + rffi.aroundstate.before() @cpython_api([PyThreadState], lltype.Void) def PyThreadState_Clear(space, tstate): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -575,22 +575,18 @@ raise TypeError, "hlinvoke is meant to be rtyped and not called direclty" def invoke_around_extcall(before, after): - """Call before() before any external function call, and after() after. - ONLY FOR TESTS! - """ - # NOTE: the hooks are cleared during translation! + # NOTE: the hooks are cleared during translation! To be effective + # in a compiled program they must be set at run-time. 
from rpython.rtyper.lltypesystem import rffi rffi.aroundstate.before = before rffi.aroundstate.after = after - rffi.aroundstate.enter_callback = after - rffi.aroundstate.leave_callback = before - # force the annotation of before() and after() - from rpython.rlib.nonconst import NonConstant - if NonConstant(0): - if before: - before() - if after: - after() + # the 'aroundstate' contains regular function and not ll pointers to them, + # but let's call llhelper() anyway to force their annotation + from rpython.rtyper.annlowlevel import llhelper + if before is not None: + llhelper(rffi.AroundFnPtr, before) + if after is not None: + llhelper(rffi.AroundFnPtr, after) def is_in_callback(): from rpython.rtyper.lltypesystem import rffi diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -154,7 +154,7 @@ argnames = ', '.join(['a%d' % i for i in range(len(args))]) source = py.code.Source(""" - def call_external_function(%(argnames)s)e + def call_external_function(%(argnames)s): before = aroundstate.before if before: before() # NB. it is essential that no exception checking occurs here! @@ -218,8 +218,13 @@ freeme = arg elif _isfunctype(TARGET) and not _isllptr(arg): # XXX pass additional arguments - arg = llhelper(TARGET, _make_wrapper_for( - TARGET, arg, invoke_around_handlers, callbackholder)) + if invoke_around_handlers: + arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg, + callbackholder, + aroundstate)) + else: + arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg, + callbackholder)) else: SOURCE = lltype.typeOf(arg) if SOURCE != TARGET: @@ -258,8 +263,7 @@ def __init__(self): self.callbacks = {} -def _make_wrapper_for(TP, callable, invoke_around_handlers=True, - callbackholder=None): +def _make_wrapper_for(TP, callable, callbackholder=None, aroundstate=None): """ Function creating wrappers for callbacks. 
Note that this is cheating as we assume constant callbacks and we just memoize wrappers """ @@ -275,8 +279,10 @@ args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))]) source = py.code.Source(r""" def wrapper(%(args)s): # no *args - no GIL for mallocing the tuple - if invoke_around_handlers: - aroundstate.enter_callback() + if aroundstate is not None: + after = aroundstate.after + if after: + after() # from now on we hold the GIL stackcounter.stacks_counter += 1 llop.gc_stack_bottom(lltype.Void) # marker for trackgcroot.py @@ -291,11 +297,13 @@ traceback.print_exc() result = errorcode stackcounter.stacks_counter -= 1 - if invoke_around_handlers: - aroundstate.leave_callback() + if aroundstate is not None: + before = aroundstate.before + if before: + before() # here we don't hold the GIL any more. As in the wrapper() produced # by llexternal, it is essential that no exception checking occurs - # after the call to leave_callback(). + # after the call to before(). return result """ % locals()) miniglobals = locals().copy() @@ -303,30 +311,16 @@ miniglobals['os'] = os miniglobals['we_are_translated'] = we_are_translated miniglobals['stackcounter'] = stackcounter - miniglobals['aroundstate'] = aroundstate exec source.compile() in miniglobals return miniglobals['wrapper'] _make_wrapper_for._annspecialcase_ = 'specialize:memo' - -def _standard_gil_acquire(): - if we_are_translated(): - from rpython.rlib import rgil - rgil.gil_acquire() - -def _standard_gil_release(): - if we_are_translated(): - from rpython.rlib import rgil - rgil.gil_release() - AroundFnPtr = lltype.Ptr(lltype.FuncType([], lltype.Void)) class AroundState: def _cleanup_(self): - self.before = _standard_gil_release - self.after = _standard_gil_acquire - self.enter_callback = _standard_gil_acquire - self.leave_calback = _standard_gil_release + self.before = None # or a regular RPython function + self.after = None # or a regular RPython function aroundstate = AroundState() aroundstate._cleanup_() diff 
--git a/rpython/translator/c/src/thread.c b/rpython/translator/c/src/thread.c --- a/rpython/translator/c/src/thread.c +++ b/rpython/translator/c/src/thread.c @@ -9,17 +9,9 @@ #include "common_header.h" #endif - -/* More ugliness follows... */ -#ifdef PYPY_USE_ASMGCC -#include "common_header.h" -#include "structdef.h" -#include "forwarddecl.h" -#endif - - #ifdef _WIN32 #include "src/thread_nt.c" #else #include "src/thread_pthread.c" #endif + From noreply at buildbot.pypy.org Tue Jun 24 15:42:06 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:06 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Reduce the diff Message-ID: <20140624134206.F3C361C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72190:00acabcd683c Date: 2014-06-24 14:22 +0200 http://bitbucket.org/pypy/pypy/changeset/00acabcd683c/ Log: Reduce the diff diff --git a/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py b/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py --- a/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py +++ b/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py @@ -27,7 +27,7 @@ glob.event += 1 def before(n, x): - invoke_around_extcall(func, None) + invoke_around_extcall(func, func) return (n, None, None, None, None, None, None, None, None, None, None, None) # diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -575,6 +575,9 @@ raise TypeError, "hlinvoke is meant to be rtyped and not called direclty" def invoke_around_extcall(before, after): + """Call before() before any external function call, and after() after. + At the moment only one pair before()/after() can be registered at a time. + """ # NOTE: the hooks are cleared during translation! To be effective # in a compiled program they must be set at run-time. 
from rpython.rtyper.lltypesystem import rffi @@ -583,10 +586,8 @@ # the 'aroundstate' contains regular function and not ll pointers to them, # but let's call llhelper() anyway to force their annotation from rpython.rtyper.annlowlevel import llhelper - if before is not None: - llhelper(rffi.AroundFnPtr, before) - if after is not None: - llhelper(rffi.AroundFnPtr, after) + llhelper(rffi.AroundFnPtr, before) + llhelper(rffi.AroundFnPtr, after) def is_in_callback(): from rpython.rtyper.lltypesystem import rffi diff --git a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -846,10 +846,6 @@ return [] def _visit_xchg(self, line): - # ignore the special locking xchg put there by custom assembler - # in thread_pthread.c, with an associated comment - if line.endswith('*/\n'): - return [] # only support the format used in VALGRIND_DISCARD_TRANSLATIONS # which is to use a marker no-op "xchgl %ebx, %ebx" match = self.r_binaryinsn.match(line) diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -244,7 +244,4 @@ InterlockedDecrement(&pending_acquires); } -#ifdef RPY_FASTGIL # error "XXX implement me" -InterlockedExchangePointer -#endif From noreply at buildbot.pypy.org Tue Jun 24 15:42:08 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:08 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: in-progress Message-ID: <20140624134208.3F9BA1C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72191:809cc4f15a0b Date: 2014-06-24 14:43 +0200 http://bitbucket.org/pypy/pypy/changeset/809cc4f15a0b/ Log: in-progress diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py --- a/rpython/jit/backend/x86/rx86.py +++ b/rpython/jit/backend/x86/rx86.py @@ -621,9 +621,9 
@@ # http://lists.gnu.org/archive/html/bug-binutils/2007-07/msg00095.html MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0') MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0') - MOVD_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2)) - MOVD_sx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), stack_sp(1)) - MOVD_xs = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_sp(2)) + MOVQ_xb = xmminsn('\xF3', rex_nw, '\x0F\x7E', register(1, 8), stack_bp(2)) + MOVQ_xs = xmminsn('\xF3', rex_nw, '\x0F\x7E', register(1, 8), stack_sp(2)) + MOVQ_sx = xmminsn('\x66', rex_nw, '\x0F\xD6', register(2, 8), stack_sp(1)) PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b')) diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py --- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py +++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py @@ -191,7 +191,8 @@ for args in args_lists: suffix = "" if (argmodes and not self.is_xmm_insn - and not instrname.startswith('FSTP')): + and not instrname.startswith('FSTP') + and not instrname.startswith('FLD')): suffix = suffixes[self.WORD] # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer # operands when converting to/from floating point, so we need to From noreply at buildbot.pypy.org Tue Jun 24 15:42:23 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:23 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: hg merge default Message-ID: <20140624134223.302601C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72192:f3b9c99f1aae Date: 2014-06-24 14:46 +0200 http://bitbucket.org/pypy/pypy/changeset/f3b9c99f1aae/ Log: hg merge default diff too long, truncating to 2000 out of 76696 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -64,6 +64,7 @@ 
^pypy/goal/pypy-jvm.jar ^pypy/goal/.+\.exe$ ^pypy/goal/.+\.dll$ +^pypy/goal/.+\.lib$ ^pypy/_cache$ ^pypy/doc/statistic/.+\.html$ ^pypy/doc/statistic/.+\.eps$ diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -6,3 +6,11 @@ 9b623bc48b5950cf07184462a0e48f2c4df0d720 pypy-2.1-beta1-arm 9b623bc48b5950cf07184462a0e48f2c4df0d720 pypy-2.1-beta1-arm ab0dd631c22015ed88e583d9fdd4c43eebf0be21 pypy-2.1-beta1-arm +20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 +20e51c4389ed4469b66bb9d6289ce0ecfc82c4b9 release-2.3.0 +0000000000000000000000000000000000000000 release-2.3.0 +394146e9bb673514c61f0150ab2013ccf78e8de7 release-2.3 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.2=3.1 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.3.1 +32f35069a16d819b58c1b6efb17c44e3e53397b2 release-2.2=3.1 +0000000000000000000000000000000000000000 release-2.2=3.1 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -44,31 +44,33 @@ Alex Gaynor Michael Hudson David Schneider + Matti Picus + Brian Kearns + Philip Jenvey Holger Krekel Christian Tismer Hakan Ardo Benjamin Peterson - Matti Picus - Philip Jenvey + Manuel Jacob Anders Chrigstrom - Brian Kearns Eric van Riet Paap + Wim Lavrijsen + Ronan Lamy Richard Emslie Alexander Schremmer - Wim Lavrijsen Dan Villiom Podlaski Christiansen - Manuel Jacob Lukas Diekmann Sven Hager Anders Lehmann Aurelien Campeas Niklaus Haldimann - Ronan Lamy Camillo Bruni Laura Creighton Toon Verwaest + Remi Meier Leonardo Santagada Seo Sanghyeon + Romain Guillebert Justin Peel Ronny Pfannschmidt David Edelsohn @@ -80,52 +82,62 @@ Daniel Roberts Niko Matsakis Adrien Di Mascio + Alexander Hesse Ludovic Aubry - Alexander Hesse Jacob Hallen - Romain Guillebert Jason Creighton Alex Martelli Michal Bendowski Jan de Mooij + stian Michael Foord Stephan Diehl Stefan Schwarzer Valentino Volonghi Tomek Meka Patrick Maupin - stian Bob Ippolito Bruno Gola Jean-Paul Calderone Timo Paulssen + Squeaky Alexandre Fayolle Simon Burton Marius Gedminas John 
Witulski + Konstantin Lopuhin Greg Price Dario Bertini Mark Pearse Simon Cross - Konstantin Lopuhin Andreas Stührk Jean-Philippe St. Pierre Guido van Rossum Pavel Vinogradov + Paweł Piotr Przeradowski Paul deGrandis Ilya Osadchiy + Tobias Oberstein Adrian Kuhn Boris Feigin + Stefano Rivera tav + Taavi Burns Georg Brandl Bert Freudenberg Stian Andreassen - Stefano Rivera + Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas Gerald Klix Mike Blume - Taavi Burns Oscar Nierstrasz + Stefan H. Muller + Jeremy Thurgood + Gregor Wegberg + Rami Chowdhury + Tobias Pape + Edd Barrett David Malcolm Eugene Oden Henry Mason @@ -135,18 +147,16 @@ Dusty Phillips Lukas Renggli Guenter Jantzen - Tobias Oberstein - Remi Meier Ned Batchelder Amit Regmi Ben Young Nicolas Chauvat Andrew Durdin + Andrew Chambers Michael Schneider Nicholas Riley Jason Chu Igor Trindade Oliveira - Jeremy Thurgood Rocco Moretti Gintautas Miliauskas Michael Twomey @@ -159,18 +169,19 @@ Karl Bartel Brian Dorsey Victor Stinner + Andrews Medina Stuart Williams Jasper Schulz + Christian Hudon Toby Watson Antoine Pitrou Aaron Iles Michael Cheng Justas Sadzevicius + Mikael Schönenberg Gasper Zejn Neil Shepperd - Mikael Schönenberg Elmo Mäntynen - Tobias Pape Jonathan David Riehl Stanislaw Halik Anders Qvist @@ -182,19 +193,18 @@ Alexander Sedov Corbin Simpson Christopher Pope - Laurence Tratt - Guillebert Romain + wenzhuman Christian Tismer + Marc Abramowitz Dan Stromberg Stefano Parmesan - Christian Hudon Alexis Daboville Jens-Uwe Mager Carl Meyer Karl Ramm Pieter Zieschang Gabriel - Paweł Piotr Przeradowski + Lukas Vacek Andrew Dalke Sylvain Thenault Nathan Taylor @@ -203,8 +213,11 @@ Alejandro J. Cura Jacob Oscarson Travis Francis Athougies + Ryan Gonzalez Kristjan Valur Jonsson + Sebastian Pawluś Neil Blakey-Milner + anatoly techtonik Lutz Paelike Lucio Torre Lars Wassermann @@ -218,13 +231,14 @@ Martin Blais Lene Wagner Tomo Cocoa - Andrews Medina roberto at goyle + Yury V. 
Zaytsev + Anna Katrina Dominguez William Leslie Bobby Impollonia timo at eistee.fritz.box Andrew Thompson - Yusei Tahara + Ben Darnell Roberto De Ioris Juan Francisco Cantero Hurtado Godefroid Chappelle @@ -234,28 +248,39 @@ Michael Hudson-Doyle Anders Sigfridsson Yasir Suhail + rafalgalczynski at gmail.com Floris Bruynooghe + Laurens Van Houtven Akira Li Gustavo Niemeyer Stephan Busemann - Anna Katrina Dominguez + Rafał Gałczyński + Yusei Tahara Christian Muirhead James Lan shoma hosaka - Daniel Neuhäuser + Daniel Neuh?user + Matthew Miller Buck Golemon Konrad Delong Dinu Gherman Chris Lambacher coolbutuseless at gmail.com + Rodrigo Araújo + w31rd0 Jim Baker - Rodrigo Araújo + James Robert Armin Ronacher Brett Cannon yrttyr + aliceinwire + OlivierBlanvillain Zooko Wilcox-O Hearn Tomer Chachamu Christopher Groskopf + Asmo Soinio + Stefan Marr + jiaaro opassembler.py Antony Lee Jim Hunziker @@ -263,12 +288,13 @@ Even Wiik Thomassen jbs soareschen + Kurt Griffiths + Mike Bayer Flavio Percoco Kristoffer Kleine yasirs Michael Chermside Anna Ravencroft - Andrew Chambers Julien Phalip Dan Loewenherz diff --git a/_pytest/resultlog.py b/_pytest/resultlog.py --- a/_pytest/resultlog.py +++ b/_pytest/resultlog.py @@ -51,16 +51,25 @@ self.config = config self.logfile = logfile # preferably line buffered - def write_log_entry(self, testpath, lettercode, longrepr): + def write_log_entry(self, testpath, lettercode, longrepr, sections=[]): py.builtin.print_("%s %s" % (lettercode, testpath), file=self.logfile) for line in longrepr.splitlines(): py.builtin.print_(" %s" % line, file=self.logfile) + for key, text in sections: + # py.io.StdCaptureFD may send in unicode + if isinstance(text, unicode): + text = text.encode('utf-8') + py.builtin.print_(" ", file=self.logfile) + py.builtin.print_(" -------------------- %s --------------------" + % key.rstrip(), file=self.logfile) + py.builtin.print_(" %s" % (text.rstrip().replace('\n', '\n '),), + file=self.logfile) def log_outcome(self, 
report, lettercode, longrepr): testpath = getattr(report, 'nodeid', None) if testpath is None: testpath = report.fspath - self.write_log_entry(testpath, lettercode, longrepr) + self.write_log_entry(testpath, lettercode, longrepr, report.sections) def pytest_runtest_logreport(self, report): if report.when != "call" and report.passed: diff --git a/dotviewer/graphserver.py b/dotviewer/graphserver.py --- a/dotviewer/graphserver.py +++ b/dotviewer/graphserver.py @@ -160,15 +160,14 @@ " | instructions in dotviewer/sshgraphserver.py\n") try: import pygame - except ImportError: + if isinstance(e, pygame.error): + print >> f, help + except Exception, e: f.seek(0) f.truncate() - print >> f, "ImportError" + print >> f, "%s: %s" % (e.__class__.__name__, e) print >> f, " | Pygame is not installed; either install it, or" print >> f, help - else: - if isinstance(e, pygame.error): - print >> f, help io.sendmsg(msgstruct.MSG_ERROR, f.getvalue()) else: listen_server(sys.argv[1]) diff --git a/lib-python/2.7/cProfile.py b/lib-python/2.7/cProfile.py --- a/lib-python/2.7/cProfile.py +++ b/lib-python/2.7/cProfile.py @@ -161,7 +161,7 @@ # ____________________________________________________________ def main(): - import os, sys + import os, sys, types from optparse import OptionParser usage = "cProfile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." 
parser = OptionParser(usage=usage) @@ -184,12 +184,10 @@ sys.path.insert(0, os.path.dirname(progname)) with open(progname, 'rb') as fp: code = compile(fp.read(), progname, 'exec') - globs = { - '__file__': progname, - '__name__': '__main__', - '__package__': None, - } - runctx(code, globs, None, options.outfile, options.sort) + mainmod = types.ModuleType('__main__') + mainmod.__file__ = progname + mainmod.__package__ = None + runctx(code, mainmod.__dict__, None, options.outfile, options.sort) else: parser.print_usage() return parser diff --git a/lib-python/2.7/ctypes/util.py b/lib-python/2.7/ctypes/util.py --- a/lib-python/2.7/ctypes/util.py +++ b/lib-python/2.7/ctypes/util.py @@ -86,9 +86,10 @@ elif os.name == "posix": # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump - import re, tempfile, errno + import re, errno def _findLib_gcc(name): + import tempfile expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name) fdout, ccout = tempfile.mkstemp() os.close(fdout) diff --git a/lib-python/2.7/imputil.py b/lib-python/2.7/imputil.py --- a/lib-python/2.7/imputil.py +++ b/lib-python/2.7/imputil.py @@ -422,7 +422,8 @@ saved back to the filesystem for future imports. The source file's modification timestamp must be provided as a Long value. 
""" - codestring = open(pathname, 'rU').read() + with open(pathname, 'rU') as fp: + codestring = fp.read() if codestring and codestring[-1] != '\n': codestring = codestring + '\n' code = __builtin__.compile(codestring, pathname, 'exec') @@ -603,8 +604,8 @@ self.desc = desc def import_file(self, filename, finfo, fqname): - fp = open(filename, self.desc[1]) - module = imp.load_module(fqname, fp, filename, self.desc) + with open(filename, self.desc[1]) as fp: + module = imp.load_module(fqname, fp, filename, self.desc) module.__file__ = filename return 0, module, { } diff --git a/lib-python/2.7/modulefinder.py b/lib-python/2.7/modulefinder.py --- a/lib-python/2.7/modulefinder.py +++ b/lib-python/2.7/modulefinder.py @@ -109,16 +109,16 @@ def run_script(self, pathname): self.msg(2, "run_script", pathname) - fp = open(pathname, READ_MODE) - stuff = ("", "r", imp.PY_SOURCE) - self.load_module('__main__', fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) def load_file(self, pathname): dir, name = os.path.split(pathname) name, ext = os.path.splitext(name) - fp = open(pathname, READ_MODE) - stuff = (ext, "r", imp.PY_SOURCE) - self.load_module(name, fp, pathname, stuff) + with open(pathname, READ_MODE) as fp: + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) def import_hook(self, name, caller=None, fromlist=None, level=-1): self.msg(3, "import_hook", name, caller, fromlist, level) @@ -461,6 +461,8 @@ fp, buf, stuff = self.find_module("__init__", m.__path__) self.load_module(fqname, fp, buf, stuff) self.msgout(2, "load_package ->", m) + if fp: + fp.close() return m def add_module(self, fqname): diff --git a/lib-python/2.7/test/test_argparse.py b/lib-python/2.7/test/test_argparse.py --- a/lib-python/2.7/test/test_argparse.py +++ b/lib-python/2.7/test/test_argparse.py @@ -10,6 +10,7 @@ import tempfile import unittest import argparse +import gc from 
StringIO import StringIO @@ -47,6 +48,12 @@ def tearDown(self): os.chdir(self.old_dir) + import gc + # Force a collection which should close FileType() options + gc.collect() + for root, dirs, files in os.walk(self.temp_dir, topdown=False): + for name in files: + os.chmod(os.path.join(self.temp_dir, name), stat.S_IWRITE) shutil.rmtree(self.temp_dir, True) def create_readonly_file(self, filename): diff --git a/lib-python/2.7/test/test_array.py b/lib-python/2.7/test/test_array.py --- a/lib-python/2.7/test/test_array.py +++ b/lib-python/2.7/test/test_array.py @@ -298,6 +298,7 @@ b = array.array(self.badtypecode()) with self.assertRaises(TypeError): a + b + with self.assertRaises(TypeError): a + 'bad' @@ -320,6 +321,7 @@ b = array.array(self.badtypecode()) with self.assertRaises(TypeError): a += b + with self.assertRaises(TypeError): a += 'bad' diff --git a/lib-python/2.7/test/test_builtin.py b/lib-python/2.7/test/test_builtin.py --- a/lib-python/2.7/test/test_builtin.py +++ b/lib-python/2.7/test/test_builtin.py @@ -250,14 +250,12 @@ self.assertRaises(TypeError, compile) self.assertRaises(ValueError, compile, 'print 42\n', '', 'badmode') self.assertRaises(ValueError, compile, 'print 42\n', '', 'single', 0xff) - if check_impl_detail(cpython=True): - self.assertRaises(TypeError, compile, chr(0), 'f', 'exec') + self.assertRaises(TypeError, compile, chr(0), 'f', 'exec') self.assertRaises(TypeError, compile, 'pass', '?', 'exec', mode='eval', source='0', filename='tmp') if have_unicode: compile(unicode('print u"\xc3\xa5"\n', 'utf8'), '', 'exec') - if check_impl_detail(cpython=True): - self.assertRaises(TypeError, compile, unichr(0), 'f', 'exec') + self.assertRaises(TypeError, compile, unichr(0), 'f', 'exec') self.assertRaises(ValueError, compile, unicode('a = 1'), 'f', 'bad') diff --git a/lib-python/2.7/test/test_file.py b/lib-python/2.7/test/test_file.py --- a/lib-python/2.7/test/test_file.py +++ b/lib-python/2.7/test/test_file.py @@ -301,6 +301,7 @@ self.fail("readlines() 
after next() with empty buffer " "failed. Got %r, expected %r" % (line, testline)) # Reading after iteration hit EOF shouldn't hurt either + f.close() f = self.open(TESTFN, 'rb') try: for line in f: diff --git a/lib-python/2.7/test/test_file2k.py b/lib-python/2.7/test/test_file2k.py --- a/lib-python/2.7/test/test_file2k.py +++ b/lib-python/2.7/test/test_file2k.py @@ -162,6 +162,7 @@ # Remark: Do not perform more than one test per open file, # since that does NOT catch the readline error on Windows. data = 'xxx' + self.f.close() for mode in ['w', 'wb', 'a', 'ab']: for attr in ['read', 'readline', 'readlines']: self.f = open(TESTFN, mode) @@ -478,11 +479,10 @@ def _create_file(self): if self.use_buffering: - f = open(self.filename, "w+", buffering=1024*16) + self.f = open(self.filename, "w+", buffering=1024*16) else: - f = open(self.filename, "w+") - self.f = f - self.all_files.append(f) + self.f = open(self.filename, "w+") + self.all_files.append(self.f) oldf = self.all_files.pop(0) if oldf is not None: oldf.close() diff --git a/lib-python/2.7/test/test_gdbm.py b/lib-python/2.7/test/test_gdbm.py --- a/lib-python/2.7/test/test_gdbm.py +++ b/lib-python/2.7/test/test_gdbm.py @@ -74,6 +74,29 @@ size2 = os.path.getsize(filename) self.assertTrue(size1 > size2 >= size0) + def test_sync(self): + # check if sync works at all, not sure how to check it + self.g = gdbm.open(filename, 'cf') + self.g['x'] = 'x' * 10000 + self.g.sync() + + def test_get_key(self): + self.g = gdbm.open(filename, 'cf') + self.g['x'] = 'x' * 10000 + self.g.close() + self.g = gdbm.open(filename, 'r') + self.assertEquals(self.g['x'], 'x' * 10000) + + def test_key_with_null_bytes(self): + key = 'a\x00b' + value = 'c\x00d' + self.g = gdbm.open(filename, 'cf') + self.g[key] = value + self.g.close() + self.g = gdbm.open(filename, 'r') + self.assertEquals(self.g[key], value) + self.assertTrue(key in self.g) + self.assertTrue(self.g.has_key(key)) def test_main(): run_unittest(TestGdbm) diff --git 
a/lib-python/2.7/test/test_genericpath.py b/lib-python/2.7/test/test_genericpath.py --- a/lib-python/2.7/test/test_genericpath.py +++ b/lib-python/2.7/test/test_genericpath.py @@ -231,9 +231,14 @@ unicwd = u'\xe7w\xf0' try: fsencoding = test_support.TESTFN_ENCODING or "ascii" - unicwd.encode(fsencoding) + asciival = unicwd.encode(fsencoding) + if fsencoding == "mbcs": + # http://bugs.python.org/issue850997 + v = asciival.find('?') + if v >= 0: + raise UnicodeEncodeError(fsencoding, unicwd, v, v, asciival) except (AttributeError, UnicodeEncodeError): - # FS encoding is probably ASCII + # FS encoding is probably ASCII or windows and codepage is non-Latin1 pass else: with test_support.temp_cwd(unicwd): diff --git a/lib-python/2.7/test/test_httpservers.py b/lib-python/2.7/test/test_httpservers.py --- a/lib-python/2.7/test/test_httpservers.py +++ b/lib-python/2.7/test/test_httpservers.py @@ -335,6 +335,7 @@ response = self.request(self.tempdir_name + '/') self.check_status_and_reason(response, 404) os.chmod(self.tempdir, 0755) + f.close() def test_head(self): response = self.request( diff --git a/lib-python/2.7/test/test_itertools.py b/lib-python/2.7/test/test_itertools.py --- a/lib-python/2.7/test/test_itertools.py +++ b/lib-python/2.7/test/test_itertools.py @@ -139,7 +139,6 @@ @test_support.impl_detail("tuple reuse is specific to CPython") def test_combinations_tuple_reuse(self): - # Test implementation detail: tuple re-use self.assertEqual(len(set(map(id, combinations('abcde', 3)))), 1) self.assertNotEqual(len(set(map(id, list(combinations('abcde', 3))))), 1) @@ -211,7 +210,6 @@ @test_support.impl_detail("tuple reuse is specific to CPython") def test_combinations_with_replacement_tuple_reuse(self): - # Test implementation detail: tuple re-use cwr = combinations_with_replacement self.assertEqual(len(set(map(id, cwr('abcde', 3)))), 1) self.assertNotEqual(len(set(map(id, list(cwr('abcde', 3))))), 1) @@ -278,7 +276,6 @@ @test_support.impl_detail("tuple reuse is specific 
to CPython") def test_permutations_tuple_reuse(self): - # Test implementation detail: tuple re-use self.assertEqual(len(set(map(id, permutations('abcde', 3)))), 1) self.assertNotEqual(len(set(map(id, list(permutations('abcde', 3))))), 1) diff --git a/lib-python/2.7/test/test_memoryview.py b/lib-python/2.7/test/test_memoryview.py --- a/lib-python/2.7/test/test_memoryview.py +++ b/lib-python/2.7/test/test_memoryview.py @@ -115,8 +115,8 @@ self.assertRaises(TypeError, setitem, (0,), b"a") self.assertRaises(TypeError, setitem, "a", b"a") # Trying to resize the memory object - self.assertRaises((ValueError, TypeError), setitem, 0, b"") - self.assertRaises((ValueError, TypeError), setitem, 0, b"ab") + self.assertRaises(ValueError, setitem, 0, b"") + self.assertRaises(ValueError, setitem, 0, b"ab") self.assertRaises(ValueError, setitem, slice(1,1), b"a") self.assertRaises(ValueError, setitem, slice(0,2), b"a") @@ -166,18 +166,11 @@ self.assertTrue(m[0:6] == m[:]) self.assertFalse(m[0:5] == m) - if test_support.check_impl_detail(cpython=True): - # what is supported and what is not supported by memoryview is - # very inconsisten on CPython. In PyPy, memoryview supports - # the buffer interface, and thus the following comparison - # succeeds. 
See also the comment in - # pypy.modules.__builtin__.interp_memoryview.W_MemoryView.descr_buffer - # - # Comparison with objects which don't support the buffer API - self.assertFalse(m == u"abcdef", "%s %s" % (self, tp)) - self.assertTrue(m != u"abcdef") - self.assertFalse(u"abcdef" == m) - self.assertTrue(u"abcdef" != m) + # Comparison with objects which don't support the buffer API + self.assertFalse(m == u"abcdef") + self.assertTrue(m != u"abcdef") + self.assertFalse(u"abcdef" == m) + self.assertTrue(u"abcdef" != m) # Unordered comparisons are unimplemented, and therefore give # arbitrary results (they raise a TypeError in py3k) diff --git a/lib-python/2.7/test/test_zipfile.py b/lib-python/2.7/test/test_zipfile.py --- a/lib-python/2.7/test/test_zipfile.py +++ b/lib-python/2.7/test/test_zipfile.py @@ -421,7 +421,7 @@ zipfp.extractall() for fpath, fdata in SMALL_TEST_DATA: outfile = os.path.join(os.getcwd(), fpath) - + with open(outfile, "rb") as fid: self.assertEqual(fdata, fid.read()) os.remove(outfile) @@ -596,8 +596,6 @@ def tearDown(self): unlink(TESTFN) unlink(TESTFN2) - if os.path.exists(TESTFN): - os.remove(TESTFN) class TestZip64InSmallFiles(unittest.TestCase): @@ -717,12 +715,6 @@ class PyZipFileTests(unittest.TestCase): - def teardown(self): - if os.path.exists(TESTFN): - os.remove(TESTFN) - if os.path.exists(TESTFN2): - os.remove(TESTFN2) - def test_write_pyfile(self): with zipfile.PyZipFile(TemporaryFile(), "w") as zipfp: fn = __file__ @@ -787,8 +779,6 @@ rmtree(TESTFN2) def test_write_non_pyfile(self): - if os.path.exists(TESTFN): - os.remove(TESTFN) with zipfile.PyZipFile(TemporaryFile(), "w") as zipfp: with open(TESTFN, 'w') as fid: fid.write('most definitely not a python file') @@ -1168,8 +1158,6 @@ def tearDown(self): unlink(TESTFN) unlink(TESTFN2) - if os.path.exists(TESTFN): - os.remove(TESTFN) class DecryptionTests(unittest.TestCase): @@ -1220,28 +1208,16 @@ def test_bad_password(self): self.zip.setpassword("perl") - try: - 
self.assertRaises(RuntimeError, self.zip.read, "test.txt") - finally: - self.zip.close() + self.assertRaises(RuntimeError, self.zip.read, "test.txt") self.zip2.setpassword("perl") - try: - self.assertRaises(RuntimeError, self.zip2.read, "zero") - finally: - self.zip2.close() + self.assertRaises(RuntimeError, self.zip2.read, "zero") @skipUnless(zlib, "requires zlib") def test_good_password(self): self.zip.setpassword("python") - try: - self.assertEqual(self.zip.read("test.txt"), self.plain) - finally: - self.zip.close() + self.assertEqual(self.zip.read("test.txt"), self.plain) self.zip2.setpassword("12345") - try: - self.assertEqual(self.zip2.read("zero"), self.plain2) - finally: - self.zip2.close() + self.assertEqual(self.zip2.read("zero"), self.plain2) class TestsWithRandomBinaryFiles(unittest.TestCase): @@ -1255,10 +1231,8 @@ fp.write(self.data) def tearDown(self): - if os.path.exists(TESTFN): - os.remove(TESTFN) - if os.path.exists(TESTFN2): - os.remove(TESTFN2) + unlink(TESTFN) + unlink(TESTFN2) def make_test_archive(self, f, compression): # Create the ZIP archive @@ -1401,8 +1375,8 @@ zipf.read('ones') with zipf.open('ones') as zopen1: pass - for x in range(10): - self.assertLess(open('/dev/null').fileno(), 100) + with open(os.devnull) as f: + self.assertLess(f.fileno(), 100) def tearDown(self): unlink(TESTFN2) @@ -1433,7 +1407,7 @@ def tearDown(self): rmtree(TESTFN2) if os.path.exists(TESTFN): - os.remove(TESTFN) + unlink(TESTFN) class UniversalNewlineTests(unittest.TestCase): diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -219,6 +219,8 @@ if restype is None: import ctypes restype = ctypes.c_int + if self._argtypes_ is None: + self._argtypes_ = [] self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype) self._check_argtypes_for_fastpath() return diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ 
b/lib_pypy/_ctypes_test.py @@ -1,3 +1,6 @@ +import imp +import os + try: import cpyext except ImportError: @@ -10,4 +13,13 @@ pass # obscure condition of _ctypes_test.py being imported by py.test else: import _pypy_testcapi - _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test') + cfile = '_ctypes_test.c' + thisdir = os.path.dirname(__file__) + output_dir = _pypy_testcapi.get_hashed_dir(os.path.join(thisdir, cfile)) + try: + fp, filename, description = imp.find_module('_ctypes_test', path=[output_dir]) + with fp: + imp.load_module('_ctypes_test', fp, filename, description) + except ImportError: + print('could not find _ctypes_test in %s' % output_dir) + _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_pypy_interact.py b/lib_pypy/_pypy_interact.py --- a/lib_pypy/_pypy_interact.py +++ b/lib_pypy/_pypy_interact.py @@ -3,6 +3,8 @@ import sys import os +irc_header = "And now for something completely different" + def interactive_console(mainmodule=None, quiet=False): # set sys.{ps1,ps2} just before invoking the interactive interpreter. 
This @@ -15,8 +17,7 @@ if not quiet: try: from _pypy_irc_topic import some_topic - text = "And now for something completely different: ``%s''" % ( - some_topic(),) + text = "%s: ``%s''" % ( irc_header, some_topic()) while len(text) >= 80: i = text[:80].rfind(' ') print(text[:i]) diff --git a/lib_pypy/_pypy_testcapi.py b/lib_pypy/_pypy_testcapi.py --- a/lib_pypy/_pypy_testcapi.py +++ b/lib_pypy/_pypy_testcapi.py @@ -1,5 +1,31 @@ import os, sys, imp -import tempfile +import tempfile, binascii + + +def get_hashed_dir(cfile): + with open(cfile,'r') as fid: + content = fid.read() + # from cffi's Verifier() + key = '\x00'.join([sys.version[:3], content]) + if sys.version_info >= (3,): + key = key.encode('utf-8') + k1 = hex(binascii.crc32(key[0::2]) & 0xffffffff) + k1 = k1.lstrip('0x').rstrip('L') + k2 = hex(binascii.crc32(key[1::2]) & 0xffffffff) + k2 = k2.lstrip('0').rstrip('L') + try: + username = os.environ['USER'] #linux, et al + except KeyError: + try: + username = os.environ['USERNAME'] #windows + except KeyError: + username = os.getuid() + output_dir = tempfile.gettempdir() + os.path.sep + 'tmp_%s_%s%s' % ( + username, k1, k2) + if not os.path.exists(output_dir): + os.mkdir(output_dir) + return output_dir + def _get_c_extension_suffix(): for ext, mod, typ in imp.get_suffixes(): @@ -7,12 +33,13 @@ return ext -def compile_shared(csource, modulename): +def compile_shared(csource, modulename, output_dir=None): """Compile '_testcapi.c' or '_ctypes_test.c' into an extension module, and import it. 
""" thisdir = os.path.dirname(__file__) - output_dir = tempfile.mkdtemp() + if output_dir is None: + output_dir = tempfile.mkdtemp() from distutils.ccompiler import new_compiler diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -38,6 +38,7 @@ if sys.version_info[0] >= 3: StandardError = Exception + cmp = lambda x, y: (x > y) - (x < y) long = int xrange = range basestring = unicode = str diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -1,7 +1,19 @@ +import imp +import os + try: import cpyext except ImportError: raise ImportError("No module named '_testcapi'") -else: - import _pypy_testcapi - _pypy_testcapi.compile_shared('_testcapimodule.c', '_testcapi') + +import _pypy_testcapi +cfile = '_testcapimodule.c' +thisdir = os.path.dirname(__file__) +output_dir = _pypy_testcapi.get_hashed_dir(os.path.join(thisdir, cfile)) + +try: + fp, filename, description = imp.find_module('_testcapi', path=[output_dir]) + with fp: + imp.load_module('_testcapi', fp, filename, description) +except ImportError: + _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/license.terms b/lib_pypy/_tkinter/license.terms new file mode 100644 --- /dev/null +++ b/lib_pypy/_tkinter/license.terms @@ -0,0 +1,39 @@ +This software is copyrighted by the Regents of the University of +California, Sun Microsystems, Inc., and other parties. The following +terms apply to all files associated with the software unless explicitly +disclaimed in individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, provided +that existing copyright notices are retained in all copies and that this +notice is included verbatim in any distributions. No written agreement, +license, or royalty fee is required for any of the authorized uses. 
+Modifications to this software may be copyrighted by their authors +and need not follow the licensing terms described here, provided that +the new terms are clearly indicated on the first page of each file where +they apply. + +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE +IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE +NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" +in the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you +are acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. 
diff --git a/lib_pypy/_tkinter/tklib.py b/lib_pypy/_tkinter/tklib.py --- a/lib_pypy/_tkinter/tklib.py +++ b/lib_pypy/_tkinter/tklib.py @@ -121,6 +121,10 @@ incdirs = [] linklibs = ['tcl85', 'tk85'] libdirs = [] +elif sys.platform == 'darwin': + incdirs = ['/System/Library/Frameworks/Tk.framework/Versions/Current/Headers/'] + linklibs = ['tcl', 'tk'] + libdirs = [] else: incdirs=['/usr/include/tcl'] linklibs=['tcl', 'tk'] diff --git a/lib_pypy/audioop.py b/lib_pypy/audioop.py --- a/lib_pypy/audioop.py +++ b/lib_pypy/audioop.py @@ -350,7 +350,7 @@ r_sample = getsample(cp, size, i + 1) sample = (l_sample * fac1) + (r_sample * fac2) - sample = clip(sample) + sample = int(clip(sample)) _put_sample(result, size, i // 2, sample) @@ -501,7 +501,7 @@ # slice off extra bytes trim_index = (out_i * bytes_per_frame) - len(retval) - retval = _buffer(retval)[:trim_index] + retval = retval[:trim_index] return (retval, (d, tuple(samps))) diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py --- a/lib_pypy/ctypes_support.py +++ b/lib_pypy/ctypes_support.py @@ -1,4 +1,3 @@ - """ This file provides some support for things like standard_c_lib and errno access, as portable as possible """ @@ -22,7 +21,7 @@ standard_c_lib._errno.argtypes = None def _where_is_errno(): return standard_c_lib._errno() - + elif sys.platform in ('linux2', 'freebsd6'): standard_c_lib.__errno_location.restype = ctypes.POINTER(ctypes.c_int) standard_c_lib.__errno_location.argtypes = None @@ -42,5 +41,3 @@ def set_errno(value): errno_p = _where_is_errno() errno_p.contents.value = value - - diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py --- a/lib_pypy/datetime.py +++ b/lib_pypy/datetime.py @@ -431,7 +431,7 @@ Representation: (days, seconds, microseconds). Why? Because I felt like it. 
""" - __slots__ = '_days', '_seconds', '_microseconds' + __slots__ = '_days', '_seconds', '_microseconds', '_hashcode' def __new__(cls, days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0): @@ -525,14 +525,14 @@ assert isinstance(s, int) and 0 <= s < 24*3600 assert isinstance(us, int) and 0 <= us < 1000000 + if abs(d) > 999999999: + raise OverflowError("timedelta # of days is too large: %d" % d) + self = object.__new__(cls) - self._days = d self._seconds = s self._microseconds = us - if abs(d) > 999999999: - raise OverflowError("timedelta # of days is too large: %d" % d) - + self._hashcode = -1 return self def __repr__(self): @@ -687,7 +687,9 @@ return _cmp(self._getstate(), other._getstate()) def __hash__(self): - return hash(self._getstate()) + if self._hashcode == -1: + self._hashcode = hash(self._getstate()) + return self._hashcode def __nonzero__(self): return (self._days != 0 or @@ -735,7 +737,7 @@ Properties (readonly): year, month, day """ - __slots__ = '_year', '_month', '_day' + __slots__ = '_year', '_month', '_day', '_hashcode' def __new__(cls, year, month=None, day=None): """Constructor. @@ -749,12 +751,14 @@ # Pickle support self = object.__new__(cls) self.__setstate(year) + self._hashcode = -1 return self year, month, day = _check_date_fields(year, month, day) self = object.__new__(cls) self._year = year self._month = month self._day = day + self._hashcode = -1 return self # Additional constructors @@ -936,7 +940,9 @@ def __hash__(self): "Hash." - return hash(self._getstate()) + if self._hashcode == -1: + self._hashcode = hash(self._getstate()) + return self._hashcode # Computations @@ -1122,7 +1128,7 @@ Properties (readonly): hour, minute, second, microsecond, tzinfo """ - __slots__ = '_hour', '_minute', '_second', '_microsecond', '_tzinfo' + __slots__ = '_hour', '_minute', '_second', '_microsecond', '_tzinfo', '_hashcode' def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None): """Constructor. 
@@ -1137,6 +1143,7 @@ # Pickle support self = object.__new__(cls) self.__setstate(hour, minute or None) + self._hashcode = -1 return self hour, minute, second, microsecond = _check_time_fields( hour, minute, second, microsecond) @@ -1147,6 +1154,7 @@ self._second = second self._microsecond = microsecond self._tzinfo = tzinfo + self._hashcode = -1 return self # Read-only field accessors @@ -1242,13 +1250,17 @@ def __hash__(self): """Hash.""" - tzoff = self._utcoffset() - if not tzoff: # zero or None - return hash(self._getstate()[0]) - h, m = divmod(self.hour * 60 + self.minute - tzoff, 60) - if 0 <= h < 24: - return hash(time(h, m, self.second, self.microsecond)) - return hash((h, m, self.second, self.microsecond)) + if self._hashcode == -1: + tzoff = self._utcoffset() + if not tzoff: # zero or None + self._hashcode = hash(self._getstate()[0]) + else: + h, m = divmod(self.hour * 60 + self.minute - tzoff, 60) + if 0 <= h < 24: + self._hashcode = hash(time(h, m, self.second, self.microsecond)) + else: + self._hashcode = hash((h, m, self.second, self.microsecond)) + return self._hashcode # Conversion to string @@ -1408,14 +1420,13 @@ return (basestate, self._tzinfo) def __setstate(self, string, tzinfo): + if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): + raise TypeError("bad tzinfo state arg") self._hour, self._minute, self._second, us1, us2, us3 = ( ord(string[0]), ord(string[1]), ord(string[2]), ord(string[3]), ord(string[4]), ord(string[5])) self._microsecond = (((us1 << 8) | us2) << 8) | us3 - if tzinfo is None or isinstance(tzinfo, _tzinfo_class): - self._tzinfo = tzinfo - else: - raise TypeError("bad tzinfo state arg") + self._tzinfo = tzinfo def __reduce__(self): return (time, self._getstate()) @@ -1439,8 +1450,9 @@ if isinstance(year, bytes) and len(year) == 10 and \ 1 <= ord(year[2]) <= 12: # Pickle support - self = date.__new__(cls, year[:4]) + self = object.__new__(cls) self.__setstate(year, month) + self._hashcode = -1 return self year, 
month, day = _check_date_fields(year, month, day) hour, minute, second, microsecond = _check_time_fields( @@ -1455,6 +1467,7 @@ self._second = second self._microsecond = microsecond self._tzinfo = tzinfo + self._hashcode = -1 return self # Read-only field accessors @@ -1876,12 +1889,15 @@ return base + timedelta(minutes = otoff-myoff) def __hash__(self): - tzoff = self._utcoffset() - if tzoff is None: - return hash(self._getstate()[0]) - days = _ymd2ord(self.year, self.month, self.day) - seconds = self.hour * 3600 + (self.minute - tzoff) * 60 + self.second - return hash(timedelta(days, seconds, self.microsecond)) + if self._hashcode == -1: + tzoff = self._utcoffset() + if tzoff is None: + self._hashcode = hash(self._getstate()[0]) + else: + days = _ymd2ord(self.year, self.month, self.day) + seconds = self.hour * 3600 + (self.minute - tzoff) * 60 + self.second + self._hashcode = hash(timedelta(days, seconds, self.microsecond)) + return self._hashcode # Pickle support. @@ -1898,6 +1914,8 @@ return (basestate, self._tzinfo) def __setstate(self, string, tzinfo): + if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): + raise TypeError("bad tzinfo state arg") (yhi, ylo, self._month, self._day, self._hour, self._minute, self._second, us1, us2, us3) = (ord(string[0]), ord(string[1]), ord(string[2]), ord(string[3]), @@ -1905,10 +1923,7 @@ ord(string[7]), ord(string[8]), ord(string[9])) self._year = yhi * 256 + ylo self._microsecond = (((us1 << 8) | us2) << 8) | us3 - if tzinfo is None or isinstance(tzinfo, _tzinfo_class): - self._tzinfo = tzinfo - else: - raise TypeError("bad tzinfo state arg") + self._tzinfo = tzinfo def __reduce__(self): return (self.__class__, self._getstate()) diff --git a/lib_pypy/gdbm.py b/lib_pypy/gdbm.py new file mode 100644 --- /dev/null +++ b/lib_pypy/gdbm.py @@ -0,0 +1,174 @@ +import cffi, os + +ffi = cffi.FFI() +ffi.cdef(''' +#define GDBM_READER ... +#define GDBM_WRITER ... +#define GDBM_WRCREAT ... +#define GDBM_NEWDB ... 
+#define GDBM_FAST ... +#define GDBM_SYNC ... +#define GDBM_NOLOCK ... +#define GDBM_REPLACE ... + +void* gdbm_open(char *, int, int, int, void (*)()); +void gdbm_close(void*); + +typedef struct { + char *dptr; + int dsize; +} datum; + +datum gdbm_fetch(void*, datum); +int gdbm_delete(void*, datum); +int gdbm_store(void*, datum, datum, int); +int gdbm_exists(void*, datum); + +int gdbm_reorganize(void*); + +datum gdbm_firstkey(void*); +datum gdbm_nextkey(void*, datum); +void gdbm_sync(void*); + +char* gdbm_strerror(int); +int gdbm_errno; + +void free(void*); +''') + +try: + lib = ffi.verify(''' + #include "gdbm.h" + ''', libraries=['gdbm']) +except cffi.VerificationError as e: + # distutils does not preserve the actual message, + # but the verification is simple enough that the + # failure must be due to missing gdbm dev libs + raise ImportError('%s: %s' %(e.__class__.__name__, e)) + +class error(Exception): + pass + +def _fromstr(key): + if not isinstance(key, str): + raise TypeError("gdbm mappings have string indices only") + return {'dptr': ffi.new("char[]", key), 'dsize': len(key)} + +class gdbm(object): + ll_dbm = None + + def __init__(self, filename, iflags, mode): + res = lib.gdbm_open(filename, 0, iflags, mode, ffi.NULL) + self.size = -1 + if not res: + self._raise_from_errno() + self.ll_dbm = res + + def close(self): + if self.ll_dbm: + lib.gdbm_close(self.ll_dbm) + self.ll_dbm = None + + def _raise_from_errno(self): + if ffi.errno: + raise error(os.strerror(ffi.errno)) + raise error(lib.gdbm_strerror(lib.gdbm_errno)) + + def __len__(self): + if self.size < 0: + self.size = len(self.keys()) + return self.size + + def __setitem__(self, key, value): + self._check_closed() + self._size = -1 + r = lib.gdbm_store(self.ll_dbm, _fromstr(key), _fromstr(value), + lib.GDBM_REPLACE) + if r < 0: + self._raise_from_errno() + + def __delitem__(self, key): + self._check_closed() + res = lib.gdbm_delete(self.ll_dbm, _fromstr(key)) + if res < 0: + raise KeyError(key) + + 
def __contains__(self, key): + self._check_closed() + return lib.gdbm_exists(self.ll_dbm, _fromstr(key)) + has_key = __contains__ + + def __getitem__(self, key): + self._check_closed() + drec = lib.gdbm_fetch(self.ll_dbm, _fromstr(key)) + if not drec.dptr: + raise KeyError(key) + res = str(ffi.buffer(drec.dptr, drec.dsize)) + lib.free(drec.dptr) + return res + + def keys(self): + self._check_closed() + l = [] + key = lib.gdbm_firstkey(self.ll_dbm) + while key.dptr: + l.append(str(ffi.buffer(key.dptr, key.dsize))) + nextkey = lib.gdbm_nextkey(self.ll_dbm, key) + lib.free(key.dptr) + key = nextkey + return l + + def firstkey(self): + self._check_closed() + key = lib.gdbm_firstkey(self.ll_dbm) + if key.dptr: + res = str(ffi.buffer(key.dptr, key.dsize)) + lib.free(key.dptr) + return res + + def nextkey(self, key): + self._check_closed() + key = lib.gdbm_nextkey(self.ll_dbm, _fromstr(key)) + if key.dptr: + res = str(ffi.buffer(key.dptr, key.dsize)) + lib.free(key.dptr) + return res + + def reorganize(self): + self._check_closed() + if lib.gdbm_reorganize(self.ll_dbm) < 0: + self._raise_from_errno() + + def _check_closed(self): + if not self.ll_dbm: + raise error("GDBM object has already been closed") + + __del__ = close + + def sync(self): + self._check_closed() + lib.gdbm_sync(self.ll_dbm) + +def open(filename, flags='r', mode=0666): + if flags[0] == 'r': + iflags = lib.GDBM_READER + elif flags[0] == 'w': + iflags = lib.GDBM_WRITER + elif flags[0] == 'c': + iflags = lib.GDBM_WRCREAT + elif flags[0] == 'n': + iflags = lib.GDBM_NEWDB + else: + raise error("First flag must be one of 'r', 'w', 'c' or 'n'") + for flag in flags[1:]: + if flag == 'f': + iflags |= lib.GDBM_FAST + elif flag == 's': + iflags |= lib.GDBM_SYNC + elif flag == 'u': + iflags |= lib.GDBM_NOLOCK + else: + raise error("Flag '%s' not supported" % flag) + return gdbm(filename, iflags, mode) + +open_flags = "rwcnfsu" diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- 
a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -113,7 +113,7 @@ try: for name in modlist: __import__(name) - except (ImportError, CompilationError, py.test.skip.Exception), e: + except (ImportError, CompilationError, py.test.skip.Exception) as e: errcls = e.__class__.__name__ raise Exception( "The module %r is disabled\n" % (modname,) + @@ -217,7 +217,7 @@ "make instances really small but slow without the JIT", default=False, requires=[("objspace.std.getattributeshortcut", True), - ("objspace.std.withmethodcache", True), + ("objspace.std.withtypeversion", True), ]), BoolOption("withrangelist", diff --git a/pypy/config/test/test_pypyoption.py b/pypy/config/test/test_pypyoption.py --- a/pypy/config/test/test_pypyoption.py +++ b/pypy/config/test/test_pypyoption.py @@ -12,9 +12,9 @@ assert conf.objspace.usemodules.gc conf.objspace.std.withmapdict = True - assert conf.objspace.std.withmethodcache + assert conf.objspace.std.withtypeversion conf = get_pypy_config() - conf.objspace.std.withmethodcache = False + conf.objspace.std.withtypeversion = False py.test.raises(ConfigError, "conf.objspace.std.withmapdict = True") def test_conflicting_gcrootfinder(): diff --git a/pypy/doc/Makefile b/pypy/doc/Makefile --- a/pypy/doc/Makefile +++ b/pypy/doc/Makefile @@ -7,63 +7,80 @@ PAPER = BUILDDIR = _build +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex man changes linkcheck doctest +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " man to make manual pages" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to 
make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: - -rm -rf $(BUILDDIR)/* + rm -rf $(BUILDDIR)/* html: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + pickle: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
qthelp: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ @@ -72,35 +89,89 @@ @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyPy.qhc" +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/PyPy" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyPy" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + latex: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." man: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo - @echo "Build finished. 
The manual pages are in $(BUILDDIR)/man" + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: - # python config/generate.py #readthedocs will not run this Makefile $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/pypy/doc/_ref.txt b/pypy/doc/_ref.txt --- a/pypy/doc/_ref.txt +++ b/pypy/doc/_ref.txt @@ -1,10 +1,12 @@ +.. 
This file is generated automatically by makeref.py script, + which in turn is run manually. + .. _`ctypes_configure/doc/sample.py`: https://bitbucket.org/pypy/pypy/src/default/ctypes_configure/doc/sample.py .. _`dotviewer/`: https://bitbucket.org/pypy/pypy/src/default/dotviewer/ .. _`lib-python/`: https://bitbucket.org/pypy/pypy/src/default/lib-python/ .. _`lib-python/2.7/dis.py`: https://bitbucket.org/pypy/pypy/src/default/lib-python/2.7/dis.py .. _`lib_pypy/`: https://bitbucket.org/pypy/pypy/src/default/lib_pypy/ .. _`lib_pypy/greenlet.py`: https://bitbucket.org/pypy/pypy/src/default/lib_pypy/greenlet.py -.. _`lib_pypy/pypy_test/`: https://bitbucket.org/pypy/pypy/src/default/lib_pypy/pypy_test/ .. _`lib_pypy/tputil.py`: https://bitbucket.org/pypy/pypy/src/default/lib_pypy/tputil.py .. _`pypy/bin/`: https://bitbucket.org/pypy/pypy/src/default/pypy/bin/ .. _`pypy/bin/pyinteractive.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/bin/pyinteractive.py @@ -35,7 +37,6 @@ .. _`pypy/interpreter/gateway.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/interpreter/gateway.py .. _`pypy/interpreter/mixedmodule.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/interpreter/mixedmodule.py .. _`pypy/interpreter/module.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/interpreter/module.py -.. _`pypy/interpreter/nestedscope.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/interpreter/nestedscope.py .. _`pypy/interpreter/pyframe.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/interpreter/pyframe.py .. _`pypy/interpreter/pyopcode.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/interpreter/pyopcode.py .. _`pypy/interpreter/pyparser`: @@ -49,21 +50,21 @@ .. _`pypy/module`: .. _`pypy/module/`: https://bitbucket.org/pypy/pypy/src/default/pypy/module/ .. _`pypy/module/__builtin__/__init__.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/module/__builtin__/__init__.py +.. 
_`pypy/module/cppyy/capi/__init__.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/module/cppyy/capi/__init__.py +.. _`pypy/module/cppyy/capi/builtin_capi.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/module/cppyy/capi/builtin_capi.py +.. _`pypy/module/cppyy/include/capi.h`: https://bitbucket.org/pypy/pypy/src/default/pypy/module/cppyy/include/capi.h +.. _`pypy/module/test_lib_pypy/`: https://bitbucket.org/pypy/pypy/src/default/pypy/module/test_lib_pypy/ .. _`pypy/objspace/`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/ -.. _`pypy/objspace/flow/`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/flow/ .. _`pypy/objspace/std`: .. _`pypy/objspace/std/`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/ -.. _`pypy/objspace/std/listtype.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/listtype.py +.. _`pypy/objspace/std/bytesobject.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/bytesobject.py .. _`pypy/objspace/std/multimethod.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/multimethod.py .. _`pypy/objspace/std/objspace.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/objspace.py .. _`pypy/objspace/std/proxy_helpers.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/proxy_helpers.py .. _`pypy/objspace/std/proxyobject.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/proxyobject.py -.. _`pypy/objspace/std/stringtype.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/stringtype.py +.. _`pypy/objspace/std/strbufobject.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/strbufobject.py .. _`pypy/objspace/std/transparent.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/transparent.py -.. _`pypy/objspace/std/tupleobject.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/tupleobject.py -.. 
_`pypy/objspace/std/tupletype.py`: https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/tupletype.py .. _`pypy/tool/`: https://bitbucket.org/pypy/pypy/src/default/pypy/tool/ -.. _`pypy/tool/algo/`: https://bitbucket.org/pypy/pypy/src/default/pypy/tool/algo/ .. _`pypy/tool/pytest/`: https://bitbucket.org/pypy/pypy/src/default/pypy/tool/pytest/ .. _`rpython/annotator`: .. _`rpython/annotator/`: https://bitbucket.org/pypy/pypy/src/default/rpython/annotator/ @@ -75,6 +76,11 @@ .. _`rpython/config/translationoption.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/config/translationoption.py .. _`rpython/flowspace/`: https://bitbucket.org/pypy/pypy/src/default/rpython/flowspace/ .. _`rpython/flowspace/model.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/flowspace/model.py +.. _`rpython/memory/`: https://bitbucket.org/pypy/pypy/src/default/rpython/memory/ +.. _`rpython/memory/gc/generation.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/memory/gc/generation.py +.. _`rpython/memory/gc/hybrid.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/memory/gc/hybrid.py +.. _`rpython/memory/gc/minimarkpage.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/memory/gc/minimarkpage.py +.. _`rpython/memory/gc/semispace.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/memory/gc/semispace.py .. _`rpython/rlib`: .. _`rpython/rlib/`: https://bitbucket.org/pypy/pypy/src/default/rpython/rlib/ .. _`rpython/rlib/listsort.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rlib/listsort.py @@ -93,16 +99,12 @@ .. _`rpython/rtyper/`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/ .. _`rpython/rtyper/lltypesystem/`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/lltypesystem/ .. _`rpython/rtyper/lltypesystem/lltype.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/lltypesystem/lltype.py -.. _`rpython/rtyper/memory/`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/memory/ -.. 
_`rpython/rtyper/memory/gc/generation.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/memory/gc/generation.py -.. _`rpython/rtyper/memory/gc/hybrid.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/memory/gc/hybrid.py -.. _`rpython/rtyper/memory/gc/minimarkpage.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/memory/gc/minimarkpage.py -.. _`rpython/rtyper/memory/gc/semispace.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/memory/gc/semispace.py .. _`rpython/rtyper/rint.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/rint.py .. _`rpython/rtyper/rlist.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/rlist.py .. _`rpython/rtyper/rmodel.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/rmodel.py .. _`rpython/rtyper/rtyper.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/rtyper.py .. _`rpython/rtyper/test/test_llinterp.py`: https://bitbucket.org/pypy/pypy/src/default/rpython/rtyper/test/test_llinterp.py +.. _`rpython/tool/algo/`: https://bitbucket.org/pypy/pypy/src/default/rpython/tool/algo/ .. _`rpython/translator`: .. _`rpython/translator/`: https://bitbucket.org/pypy/pypy/src/default/rpython/translator/ .. _`rpython/translator/backendopt/`: https://bitbucket.org/pypy/pypy/src/default/rpython/translator/backendopt/ diff --git a/pypy/doc/cleanup.rst b/pypy/doc/cleanup.rst --- a/pypy/doc/cleanup.rst +++ b/pypy/doc/cleanup.rst @@ -9,9 +9,3 @@ distribution.rst - dot-net.rst - - - - - diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -105,7 +105,7 @@ while True: try: w_key = space.next(w_iter) - except OperationError, e: + except OperationError as e: if not e.match(space, space.w_StopIteration): raise # re-raise other app-level exceptions break @@ -348,8 +348,12 @@ **objects** - Normal rules apply. 
Special methods are not honoured, except ``__init__``, - ``__del__`` and ``__iter__``. + Normal rules apply. The only special methods that are honoured are + ``__init__``, ``__del__``, ``__len__``, ``__getitem__``, ``__setitem__``, + ``__getslice__``, ``__setslice__``, and ``__iter__``. To handle slicing, + ``__getslice__`` and ``__setslice__`` must be used; using ``__getitem__`` and + ``__setitem__`` for slicing isn't supported. Additionally, using negative + indices for slicing is still not supported, even when using ``__getslice__``. This layout makes the number of types to take care about quite limited. @@ -567,7 +571,7 @@ try: ... - except OperationError, e: + except OperationError as e: if not e.match(space, space.w_XxxError): raise ... @@ -742,9 +746,9 @@ Testing modules in ``lib_pypy/`` -------------------------------- -You can go to the `lib_pypy/pypy_test/`_ directory and invoke the testing tool +You can go to the `pypy/module/test_lib_pypy/`_ directory and invoke the testing tool ("py.test" or "python ../../pypy/test_all.py") to run tests against the -lib_pypy hierarchy. Note, that tests in `lib_pypy/pypy_test/`_ are allowed +lib_pypy hierarchy. Note, that tests in `pypy/module/test_lib_pypy/`_ are allowed and encouraged to let their tests run at interpreter level although `lib_pypy/`_ modules eventually live at PyPy's application level. This allows us to quickly test our python-coded reimplementations @@ -835,15 +839,6 @@ web interface. .. _`development tracker`: https://bugs.pypy.org/ - -use your codespeak login or register ------------------------------------- - -If you have an existing codespeak account, you can use it to login within the -tracker. Else, you can `register with the tracker`_ easily. - - -.. _`register with the tracker`: https://bugs.pypy.org/user?@template=register .. 
_`roundup`: http://roundup.sourceforge.net/ diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -18,11 +18,31 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.append(os.path.abspath('.')) + +# -- Read The Docs theme config ------------------------------------------------ + +# on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org +on_rtd = os.environ.get('READTHEDOCS', None) == 'True' + +if not on_rtd: # only import and set the theme if we're building docs locally + try: + import sphinx_rtd_theme + html_theme = 'sphinx_rtd_theme' + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + except ImportError: + print('sphinx_rtd_theme is not installed') + html_theme = 'default' + +# otherwise, readthedocs.org uses their theme by default, so no need to specify it + + # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.ifconfig', 'sphinx.ext.graphviz', 'pypyconfig'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', 'sphinx.ext.ifconfig', 'sphinx.ext.graphviz', + 'pypyconfig'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -45,9 +65,9 @@ # built documents. # # The short X.Y version. -version = '2.2' +version = '2.3' # The full version, including alpha/beta/rc tags. -release = '2.2.1' +release = '2.3.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -91,7 +111,7 @@ # The theme to use for HTML and HTML Help pages. 
Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'default' +#html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/pypy/doc/config/objspace.usemodules.oracle.txt b/pypy/doc/config/objspace.usemodules.oracle.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.usemodules.oracle.txt +++ /dev/null @@ -1,2 +0,0 @@ -Use the 'oracle' module. -This module is off by default, requires oracle client installed. diff --git a/pypy/doc/config/opt.rst b/pypy/doc/config/opt.rst --- a/pypy/doc/config/opt.rst +++ b/pypy/doc/config/opt.rst @@ -46,5 +46,5 @@ The default level is `2`. -.. _`Boehm-Demers-Weiser garbage collector`: http://www.hpl.hp.com/personal/Hans_Boehm/gc/ +.. _`Boehm-Demers-Weiser garbage collector`: http://hboehm.info/gc/ .. _`custom garbage collectors`: ../garbage_collection.html diff --git a/pypy/doc/config/translation.backendopt.txt b/pypy/doc/config/translation.backendopt.txt --- a/pypy/doc/config/translation.backendopt.txt +++ b/pypy/doc/config/translation.backendopt.txt @@ -1,5 +1,5 @@ This group contains options about various backend optimization passes. Most of them are described in the `EU report about optimization`_ -.. _`EU report about optimization`: http://codespeak.net/pypy/extradoc/eu-report/D07.1_Massive_Parallelism_and_Translation_Aspects-2007-02-28.pdf +.. _`EU report about optimization`: https://bitbucket.org/pypy/extradoc/raw/tip/eu-report/D07.1_Massive_Parallelism_and_Translation_Aspects-2007-02-28.pdf diff --git a/pypy/doc/config/translation.log.txt b/pypy/doc/config/translation.log.txt --- a/pypy/doc/config/translation.log.txt +++ b/pypy/doc/config/translation.log.txt @@ -2,4 +2,4 @@ These must be enabled by setting the PYPYLOG environment variable. The exact set of features supported by PYPYLOG is described in -pypy/translation/c/src/debug_print.h. 
+rpython/translator/c/src/debug_print.h. diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -15,21 +15,21 @@ Alex Gaynor Michael Hudson David Schneider + Matti Picus + Brian Kearns + Philip Jenvey Holger Krekel Christian Tismer - Matti Picus Hakan Ardo Benjamin Peterson - Philip Jenvey + Manuel Jacob Anders Chrigstrom - Brian Kearns - Manuel Jacob Eric van Riet Paap Wim Lavrijsen + Ronan Lamy Richard Emslie Alexander Schremmer Dan Villiom Podlaski Christiansen - Ronan Lamy Lukas Diekmann Sven Hager Anders Lehmann @@ -38,23 +38,23 @@ Camillo Bruni Laura Creighton Toon Verwaest + Remi Meier Leonardo Santagada Seo Sanghyeon + Romain Guillebert Justin Peel Ronny Pfannschmidt David Edelsohn Anders Hammarquist Jakub Gustak - Romain Guillebert Guido Wesdorp Lawrence Oluyede - Remi Meier Bartosz Skowron Daniel Roberts Niko Matsakis Adrien Di Mascio + Alexander Hesse Ludovic Aubry - Alexander Hesse Jacob Hallen Jason Creighton Alex Martelli @@ -71,6 +71,7 @@ Bruno Gola Jean-Paul Calderone Timo Paulssen + Squeaky Alexandre Fayolle Simon Burton Marius Gedminas @@ -87,6 +88,7 @@ Paweł Piotr Przeradowski Paul deGrandis Ilya Osadchiy + Tobias Oberstein Adrian Kuhn Boris Feigin Stefano Rivera @@ -95,13 +97,18 @@ Georg Brandl Bert Freudenberg Stian Andreassen + Laurence Tratt Wanja Saatkamp + Ivan Sichmann Freitas From noreply at buildbot.pypy.org Tue Jun 24 15:42:24 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:24 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Re-add MOVD32_xs, MOVD32_sx Message-ID: <20140624134224.6D2591C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72193:3bd28cc2fcbb Date: 2014-06-24 14:51 +0200 http://bitbucket.org/pypy/pypy/changeset/3bd28cc2fcbb/ Log: Re-add MOVD32_xs, MOVD32_sx diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py --- 
a/rpython/jit/backend/x86/callbuilder.py +++ b/rpython/jit/backend/x86/callbuilder.py @@ -456,7 +456,7 @@ return # if self.restype == 'S': - self.mc.MOVD_xs(xmm0.value, 0) + self.mc.MOVD32_xs(xmm0.value, 0) else: assert self.restype == INT self.mc.MOV_rs(eax.value, 0) diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py --- a/rpython/jit/backend/x86/rx86.py +++ b/rpython/jit/backend/x86/rx86.py @@ -628,8 +628,10 @@ MOVDQ_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2)) MOVD32_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0') + MOVD32_sx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), stack_sp(1)) MOVD32_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0') MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2)) + MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2)) PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b')) From noreply at buildbot.pypy.org Tue Jun 24 15:42:25 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 15:42:25 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: translation fix Message-ID: <20140624134225.AA3741C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72194:bda8c56bee80 Date: 2014-06-24 15:06 +0200 http://bitbucket.org/pypy/pypy/changeset/bda8c56bee80/ Log: translation fix diff --git a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -858,7 +858,10 @@ return [] def _visit_xchg(self, line): - # only support the format used in VALGRIND_DISCARD_TRANSLATIONS + # ignore the 'rpy_fastgil' atomic exchange + if 'rpy_fastgil' in line: + return [] + # support the format used in VALGRIND_DISCARD_TRANSLATIONS # which is to use a marker no-op "xchgl %ebx, %ebx" match = 
self.r_binaryinsn.match(line) source = match.group("source") From noreply at buildbot.pypy.org Tue Jun 24 16:19:10 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 16:19:10 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Fixes: must look in rpy_fastgil for the extra missing stack too Message-ID: <20140624141910.EF2CE1C31FE@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72195:499ad6f94da1 Date: 2014-06-24 16:18 +0200 http://bitbucket.org/pypy/pypy/changeset/499ad6f94da1/ Log: Fixes: must look in rpy_fastgil for the extra missing stack too diff --git a/rpython/memory/gctransform/asmgcroot.py b/rpython/memory/gctransform/asmgcroot.py --- a/rpython/memory/gctransform/asmgcroot.py +++ b/rpython/memory/gctransform/asmgcroot.py @@ -2,6 +2,7 @@ copygraph, SpaceOperation, checkgraph) from rpython.rlib.debug import ll_assert from rpython.rlib.nonconst import NonConstant +from rpython.rlib import rgil from rpython.rtyper.annlowlevel import llhelper from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.lltypesystem.lloperation import llop @@ -356,16 +357,19 @@ initialframedata = anchor.address[1] stackscount = 0 while initialframedata != anchor: # while we have not looped back - self.fill_initial_frame(curframe, initialframedata) - # Loop over all the frames in the stack - while self.walk_to_parent_frame(curframe, otherframe): - swap = curframe - curframe = otherframe # caller becomes callee - otherframe = swap + self.walk_frames(curframe, otherframe, initialframedata) # Then proceed to the next piece of stack initialframedata = initialframedata.address[1] stackscount += 1 # + # for the JIT: rpy_fastgil may contain an extra framedata + rpy_fastgil = rgil.gil_fetch_fastgil().signed[0] + if rpy_fastgil != 1: + ll_assert(rpy_fastgil != 0, "walk_stack_from doesn't have the GIL") + initialframedata = rffi.cast(llmemory.Address, rpy_fastgil) + self.walk_frames(curframe, otherframe, initialframedata) 
+ stackscount += 1 + # expected = rffi.stackcounter.stacks_counter if NonConstant(0): rffi.stackcounter.stacks_counter += 42 # hack to force it @@ -374,6 +378,14 @@ lltype.free(otherframe, flavor='raw') lltype.free(curframe, flavor='raw') + def walk_frames(self, curframe, otherframe, initialframedata): + self.fill_initial_frame(curframe, initialframedata) + # Loop over all the frames in the stack + while self.walk_to_parent_frame(curframe, otherframe): + swap = curframe + curframe = otherframe # caller becomes callee + otherframe = swap + def fill_initial_frame(self, curframe, initialframedata): # Read the information provided by initialframedata initialframedata += 2*sizeofaddr #skip the prev/next words at the start diff --git a/rpython/translator/c/src/mem.c b/rpython/translator/c/src/mem.c --- a/rpython/translator/c/src/mem.c +++ b/rpython/translator/c/src/mem.c @@ -115,6 +115,11 @@ got += 1; fd = ((void* *) (((char *)fd) + sizeof(void*)))[0]; } + if (rpy_fastgil != 1) { + RPyAssert(rpy_fastgil != 0, + "pypy_check_stack_count doesn't have the GIL"); + got++; /* <= the extra one currently stored in rpy_fastgil */ + } RPyAssert(got == stacks_counter - 1, "bad stacks_counter or non-closed stacks around"); # endif From noreply at buildbot.pypy.org Tue Jun 24 16:52:34 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 16:52:34 +0200 (CEST) Subject: [pypy-commit] cffi default: Unify (manually) the three blocks of #ifs for Windows. Add a comment Message-ID: <20140624145234.3CF301D23C2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1523:ab44531bdbe2 Date: 2014-06-24 16:52 +0200 http://bitbucket.org/cffi/cffi/changeset/ab44531bdbe2/ Log: Unify (manually) the three blocks of #ifs for Windows. Add a comment to attempt to keep them from drifting apart in the future. 
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -5,7 +5,6 @@ #ifdef MS_WIN32 #include #include "misc_win32.h" -#include /* for alloca() */ #else #include #include @@ -13,9 +12,31 @@ #include #include #include -#if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +#endif + +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(MS_WIN32) && defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif +#else +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) # include -#endif +# endif #endif #include "malloc_closure.h" diff --git a/c/misc_win32.h b/c/misc_win32.h --- a/c/misc_win32.h +++ b/c/misc_win32.h @@ -210,23 +210,6 @@ return buf; } - -/************************************************************/ -/* types */ - -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -#if defined(_MSC_VER) && _MSC_VER <= 1700 -typedef unsigned char _Bool; -#endif - - /************************************************************/ /* obscure */ diff --git a/cffi/vengine_cpy.py b/cffi/vengine_cpy.py --- a/cffi/vengine_cpy.py +++ b/cffi/vengine_cpy.py @@ -770,23 +770,29 @@ #include #include -#if defined(MS_WIN32) && !defined(_STDINT_H) -#include /* for alloca() */ -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; 
-typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -#if defined(_MSC_VER) && _MSC_VER <= 1700 -typedef unsigned char _Bool; -#endif +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(MS_WIN32) && defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif #else -#if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) # include -#endif +# endif #endif #if PY_MAJOR_VERSION < 3 diff --git a/cffi/vengine_gen.py b/cffi/vengine_gen.py --- a/cffi/vengine_gen.py +++ b/cffi/vengine_gen.py @@ -552,22 +552,28 @@ #include #include /* XXX for ssize_t on some platforms */ -#ifdef _WIN32 -# include -# define snprintf _snprintf -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -typedef SSIZE_T ssize_t; -#if defined(_MSC_VER) && _MSC_VER <= 1700 -typedef unsigned char _Bool; -#endif +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(MS_WIN32) && defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef 
unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif #else -# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif ''' From noreply at buildbot.pypy.org Tue Jun 24 16:59:04 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 16:59:04 +0200 (CEST) Subject: [pypy-commit] cffi default: Add stdint.h as an explicit include on non-Windows too. Message-ID: <20140624145904.5F8B91D2845@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1524:447147331347 Date: 2014-06-24 16:59 +0200 http://bitbucket.org/cffi/cffi/changeset/447147331347/ Log: Add stdint.h as an explicit include on non-Windows too. diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -34,6 +34,7 @@ typedef unsigned char _Bool; # endif #else +# include # if (defined (__SVR4) && defined (__sun)) || defined(_AIX) # include # endif diff --git a/cffi/vengine_cpy.py b/cffi/vengine_cpy.py --- a/cffi/vengine_cpy.py +++ b/cffi/vengine_cpy.py @@ -790,6 +790,7 @@ typedef unsigned char _Bool; # endif #else +# include # if (defined (__SVR4) && defined (__sun)) || defined(_AIX) # include # endif diff --git a/cffi/vengine_gen.py b/cffi/vengine_gen.py --- a/cffi/vengine_gen.py +++ b/cffi/vengine_gen.py @@ -572,6 +572,7 @@ typedef unsigned char _Bool; # endif #else +# include # if (defined (__SVR4) && defined (__sun)) || defined(_AIX) # include # endif From noreply at buildbot.pypy.org Tue Jun 24 17:35:29 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 17:35:29 +0200 (CEST) Subject: [pypy-commit] cffi default: Don't check MS_WIN32, it's not always defined even on MSVC. 
Message-ID: <20140624153529.13A841D2942@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1525:53be22eb427a Date: 2014-06-24 17:09 +0200 http://bitbucket.org/cffi/cffi/changeset/53be22eb427a/ Log: Don't check MS_WIN32, it's not always defined even on MSVC. diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -16,7 +16,7 @@ /* this block of #ifs should be kept exactly identical between c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ -#if defined(MS_WIN32) && defined(_MSC_VER) +#if defined(_MSC_VER) # include /* for alloca() */ # if _MSC_VER < 1600 /* MSVC < 2010 */ typedef __int8 int8_t; diff --git a/cffi/vengine_cpy.py b/cffi/vengine_cpy.py --- a/cffi/vengine_cpy.py +++ b/cffi/vengine_cpy.py @@ -772,7 +772,7 @@ /* this block of #ifs should be kept exactly identical between c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ -#if defined(MS_WIN32) && defined(_MSC_VER) +#if defined(_MSC_VER) # include /* for alloca() */ # if _MSC_VER < 1600 /* MSVC < 2010 */ typedef __int8 int8_t; diff --git a/cffi/vengine_gen.py b/cffi/vengine_gen.py --- a/cffi/vengine_gen.py +++ b/cffi/vengine_gen.py @@ -554,7 +554,7 @@ /* this block of #ifs should be kept exactly identical between c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ -#if defined(MS_WIN32) && defined(_MSC_VER) +#if defined(_MSC_VER) # include /* for alloca() */ # if _MSC_VER < 1600 /* MSVC < 2010 */ typedef __int8 int8_t; From noreply at buildbot.pypy.org Tue Jun 24 17:35:30 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 17:35:30 +0200 (CEST) Subject: [pypy-commit] cffi default: Skip "ssize_t" on Windows, where it is usually not defined anyway. 
Message-ID: <20140624153530.3FD3B1D2942@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1526:105930aef686 Date: 2014-06-24 17:35 +0200 http://bitbucket.org/cffi/cffi/changeset/105930aef686/ Log: Skip "ssize_t" on Windows, where it is usually not defined anyway. diff --git a/testing/test_verify.py b/testing/test_verify.py --- a/testing/test_verify.py +++ b/testing/test_verify.py @@ -199,6 +199,8 @@ if (all_primitive_types[typename] == 'c' or typename == '_Bool' or typename == 'long double'): pass + elif typename == 'ssize_t' and sys.platform == 'win32': + pass else: typenames.append(typename) # From noreply at buildbot.pypy.org Tue Jun 24 17:40:28 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 17:40:28 +0200 (CEST) Subject: [pypy-commit] cffi default: Generalize 105930aef686 Message-ID: <20140624154028.68A5D1C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1527:4e1d020f98d1 Date: 2014-06-24 17:40 +0200 http://bitbucket.org/cffi/cffi/changeset/4e1d020f98d1/ Log: Generalize 105930aef686 diff --git a/testing/test_verify.py b/testing/test_verify.py --- a/testing/test_verify.py +++ b/testing/test_verify.py @@ -171,6 +171,9 @@ all_primitive_types = model.PrimitiveType.ALL_PRIMITIVE_TYPES +if sys.platform == 'win32': + all_primitive_types = all_primitive_types[:] + all_primitive_types.remove('ssize_t') all_integer_types = sorted(tp for tp in all_primitive_types if all_primitive_types[tp] == 'i') all_float_types = sorted(tp for tp in all_primitive_types @@ -199,8 +202,6 @@ if (all_primitive_types[typename] == 'c' or typename == '_Bool' or typename == 'long double'): pass - elif typename == 'ssize_t' and sys.platform == 'win32': - pass else: typenames.append(typename) # From noreply at buildbot.pypy.org Tue Jun 24 17:43:02 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 17:43:02 +0200 (CEST) Subject: [pypy-commit] cffi default: pom pom pom Message-ID: 
<20140624154302.53F671C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1528:16a95f861200 Date: 2014-06-24 17:43 +0200 http://bitbucket.org/cffi/cffi/changeset/16a95f861200/ Log: pom pom pom diff --git a/testing/test_verify.py b/testing/test_verify.py --- a/testing/test_verify.py +++ b/testing/test_verify.py @@ -172,8 +172,8 @@ all_primitive_types = model.PrimitiveType.ALL_PRIMITIVE_TYPES if sys.platform == 'win32': - all_primitive_types = all_primitive_types[:] - all_primitive_types.remove('ssize_t') + all_primitive_types = all_primitive_types.copy() + del all_primitive_types['ssize_t'] all_integer_types = sorted(tp for tp in all_primitive_types if all_primitive_types[tp] == 'i') all_float_types = sorted(tp for tp in all_primitive_types From noreply at buildbot.pypy.org Tue Jun 24 17:52:47 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 17:52:47 +0200 (CEST) Subject: [pypy-commit] cffi default: Avoid using ssize_t here Message-ID: <20140624155247.A68121D24C0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1529:c5b9bbf1e9cd Date: 2014-06-24 17:52 +0200 http://bitbucket.org/cffi/cffi/changeset/c5b9bbf1e9cd/ Log: Avoid using ssize_t here diff --git a/cffi/vengine_gen.py b/cffi/vengine_gen.py --- a/cffi/vengine_gen.py +++ b/cffi/vengine_gen.py @@ -249,10 +249,10 @@ prnt(' /* %s */' % str(e)) # cannot verify it, ignore prnt('}') self.export_symbols.append(layoutfuncname) - prnt('ssize_t %s(ssize_t i)' % (layoutfuncname,)) + prnt('intptr_t %s(intptr_t i)' % (layoutfuncname,)) prnt('{') prnt(' struct _cffi_aligncheck { char x; %s y; };' % cname) - prnt(' static ssize_t nums[] = {') + prnt(' static intptr_t nums[] = {') prnt(' sizeof(%s),' % cname) prnt(' offsetof(struct _cffi_aligncheck, y),') for fname, ftype, fbitsize in tp.enumfields(): @@ -276,7 +276,7 @@ return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) # - BFunc = 
self.ffi._typeof_locked("ssize_t(*)(ssize_t)")[0] + BFunc = self.ffi._typeof_locked("intptr_t(*)(intptr_t)")[0] function = module.load_function(BFunc, layoutfuncname) layout = [] num = 0 From noreply at buildbot.pypy.org Tue Jun 24 18:04:18 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 18:04:18 +0200 (CEST) Subject: [pypy-commit] pypy default: Update to cffi/c5b9bbf1e9cd Message-ID: <20140624160418.2DE3E1C05B7@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72196:431b98cb71fd Date: 2014-06-24 18:03 +0200 http://bitbucket.org/pypy/pypy/changeset/431b98cb71fd/ Log: Update to cffi/c5b9bbf1e9cd diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -443,6 +443,10 @@ for enumname, enumval in zip(tp.enumerators, tp.enumvalues): if enumname not in library.__dict__: library.__dict__[enumname] = enumval + for key, val in ffi._parser._int_constants.items(): + if key not in library.__dict__: + library.__dict__[key] = val + copied_enums.append(True) if name in library.__dict__: return diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -24,6 +24,7 @@ _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]") _r_words = re.compile(r"\w+|\S") _parser_cache = None +_r_int_literal = re.compile(r"^0?x?[0-9a-f]+u?l?$", re.IGNORECASE) def _get_parser(): global _parser_cache @@ -99,6 +100,7 @@ self._structnode2type = weakref.WeakKeyDictionary() self._override = False self._packed = False + self._int_constants = {} def _parse(self, csource): csource, macros = _preprocess(csource) @@ -128,9 +130,10 @@ finally: if lock is not None: lock.release() - return ast, macros + # csource will be used to find buggy source text + return ast, macros, csource - def convert_pycparser_error(self, e, csource): + def _convert_pycparser_error(self, e, csource): # xxx look for ":NUM:" at the start of str(e) and try to interpret # it as 
a line number line = None @@ -142,6 +145,12 @@ csourcelines = csource.splitlines() if 1 <= linenum <= len(csourcelines): line = csourcelines[linenum-1] + return line + + def convert_pycparser_error(self, e, csource): + line = self._convert_pycparser_error(e, csource) + + msg = str(e) if line: msg = 'cannot parse "%s"\n%s' % (line.strip(), msg) else: @@ -160,14 +169,9 @@ self._packed = prev_packed def _internal_parse(self, csource): - ast, macros = self._parse(csource) + ast, macros, csource = self._parse(csource) # add the macros - for key, value in macros.items(): - value = value.strip() - if value != '...': - raise api.CDefError('only supports the syntax "#define ' - '%s ..." for now (literally)' % key) - self._declare('macro ' + key, value) + self._process_macros(macros) # find the first "__dotdotdot__" and use that as a separator # between the repeated typedefs and the real csource iterator = iter(ast.ext) @@ -175,27 +179,61 @@ if decl.name == '__dotdotdot__': break # - for decl in iterator: - if isinstance(decl, pycparser.c_ast.Decl): - self._parse_decl(decl) - elif isinstance(decl, pycparser.c_ast.Typedef): - if not decl.name: - raise api.CDefError("typedef does not declare any name", - decl) - if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) - and decl.type.type.names == ['__dotdotdot__']): - realtype = model.unknown_type(decl.name) - elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and - isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and - isinstance(decl.type.type.type, - pycparser.c_ast.IdentifierType) and - decl.type.type.type.names == ['__dotdotdot__']): - realtype = model.unknown_ptr_type(decl.name) + try: + for decl in iterator: + if isinstance(decl, pycparser.c_ast.Decl): + self._parse_decl(decl) + elif isinstance(decl, pycparser.c_ast.Typedef): + if not decl.name: + raise api.CDefError("typedef does not declare any name", + decl) + if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) + and decl.type.type.names == 
['__dotdotdot__']): + realtype = model.unknown_type(decl.name) + elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and + isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and + isinstance(decl.type.type.type, + pycparser.c_ast.IdentifierType) and + decl.type.type.type.names == ['__dotdotdot__']): + realtype = model.unknown_ptr_type(decl.name) + else: + realtype = self._get_type(decl.type, name=decl.name) + self._declare('typedef ' + decl.name, realtype) else: - realtype = self._get_type(decl.type, name=decl.name) - self._declare('typedef ' + decl.name, realtype) + raise api.CDefError("unrecognized construct", decl) + except api.FFIError as e: + msg = self._convert_pycparser_error(e, csource) + if msg: + e.args = (e.args[0] + "\n *** Err: %s" % msg,) + raise + + def _add_constants(self, key, val): + if key in self._int_constants: + raise api.FFIError( + "multiple declarations of constant: %s" % (key,)) + self._int_constants[key] = val + + def _process_macros(self, macros): + for key, value in macros.items(): + value = value.strip() + match = _r_int_literal.search(value) + if match is not None: + int_str = match.group(0).lower().rstrip("ul") + + # "010" is not valid oct in py3 + if (int_str.startswith("0") and + int_str != "0" and + not int_str.startswith("0x")): + int_str = "0o" + int_str[1:] + + pyvalue = int(int_str, 0) + self._add_constants(key, pyvalue) + elif value == '...': + self._declare('macro ' + key, value) else: - raise api.CDefError("unrecognized construct", decl) + raise api.CDefError('only supports the syntax "#define ' + '%s ..." 
(literally) or "#define ' + '%s 0x1FF" for now' % (key, key)) def _parse_decl(self, decl): node = decl.type @@ -227,7 +265,7 @@ self._declare('variable ' + decl.name, tp) def parse_type(self, cdecl): - ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl) + ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2] assert not macros exprnode = ast.ext[-1].type.args.params[0] if isinstance(exprnode, pycparser.c_ast.ID): @@ -306,7 +344,8 @@ if ident == 'void': return model.void_type if ident == '__dotdotdot__': - raise api.FFIError('bad usage of "..."') + raise api.FFIError(':%d: bad usage of "..."' % + typenode.coord.line) return resolve_common_type(ident) # if isinstance(type, pycparser.c_ast.Struct): @@ -333,7 +372,8 @@ return self._get_struct_union_enum_type('union', typenode, name, nested=True) # - raise api.FFIError("bad or unsupported type declaration") + raise api.FFIError(":%d: bad or unsupported type declaration" % + typenode.coord.line) def _parse_function_type(self, typenode, funcname=None): params = list(getattr(typenode.args, 'params', [])) @@ -499,6 +539,10 @@ if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and exprnode.op == '-'): return -self._parse_constant(exprnode.expr) + # load previously defined int constant + if (isinstance(exprnode, pycparser.c_ast.ID) and + exprnode.name in self._int_constants): + return self._int_constants[exprnode.name] # if partial_length_ok: if (isinstance(exprnode, pycparser.c_ast.ID) and @@ -506,8 +550,8 @@ self._partial_length = True return '...' 
# - raise api.FFIError("unsupported expression: expected a " - "simple numeric constant") + raise api.FFIError(":%d: unsupported expression: expected a " + "simple numeric constant" % exprnode.coord.line) def _build_enum_type(self, explicit_name, decls): if decls is not None: @@ -522,6 +566,7 @@ if enum.value is not None: nextenumvalue = self._parse_constant(enum.value) enumvalues.append(nextenumvalue) + self._add_constants(enum.name, nextenumvalue) nextenumvalue += 1 enumvalues = tuple(enumvalues) tp = model.EnumType(explicit_name, enumerators, enumvalues) @@ -535,3 +580,5 @@ kind = name.split(' ', 1)[0] if kind in ('typedef', 'struct', 'union', 'enum'): self._declare(name, tp) + for k, v in other._int_constants.items(): + self._add_constants(k, v) diff --git a/lib_pypy/cffi/ffiplatform.py b/lib_pypy/cffi/ffiplatform.py --- a/lib_pypy/cffi/ffiplatform.py +++ b/lib_pypy/cffi/ffiplatform.py @@ -38,6 +38,7 @@ import distutils.errors # dist = Distribution({'ext_modules': [ext]}) + dist.parse_config_files() options = dist.get_option_dict('build_ext') options['force'] = ('ffiplatform', True) options['build_lib'] = ('ffiplatform', tmpdir) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -89,43 +89,54 @@ # by generate_cpy_function_method(). prnt('static PyMethodDef _cffi_methods[] = {') self._generate("method") - prnt(' {"_cffi_setup", _cffi_setup, METH_VARARGS},') - prnt(' {NULL, NULL} /* Sentinel */') + prnt(' {"_cffi_setup", _cffi_setup, METH_VARARGS, NULL},') + prnt(' {NULL, NULL, 0, NULL} /* Sentinel */') prnt('};') prnt() # # standard init. 
modname = self.verifier.get_module_name() - if sys.version_info >= (3,): - prnt('static struct PyModuleDef _cffi_module_def = {') - prnt(' PyModuleDef_HEAD_INIT,') - prnt(' "%s",' % modname) - prnt(' NULL,') - prnt(' -1,') - prnt(' _cffi_methods,') - prnt(' NULL, NULL, NULL, NULL') - prnt('};') - prnt() - initname = 'PyInit_%s' % modname - createmod = 'PyModule_Create(&_cffi_module_def)' - errorcase = 'return NULL' - finalreturn = 'return lib' - else: - initname = 'init%s' % modname - createmod = 'Py_InitModule("%s", _cffi_methods)' % modname - errorcase = 'return' - finalreturn = 'return' + constants = self._chained_list_constants[False] + prnt('#if PY_MAJOR_VERSION >= 3') + prnt() + prnt('static struct PyModuleDef _cffi_module_def = {') + prnt(' PyModuleDef_HEAD_INIT,') + prnt(' "%s",' % modname) + prnt(' NULL,') + prnt(' -1,') + prnt(' _cffi_methods,') + prnt(' NULL, NULL, NULL, NULL') + prnt('};') + prnt() prnt('PyMODINIT_FUNC') - prnt('%s(void)' % initname) + prnt('PyInit_%s(void)' % modname) prnt('{') prnt(' PyObject *lib;') - prnt(' lib = %s;' % createmod) - prnt(' if (lib == NULL || %s < 0)' % ( - self._chained_list_constants[False],)) - prnt(' %s;' % errorcase) - prnt(' _cffi_init();') - prnt(' %s;' % finalreturn) + prnt(' lib = PyModule_Create(&_cffi_module_def);') + prnt(' if (lib == NULL)') + prnt(' return NULL;') + prnt(' if (%s < 0 || _cffi_init() < 0) {' % (constants,)) + prnt(' Py_DECREF(lib);') + prnt(' return NULL;') + prnt(' }') + prnt(' return lib;') prnt('}') + prnt() + prnt('#else') + prnt() + prnt('PyMODINIT_FUNC') + prnt('init%s(void)' % modname) + prnt('{') + prnt(' PyObject *lib;') + prnt(' lib = Py_InitModule("%s", _cffi_methods);' % modname) + prnt(' if (lib == NULL)') + prnt(' return;') + prnt(' if (%s < 0 || _cffi_init() < 0)' % (constants,)) + prnt(' return;') + prnt(' return;') + prnt('}') + prnt() + prnt('#endif') def load_library(self): # XXX review all usages of 'self' here! 
@@ -394,7 +405,7 @@ meth = 'METH_O' else: meth = 'METH_VARARGS' - self._prnt(' {"%s", _cffi_f_%s, %s},' % (name, name, meth)) + self._prnt(' {"%s", _cffi_f_%s, %s, NULL},' % (name, name, meth)) _loading_cpy_function = _loaded_noop @@ -481,8 +492,8 @@ if tp.fldnames is None: return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) - self._prnt(' {"%s", %s, METH_NOARGS},' % (layoutfuncname, - layoutfuncname)) + self._prnt(' {"%s", %s, METH_NOARGS, NULL},' % (layoutfuncname, + layoutfuncname)) def _loading_struct_or_union(self, tp, prefix, name, module): if tp.fldnames is None: @@ -589,13 +600,7 @@ 'variable type'),)) assert delayed else: - prnt(' if (LONG_MIN <= (%s) && (%s) <= LONG_MAX)' % (name, name)) - prnt(' o = PyInt_FromLong((long)(%s));' % (name,)) - prnt(' else if ((%s) <= 0)' % (name,)) - prnt(' o = PyLong_FromLongLong((long long)(%s));' % (name,)) - prnt(' else') - prnt(' o = PyLong_FromUnsignedLongLong(' - '(unsigned long long)(%s));' % (name,)) + prnt(' o = _cffi_from_c_int_const(%s);' % name) prnt(' if (o == NULL)') prnt(' return -1;') if size_too: @@ -632,13 +637,18 @@ # ---------- # enums + def _enum_funcname(self, prefix, name): + # "$enum_$1" => "___D_enum____D_1" + name = name.replace('$', '___D_') + return '_cffi_e_%s_%s' % (prefix, name) + def _generate_cpy_enum_decl(self, tp, name, prefix='enum'): if tp.partial: for enumerator in tp.enumerators: self._generate_cpy_const(True, enumerator, delayed=False) return # - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) prnt = self._prnt prnt('static int %s(PyObject *lib)' % funcname) prnt('{') @@ -760,17 +770,30 @@ #include #include -#ifdef MS_WIN32 -#include /* for alloca() */ -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; 
-typedef unsigned char _Bool; +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif +#else +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif #if PY_MAJOR_VERSION < 3 @@ -795,6 +818,15 @@ #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble +#define _cffi_from_c_int_const(x) \ + (((x) > 0) ? \ + ((unsigned long long)(x) <= (unsigned long long)LONG_MAX) ? \ + PyInt_FromLong((long)(x)) : \ + PyLong_FromUnsignedLongLong((unsigned long long)(x)) : \ + ((long long)(x) >= (long long)LONG_MIN) ? \ + PyInt_FromLong((long)(x)) : \ + PyLong_FromLongLong((long long)(x))) + #define _cffi_from_c_int(x, type) \ (((type)-1) > 0 ? /* unsigned */ \ (sizeof(type) < sizeof(long) ? PyInt_FromLong(x) : \ @@ -804,14 +836,14 @@ PyLong_FromLongLong(x))) #define _cffi_to_c_int(o, type) \ - (sizeof(type) == 1 ? (((type)-1) > 0 ? _cffi_to_c_u8(o) \ - : _cffi_to_c_i8(o)) : \ - sizeof(type) == 2 ? (((type)-1) > 0 ? _cffi_to_c_u16(o) \ - : _cffi_to_c_i16(o)) : \ - sizeof(type) == 4 ? (((type)-1) > 0 ? _cffi_to_c_u32(o) \ - : _cffi_to_c_i32(o)) : \ - sizeof(type) == 8 ? (((type)-1) > 0 ? _cffi_to_c_u64(o) \ - : _cffi_to_c_i64(o)) : \ + (sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o) \ + : (type)_cffi_to_c_i8(o)) : \ + sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o) \ + : (type)_cffi_to_c_i16(o)) : \ + sizeof(type) == 4 ? (((type)-1) > 0 ? 
(type)_cffi_to_c_u32(o) \ + : (type)_cffi_to_c_i32(o)) : \ + sizeof(type) == 8 ? (((type)-1) > 0 ? (type)_cffi_to_c_u64(o) \ + : (type)_cffi_to_c_i64(o)) : \ (Py_FatalError("unsupported size for type " #type), 0)) #define _cffi_to_c_i8 \ @@ -885,25 +917,32 @@ return PyBool_FromLong(was_alive); } -static void _cffi_init(void) +static int _cffi_init(void) { - PyObject *module = PyImport_ImportModule("_cffi_backend"); - PyObject *c_api_object; + PyObject *module, *c_api_object = NULL; + module = PyImport_ImportModule("_cffi_backend"); if (module == NULL) - return; + goto failure; c_api_object = PyObject_GetAttrString(module, "_C_API"); if (c_api_object == NULL) - return; + goto failure; if (!PyCapsule_CheckExact(c_api_object)) { - Py_DECREF(c_api_object); PyErr_SetNone(PyExc_ImportError); - return; + goto failure; } memcpy(_cffi_exports, PyCapsule_GetPointer(c_api_object, "cffi"), _CFFI_NUM_EXPORTS * sizeof(void *)); + + Py_DECREF(module); Py_DECREF(c_api_object); + return 0; + + failure: + Py_XDECREF(module); + Py_XDECREF(c_api_object); + return -1; } #define _cffi_type(num) ((CTypeDescrObject *)PyList_GET_ITEM(_cffi_types, num)) diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py --- a/lib_pypy/cffi/vengine_gen.py +++ b/lib_pypy/cffi/vengine_gen.py @@ -249,10 +249,10 @@ prnt(' /* %s */' % str(e)) # cannot verify it, ignore prnt('}') self.export_symbols.append(layoutfuncname) - prnt('ssize_t %s(ssize_t i)' % (layoutfuncname,)) + prnt('intptr_t %s(intptr_t i)' % (layoutfuncname,)) prnt('{') prnt(' struct _cffi_aligncheck { char x; %s y; };' % cname) - prnt(' static ssize_t nums[] = {') + prnt(' static intptr_t nums[] = {') prnt(' sizeof(%s),' % cname) prnt(' offsetof(struct _cffi_aligncheck, y),') for fname, ftype, fbitsize in tp.enumfields(): @@ -276,7 +276,7 @@ return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) # - BFunc = self.ffi._typeof_locked("ssize_t(*)(ssize_t)")[0] + BFunc = 
self.ffi._typeof_locked("intptr_t(*)(intptr_t)")[0] function = module.load_function(BFunc, layoutfuncname) layout = [] num = 0 @@ -410,13 +410,18 @@ # ---------- # enums + def _enum_funcname(self, prefix, name): + # "$enum_$1" => "___D_enum____D_1" + name = name.replace('$', '___D_') + return '_cffi_e_%s_%s' % (prefix, name) + def _generate_gen_enum_decl(self, tp, name, prefix='enum'): if tp.partial: for enumerator in tp.enumerators: self._generate_gen_const(True, enumerator) return # - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) self.export_symbols.append(funcname) prnt = self._prnt prnt('int %s(char *out_error)' % funcname) @@ -453,7 +458,7 @@ else: BType = self.ffi._typeof_locked("char[]")[0] BFunc = self.ffi._typeof_locked("int(*)(char*)")[0] - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) function = module.load_function(BFunc, funcname) p = self.ffi.new(BType, 256) if function(p) < 0: @@ -547,20 +552,29 @@ #include #include /* XXX for ssize_t on some platforms */ -#ifdef _WIN32 -# include -# define snprintf _snprintf -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -typedef SSIZE_T ssize_t; -typedef unsigned char _Bool; +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char 
_Bool; +# endif #else -# include +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif ''' diff --git a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py @@ -866,25 +866,25 @@ def test_enum(self): ffi = FFI(backend=self.Backend()) - ffi.cdef("enum foo { A, B, CC, D };") - assert ffi.string(ffi.cast("enum foo", 0)) == "A" - assert ffi.string(ffi.cast("enum foo", 2)) == "CC" - assert ffi.string(ffi.cast("enum foo", 3)) == "D" + ffi.cdef("enum foo { A0, B0, CC0, D0 };") + assert ffi.string(ffi.cast("enum foo", 0)) == "A0" + assert ffi.string(ffi.cast("enum foo", 2)) == "CC0" + assert ffi.string(ffi.cast("enum foo", 3)) == "D0" assert ffi.string(ffi.cast("enum foo", 4)) == "4" - ffi.cdef("enum bar { A, B=-2, CC, D, E };") - assert ffi.string(ffi.cast("enum bar", 0)) == "A" - assert ffi.string(ffi.cast("enum bar", -2)) == "B" - assert ffi.string(ffi.cast("enum bar", -1)) == "CC" - assert ffi.string(ffi.cast("enum bar", 1)) == "E" + ffi.cdef("enum bar { A1, B1=-2, CC1, D1, E1 };") + assert ffi.string(ffi.cast("enum bar", 0)) == "A1" + assert ffi.string(ffi.cast("enum bar", -2)) == "B1" + assert ffi.string(ffi.cast("enum bar", -1)) == "CC1" + assert ffi.string(ffi.cast("enum bar", 1)) == "E1" assert ffi.cast("enum bar", -2) != ffi.cast("enum bar", -2) assert ffi.cast("enum foo", 0) != ffi.cast("enum bar", 0) assert ffi.cast("enum bar", 0) != ffi.cast("int", 0) - assert repr(ffi.cast("enum bar", -1)) == "" + assert repr(ffi.cast("enum bar", -1)) == "" assert repr(ffi.cast("enum foo", -1)) == ( # enums are unsigned, if "") # they contain no neg value - ffi.cdef("enum baz { A=0x1000, B=0x2000 };") - assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A" - assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B" + ffi.cdef("enum baz { A2=0x1000, B2=0x2000 
};") + assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A2" + assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B2" def test_enum_in_struct(self): ffi = FFI(backend=self.Backend()) @@ -1323,6 +1323,16 @@ e = ffi.cast("enum e", 0) assert ffi.string(e) == "AA" # pick the first one arbitrarily + def test_enum_refer_previous_enum_value(self): + ffi = FFI(backend=self.Backend()) + ffi.cdef("enum e { AA, BB=2, CC=4, DD=BB, EE, FF=CC, GG=FF };") + assert ffi.string(ffi.cast("enum e", 2)) == "BB" + assert ffi.string(ffi.cast("enum e", 3)) == "EE" + assert ffi.sizeof("char[DD]") == 2 + assert ffi.sizeof("char[EE]") == 3 + assert ffi.sizeof("char[FF]") == 4 + assert ffi.sizeof("char[GG]") == 4 + def test_nested_anonymous_struct(self): ffi = FFI(backend=self.Backend()) ffi.cdef(""" @@ -1544,6 +1554,7 @@ ffi2.include(ffi1) p = ffi2.cast("enum foo", 1) assert ffi2.string(p) == "FB" + assert ffi2.sizeof("char[FC]") == 2 def test_include_typedef_2(self): backend = self.Backend() @@ -1564,10 +1575,32 @@ assert ffi.alignof("struct is_packed") == 1 s = ffi.new("struct is_packed[2]") s[0].b = 42623381 - s[0].a = 'X' + s[0].a = b'X' s[1].b = -4892220 - s[1].a = 'Y' + s[1].a = b'Y' assert s[0].b == 42623381 - assert s[0].a == 'X' + assert s[0].a == b'X' assert s[1].b == -4892220 - assert s[1].a == 'Y' + assert s[1].a == b'Y' + + def test_define_integer_constant(self): + ffi = FFI(backend=self.Backend()) + ffi.cdef(""" + #define DOT_0 0 + #define DOT 100 + #define DOT_OCT 0100l + #define DOT_HEX 0x100u + #define DOT_HEX2 0X10 + #define DOT_UL 1000UL + enum foo {AA, BB=DOT, CC}; + """) + lib = ffi.dlopen(None) + assert ffi.string(ffi.cast("enum foo", 100)) == "BB" + assert lib.DOT_0 == 0 + assert lib.DOT == 100 + assert lib.DOT_OCT == 0o100 + assert lib.DOT_HEX == 0x100 + assert lib.DOT_HEX2 == 0x10 + assert lib.DOT_UL == 1000 + + diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/test_function.py --- 
a/pypy/module/test_lib_pypy/cffi_tests/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_function.py @@ -36,13 +36,11 @@ return self._value lib_m = 'm' -has_sinf = True if sys.platform == 'win32': #there is a small chance this fails on Mingw via environ $CC import distutils.ccompiler if distutils.ccompiler.get_default_compiler() == 'msvc': lib_m = 'msvcrt' - has_sinf = False class TestFunction(object): Backend = CTypesBackend @@ -57,8 +55,8 @@ assert x == math.sin(1.23) def test_sinf(self): - if not has_sinf: - py.test.skip("sinf not available") + if sys.platform == 'win32': + py.test.skip("no sinf found in the Windows stdlib") ffi = FFI(backend=self.Backend()) ffi.cdef(""" float sinf(float x); diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py @@ -162,9 +162,10 @@ def test_define_not_supported_for_now(): ffi = FFI(backend=FakeBackend()) - e = py.test.raises(CDefError, ffi.cdef, "#define FOO 42") - assert str(e.value) == \ - 'only supports the syntax "#define FOO ..." for now (literally)' + e = py.test.raises(CDefError, ffi.cdef, '#define FOO "blah"') + assert str(e.value) == ( + 'only supports the syntax "#define FOO ..." 
(literally)' + ' or "#define FOO 0x1FF" for now') def test_unnamed_struct(): ffi = FFI(backend=FakeBackend()) diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py @@ -1,5 +1,5 @@ # Generated by pypy/tool/import_cffi.py -import py +import py, re import sys, os, math, weakref from cffi import FFI, VerificationError, VerificationMissing, model from pypy.module.test_lib_pypy.cffi_tests.support import * @@ -30,6 +30,24 @@ def setup_module(): import cffi.verifier cffi.verifier.cleanup_tmpdir() + # + # check that no $ sign is produced in the C file; it used to be the + # case that anonymous enums would produce '$enum_$1', which was + # used as part of a function name. GCC accepts such names, but it's + # apparently non-standard. + _r_comment = re.compile(r"/\*.*?\*/|//.*?$", re.DOTALL | re.MULTILINE) + _r_string = re.compile(r'\".*?\"') + def _write_source_and_check(self, file=None): + base_write_source(self, file) + if file is None: + f = open(self.sourcefilename) + data = f.read() + f.close() + data = _r_comment.sub(' ', data) + data = _r_string.sub('"skipped"', data) + assert '$' not in data + base_write_source = cffi.verifier.Verifier._write_source + cffi.verifier.Verifier._write_source = _write_source_and_check def test_module_type(): @@ -154,6 +172,9 @@ all_primitive_types = model.PrimitiveType.ALL_PRIMITIVE_TYPES +if sys.platform == 'win32': + all_primitive_types = all_primitive_types.copy() + del all_primitive_types['ssize_t'] all_integer_types = sorted(tp for tp in all_primitive_types if all_primitive_types[tp] == 'i') all_float_types = sorted(tp for tp in all_primitive_types @@ -1453,8 +1474,8 @@ assert func() == 42 def test_FILE_stored_in_stdout(): - if sys.platform == 'win32': - py.test.skip("MSVC: cannot assign to stdout") + if not sys.platform.startswith('linux'): + 
py.test.skip("likely, we cannot assign to stdout") ffi = FFI() ffi.cdef("int printf(const char *, ...); FILE *setstdout(FILE *);") lib = ffi.verify(""" @@ -1637,8 +1658,8 @@ ffi = FFI() ffi.cdef(""" int (*python_callback)(int how_many, int *values); - void *const c_callback; /* pass this ptr to C routines */ - int some_c_function(void *cb); + int (*const c_callback)(int,...); /* pass this ptr to C routines */ + int some_c_function(int(*cb)(int,...)); """) lib = ffi.verify(""" #include diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_version.py b/pypy/module/test_lib_pypy/cffi_tests/test_version.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_version.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_version.py @@ -11,7 +11,6 @@ '0.7.1': '0.7', # did not change '0.7.2': '0.7', # did not change '0.8.1': '0.8', # did not change (essentially) - '0.8.2': '0.8', # did not change } def test_version(): @@ -26,7 +25,7 @@ content = open(p).read() # v = cffi.__version__ - assert ("version = '%s'\n" % BACKEND_VERSIONS.get(v, v)) in content + assert ("version = '%s'\n" % v[:3]) in content assert ("release = '%s'\n" % v) in content def test_doc_version_file(): From noreply at buildbot.pypy.org Tue Jun 24 18:04:39 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 18:04:39 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: More fixes Message-ID: <20140624160439.E4D981C05B7@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72197:bbab1875b9e5 Date: 2014-06-24 18:04 +0200 http://bitbucket.org/pypy/pypy/changeset/bbab1875b9e5/ Log: More fixes diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -7,7 +7,7 @@ # all but one will be blocked. The other threads get a chance to run # from time to time, using the periodic action GILReleaseAction. 
-from rpython.rlib import rthread +from rpython.rlib import rthread, rgil from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import PeriodicAsyncAction from pypy.module.thread.threadlocals import OSThreadLocals @@ -25,8 +25,7 @@ use_bytecode_counter=True) def _initialize_gil(self, space): - if not rthread.gil_allocate(): - raise wrap_thread_error(space, "can't allocate GIL") + rgil.gil_allocate() def setup_threads(self, space): """Enable threads in the object space, if they haven't already been.""" @@ -72,14 +71,14 @@ # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! e = get_errno() - rthread.gil_release() + rgil.gil_release() set_errno(e) before_external_call._gctransformer_hint_cannot_collect_ = True before_external_call._dont_reach_me_in_del_ = True def after_external_call(): e = get_errno() - rthread.gil_acquire() + rgil.gil_acquire() rthread.gc_thread_run() after_thread_switch() set_errno(e) @@ -97,7 +96,7 @@ # explicitly release the gil, in a way that tries to give more # priority to other threads (as opposed to continuing to run in # the same thread). 
- if rthread.gil_yield_thread(): + if rgil.gil_yield_thread(): rthread.gc_thread_run() after_thread_switch() do_yield_thread._gctransformer_hint_close_stack_ = True diff --git a/rpython/rlib/rgil.py b/rpython/rlib/rgil.py --- a/rpython/rlib/rgil.py +++ b/rpython/rlib/rgil.py @@ -11,13 +11,17 @@ includes = ['src/thread.h'], separate_module_files = [translator_c_dir / 'src' / 'thread.c'], include_dirs = [translator_c_dir], - export_symbols = ['RPyGilYieldThread', 'RPyGilRelease', + export_symbols = ['RPyGilAllocate', 'RPyGilYieldThread', 'RPyGilRelease', 'RPyGilAcquire', 'RPyFetchFastGil']) llexternal = rffi.llexternal -gil_yield_thread = llexternal('RPyGilYieldThread', [], lltype.Void, +gil_allocate = llexternal('RPyGilAllocate', [], lltype.Void, + _nowrapper=True, sandboxsafe=True, + compilation_info=eci) + +gil_yield_thread = llexternal('RPyGilYieldThread', [], lltype.Signed, _nowrapper=True, sandboxsafe=True, compilation_info=eci) diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py --- a/rpython/rlib/test/test_rthread.py +++ b/rpython/rlib/test/test_rthread.py @@ -82,6 +82,7 @@ def test_gc_locking(self): import time + from rpython.rlib.objectmodel import invoke_around_extcall from rpython.rlib.debug import ll_assert class State: diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -24,8 +24,8 @@ #endif /* !_WIN32 */ - -void RPyGilYieldThread(void); +void RPyGilAllocate(void); +long RPyGilYieldThread(void); void RPyGilAcquire(void); #ifdef PYPY_USE_ASMGCC diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -522,19 +522,16 @@ long rpy_fastgil = 1; static pthread_mutex_t mutex_gil_stealer; static pthread_mutex_t mutex_gil; -static pthread_once_t mutex_gil_once = PTHREAD_ONCE_INIT; 
+long rpy_lock_ready = 0; -static void init_mutex_gil(void) +void RPyGilAllocate(void) { + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); ASSERT_STATUS(pthread_mutex_init(&mutex_gil_stealer, pthread_mutexattr_default)); ASSERT_STATUS(pthread_mutex_init(&mutex_gil, pthread_mutexattr_default)); ASSERT_STATUS(pthread_mutex_lock(&mutex_gil)); -} - -static inline void prepare_mutexes(void) -{ - pthread_once(&mutex_gil_once, &init_mutex_gil); + rpy_lock_ready = 1; } static inline void timespec_add(struct timespec *t, long incr) @@ -567,7 +564,7 @@ first-in-first-out order, this will nicely give the threads a round-robin chance. */ - prepare_mutexes(); + assert(rpy_lock_ready); ASSERT_STATUS(pthread_mutex_lock(&mutex_gil_stealer)); /* We are now the stealer thread. Steals! */ @@ -638,13 +635,14 @@ } */ -void RPyGilYieldThread(void) +long RPyGilYieldThread(void) { assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + if (!rpy_lock_ready) + return 0; /* Explicitly release the 'mutex_gil'. */ - prepare_mutexes(); ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil)); /* Now nobody has got the GIL, because 'mutex_gil' is released (but @@ -654,4 +652,5 @@ its pthread_mutex_lock() and pthread_mutex_timedlock() now. */ RPyGilAcquire(); + return 1; } From noreply at buildbot.pypy.org Tue Jun 24 19:09:36 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 19:09:36 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Move the GIL logic to a new thread_gil.c. Use simple names for the Message-ID: <20140624170936.2FA931C0CA6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72198:50a199f1b79d Date: 2014-06-24 19:09 +0200 http://bitbucket.org/pypy/pypy/changeset/50a199f1b79d/ Log: Move the GIL logic to a new thread_gil.c. Use simple names for the basic operations and define these in thread_pthread and thread_nt as appropriate. 
diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c new file mode 100644 --- /dev/null +++ b/rpython/translator/c/src/thread_gil.c @@ -0,0 +1,160 @@ + +/* Idea: + + - "The GIL" is a composite concept. There are two locks, and "the + GIL is locked" when both are locked. + + - The first lock is a simple global variable 'rpy_fastgil'. With + shadowstack, we use the most portable definition: 0 means unlocked + and != 0 means locked. With asmgcc, 0 means unlocked but only 1 + means locked. A different value means unlocked too, but the value + is used by the JIT to contain the stack top for stack root scanning. + + - The second lock is a regular mutex. In the fast path, it is never + unlocked. Remember that "the GIL is unlocked" means that either + the first or the second lock is unlocked. It should never be the + case that both are unlocked at the same time. + + - Let's call "thread 1" the thread with the GIL. Whenever it does an + external function call, it sets 'rpy_fastgil' to 0 (unlocked). + This is the cheapest way to release the GIL. When it returns from + the function call, this thread attempts to atomically change + 'rpy_fastgil' to 1. In the common case where it works, thread 1 + has got the GIL back and so continues to run. + + - Say "thread 2" is eagerly waiting for thread 1 to become blocked in + some long-running call. Regularly, it checks if 'rpy_fastgil' is 0 + and tries to atomically change it to 1. If it succeeds, it means + that the GIL was not previously locked. Thread 2 has now got the GIL. + + - If there are more than 2 threads, the rest is really sleeping by + waiting on the 'mutex_gil_stealer' held by thread 2. + + - An additional mechanism is used for when thread 1 wants to + explicitly yield the GIL to thread 2: it does so by releasing + 'mutex_gil' (which is otherwise not released) but keeping the + value of 'rpy_fastgil' to 1. 
+*/ + +long rpy_fastgil = 1; +long rpy_waiting_threads = -1; +static mutex_t mutex_gil_stealer; +static mutex_t mutex_gil; + +void RPyGilAllocate(void) +{ + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + mutex_init(&mutex_gil_stealer); + mutex_init(&mutex_gil); + mutex_lock(&mutex_gil); + rpy_waiting_threads = 0; +} + +void RPyGilAcquire(void) +{ + /* Acquires the GIL. Note: this function saves and restores 'errno'. + */ + long old_fastgil = lock_test_and_set(&rpy_fastgil, 1); + + if (!RPY_FASTGIL_LOCKED(old_fastgil)) { + /* The fastgil was not previously locked: success. + 'mutex_gil' should still be locked at this point. + */ + } + else { + /* Otherwise, another thread is busy with the GIL. */ + int old_errno = errno; + + /* Register me as one of the threads that is actively waiting + for the GIL. The number of such threads is found in + rpy_lock_ready. */ + assert(rpy_waiting_threads >= 0); + atomic_increment(&rpy_waiting_threads); + + /* Enter the waiting queue from the end. Assuming a roughly + first-in-first-out order, this will nicely give the threads + a round-robin chance. + */ + mutex_lock(&mutex_gil_stealer); + + /* We are now the stealer thread. Steals! */ + while (1) { + /* Sleep for one interval of time. We may be woken up earlier + if 'mutex_gil' is released. + */ + if (mutex_lock_timeout(&mutex_gil, 0.001)) { /* 1 ms... */ + /* We arrive here if 'mutex_gil' was recently released + and we just relocked it. + */ + old_fastgil = 0; + break; + } + + /* Busy-looping here. Try to look again if 'rpy_fastgil' is + released. + */ + if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) { + old_fastgil = lock_test_and_set(&rpy_fastgil, 1); + if (!RPY_FASTGIL_LOCKED(old_fastgil)) + /* yes, got a non-held value! Now we hold it. */ + break; + } + /* Otherwise, loop back. 
*/ + } + atomic_decrement(&rpy_waiting_threads); + mutex_unlock(&mutex_gil_stealer); + + errno = old_errno; + } + +#ifdef PYPY_USE_ASMGCC + if (old_fastgil != 0) { + /* this case only occurs from the JIT compiler */ + struct pypy_ASM_FRAMEDATA_HEAD0 *new = + (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil; + struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD; + struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next; + new->as_next = next; + new->as_prev = root; + root->as_next = new; + next->as_prev = new; + } +#else + assert(old_fastgil == 0); +#endif + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + return; +} + +/* +void RPyGilRelease(void) +{ + Releases the GIL in order to do an external function call. + We assume that the common case is that the function call is + actually very short, and optimize accordingly. + + Note: this function is defined as a 'static inline' in thread.h. +} +*/ + +long RPyGilYieldThread(void) +{ + /* can be called even before RPyGilAllocate(), but in this case, + 'rpy_waiting_threads' will be -1. */ + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + if (rpy_waiting_threads <= 0) + return 0; + + /* Explicitly release the 'mutex_gil'. + */ + mutex_unlock(&mutex_gil); + + /* Now nobody has got the GIL, because 'mutex_gil' is released (but + rpy_fastgil is still locked). Call RPyGilAcquire(). It will + enqueue ourselves at the end of the 'mutex_gil_stealer' queue. + If there is no other waiting thread, it will fall through both + its pthread_mutex_lock() and pthread_mutex_timedlock() now. 
+ */ + RPyGilAcquire(); + return 1; +} diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -196,52 +196,35 @@ /* GIL code */ /************************************************************/ -static volatile LONG pending_acquires = -1; -static CRITICAL_SECTION mutex_gil; -static HANDLE cond_gil; +typedef HANDLE mutex_t; -long RPyGilAllocate(void) -{ - pending_acquires = 0; - InitializeCriticalSection(&mutex_gil); - EnterCriticalSection(&mutex_gil); - cond_gil = CreateEvent (NULL, FALSE, FALSE, NULL); - return 1; +static void gil_fatal(const char *msg) { + fprintf(stderr, "Fatal error in the GIL: %s\n", msg); + abort(); } -long RPyGilYieldThread(void) -{ - /* can be called even before RPyGilAllocate(), but in this case, - pending_acquires will be -1 */ - if (pending_acquires <= 0) - return 0; - InterlockedIncrement(&pending_acquires); - PulseEvent(cond_gil); - - /* hack: the three following lines do a pthread_cond_wait(), and - normally specifying a timeout of INFINITE would be fine. But the - first and second operations are not done atomically, so there is a - (small) risk that PulseEvent misses the WaitForSingleObject(). - In this case the process will just sleep a few milliseconds. 
*/ - LeaveCriticalSection(&mutex_gil); - WaitForSingleObject(cond_gil, 15); - EnterCriticalSection(&mutex_gil); - - InterlockedDecrement(&pending_acquires); - return 1; +static inline void mutex_init(mutex_t *mutex) { + *mutex = CreateMutex(NULL, 0, NULL); + if (*mutex == NULL) + gil_fatal("CreateMutex failed"); } -void RPyGilRelease(void) -{ - LeaveCriticalSection(&mutex_gil); - PulseEvent(cond_gil); +static inline void mutex_lock(mutex_t *mutex) { + WaitForSingleObject(*mutex, INFINITE); } -void RPyGilAcquire(void) -{ - InterlockedIncrement(&pending_acquires); - EnterCriticalSection(&mutex_gil); - InterlockedDecrement(&pending_acquires); +static inline void mutex_unlock(mutex_t *mutex) { + ReleaseMutex(*mutex); } -# error "XXX implement me" +static inline int mutex_lock_timeout(mutex_t *mutex, double delay) +{ + DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.9)); + return (result != WAIT_TIMEOUT); +} + +#define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value) +#define atomic_increment(ptr) InterlockedIncrement(ptr) +#define atomic_decrement(ptr) InterlockedDecrement(ptr) + +#include "src/thread_gil.c" diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -472,71 +472,18 @@ /* GIL code */ /************************************************************/ - #include - #define ASSERT_STATUS(call) \ if (call != 0) { \ fprintf(stderr, "Fatal error: " #call "\n"); \ abort(); \ } -/* Idea: - - - "The GIL" is a composite concept. There are two locks, and "the - GIL is locked" when both are locked. - - - The first lock is a simple global variable 'rpy_fastgil'. With - shadowstack, we use the most portable definition: 0 means unlocked - and != 0 means locked. With asmgcc, 0 means unlocked but only 1 - means locked. 
A different value means unlocked too, but the value - is used by the JIT to contain the stack top for stack root scanning. - - - The second lock is a regular mutex. In the fast path, it is never - unlocked. Remember that "the GIL is unlocked" means that either - the first or the second lock is unlocked. It should never be the - case that both are unlocked at the same time. - - - Let's call "thread 1" the thread with the GIL. Whenever it does an - external function call, it sets 'rpy_fastgil' to 0 (unlocked). - This is the cheapest way to release the GIL. When it returns from - the function call, this thread attempts to atomically change - 'rpy_fastgil' to 1. In the common case where it works, thread 1 - has got the GIL back and so continues to run. - - - Say "thread 2" is eagerly waiting for thread 1 to become blocked in - some long-running call. Regularly, it checks if 'rpy_fastgil' is 0 - and tries to atomically change it to 1. If it succeeds, it means - that the GIL was not previously locked. Thread 2 has now got the GIL. - - - If there are more than 2 threads, the rest is really sleeping by - waiting on the 'mutex_gil_stealer' held by thread 2. - - - An additional mechanism is used for when thread 1 wants to - explicitly yield the GIL to thread 2: it does so by releasing - 'mutex_gil' (which is otherwise not released) but keeping the - value of 'rpy_fastgil' to 1. 
-*/ - -long rpy_fastgil = 1; -static pthread_mutex_t mutex_gil_stealer; -static pthread_mutex_t mutex_gil; -long rpy_lock_ready = 0; - -void RPyGilAllocate(void) +static inline void timespec_add(struct timespec *t, double incr) { - assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); - ASSERT_STATUS(pthread_mutex_init(&mutex_gil_stealer, - pthread_mutexattr_default)); - ASSERT_STATUS(pthread_mutex_init(&mutex_gil, pthread_mutexattr_default)); - ASSERT_STATUS(pthread_mutex_lock(&mutex_gil)); - rpy_lock_ready = 1; -} - -static inline void timespec_add(struct timespec *t, long incr) -{ - long nsec = t->tv_nsec + incr; + /* assumes that "incr" is not too large, less than 1 second */ + long nsec = t->tv_nsec + (long)(incr * 1000000000.0); if (nsec >= 1000000000) { t->tv_sec += 1; nsec -= 1000000000; @@ -545,112 +492,29 @@ t->tv_nsec = nsec; } -void RPyGilAcquire(void) -{ - /* Acquires the GIL. Note: this function saves and restores 'errno'. - */ - long old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1); +typedef pthread_mutex_t mutex_t; - if (!RPY_FASTGIL_LOCKED(old_fastgil)) { - /* The fastgil was not previously locked: success. - 'mutex_gil' should still be locked at this point. - */ - } - else { - /* Otherwise, another thread is busy with the GIL. */ - int old_errno = errno; - - /* Enter the waiting queue from the end. Assuming a roughly - first-in-first-out order, this will nicely give the threads - a round-robin chance. - */ - assert(rpy_lock_ready); - ASSERT_STATUS(pthread_mutex_lock(&mutex_gil_stealer)); - - /* We are now the stealer thread. Steals! */ - while (1) { - int delay = 1000000; /* 1 ms... */ - struct timespec t; - - /* Sleep for one interval of time. We may be woken up earlier - if 'mutex_gil' is released. 
- */ - clock_gettime(CLOCK_REALTIME, &t); - timespec_add(&t, delay); - int error_from_timedlock = pthread_mutex_timedlock(&mutex_gil, &t); - - if (error_from_timedlock != ETIMEDOUT) { - ASSERT_STATUS(error_from_timedlock); - - /* We arrive here if 'mutex_gil' was recently released - and we just relocked it. - */ - old_fastgil = 0; - break; - } - - /* Busy-looping here. Try to look again if 'rpy_fastgil' is - released. - */ - if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) { - old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1); - if (!RPY_FASTGIL_LOCKED(old_fastgil)) - /* yes, got a non-held value! Now we hold it. */ - break; - } - /* Otherwise, loop back. */ - } - ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil_stealer)); - - errno = old_errno; - } - -#ifdef PYPY_USE_ASMGCC - if (old_fastgil != 0) { - /* this case only occurs from the JIT compiler */ - struct pypy_ASM_FRAMEDATA_HEAD0 *new = - (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil; - struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD; - struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next; - new->as_next = next; - new->as_prev = root; - root->as_next = new; - next->as_prev = new; - } -#else - assert(old_fastgil == 0); -#endif - assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); - return; +static inline void mutex_init(mutex_t *mutex) { + ASSERT_STATUS(pthread_mutex_init(mutex, pthread_mutexattr_default)); } - -/* -void RPyGilRelease(void) -{ - Releases the GIL in order to do an external function call. - We assume that the common case is that the function call is - actually very short, and optimize accordingly. - - Note: this function is defined as a 'static inline' in thread.h. 
+static inline void mutex_lock(mutex_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(mutex)); } -*/ - -long RPyGilYieldThread(void) -{ - assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); - if (!rpy_lock_ready) +static inline void mutex_unlock(mutex_t *mutex) { + ASSERT_STATUS(pthread_mutex_unlock(mutex)); +} +static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { + struct timespec t; + clock_gettime(CLOCK_REALTIME, &t); + timespec_add(&t, delay); + int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); + if (error_from_timedlock == ETIMEDOUT) return 0; - - /* Explicitly release the 'mutex_gil'. - */ - ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil)); - - /* Now nobody has got the GIL, because 'mutex_gil' is released (but - rpy_fastgil is still locked). Call RPyGilAcquire(). It will - enqueue ourselves at the end of the 'mutex_gil_stealer' queue. - If there is no other waiting thread, it will fall through both - its pthread_mutex_lock() and pthread_mutex_timedlock() now. - */ - RPyGilAcquire(); + ASSERT_STATUS(error_from_timedlock); return 1; } +#define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value) +#define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) +#define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) + +#include "src/thread_gil.c" From noreply at buildbot.pypy.org Tue Jun 24 19:39:05 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 19:39:05 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Mostly just update comments Message-ID: <20140624173905.821F21C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72199:8a1061fca743 Date: 2014-06-24 19:36 +0200 http://bitbucket.org/pypy/pypy/changeset/8a1061fca743/ Log: Mostly just update comments diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -37,7 +37,7 @@ */ long rpy_fastgil = 1; -long 
rpy_waiting_threads = -1; +long rpy_waiting_threads = -42; /* GIL not initialized */ static mutex_t mutex_gil_stealer; static mutex_t mutex_gil; @@ -67,7 +67,7 @@ /* Register me as one of the threads that is actively waiting for the GIL. The number of such threads is found in - rpy_lock_ready. */ + rpy_waiting_threads. */ assert(rpy_waiting_threads >= 0); atomic_increment(&rpy_waiting_threads); @@ -140,7 +140,7 @@ long RPyGilYieldThread(void) { /* can be called even before RPyGilAllocate(), but in this case, - 'rpy_waiting_threads' will be -1. */ + 'rpy_waiting_threads' will be -42. */ assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); if (rpy_waiting_threads <= 0) return 0; From noreply at buildbot.pypy.org Tue Jun 24 19:39:06 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 19:39:06 +0200 (CEST) Subject: [pypy-commit] pypy default: Kill old code lying around Message-ID: <20140624173906.B46A71C023B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72200:375133966c12 Date: 2014-06-24 19:37 +0200 http://bitbucket.org/pypy/pypy/changeset/375133966c12/ Log: Kill old code lying around diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1934,42 +1934,6 @@ self._genop_call(op, arglocs, result_loc, is_call_release_gil=True) self._emit_guard_not_forced(guard_token) - def call_reacquire_gil(self, gcrootmap, save_loc): - # save the previous result (eax/xmm0) into the stack temporarily. - # XXX like with call_release_gil(), we assume that we don't need - # to save xmm0 in this case. 
- if isinstance(save_loc, RegLoc) and not save_loc.is_xmm: - self.mc.MOV_sr(WORD, save_loc.value) - # call the reopenstack() function (also reacquiring the GIL) - if gcrootmap.is_shadow_stack: - args = [] - css = 0 - else: - from rpython.memory.gctransform import asmgcroot - css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS) - if IS_X86_32: - reg = eax - elif IS_X86_64: - reg = edi - self.mc.LEA_rs(reg.value, css) - args = [reg] - self._emit_call(imm(self.reacqgil_addr), args, can_collect=False) - # - # Now that we required the GIL, we can reload a possibly modified ebp - if not gcrootmap.is_shadow_stack: - # special-case: reload ebp from the css - from rpython.memory.gctransform import asmgcroot - index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP) - self.mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp] - #else: - # for shadowstack, done for us by _reload_frame_if_necessary() - self._reload_frame_if_necessary(self.mc) - self.set_extra_stack_depth(self.mc, 0) - # - # restore the result from the stack - if isinstance(save_loc, RegLoc) and not save_loc.is_xmm: - self.mc.MOV_rs(save_loc.value, WORD) - def imm(self, v): return imm(v) From noreply at buildbot.pypy.org Tue Jun 24 19:42:15 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 19:42:15 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: hg merge default Message-ID: <20140624174215.CFB621D2845@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72201:8ae7ad71f087 Date: 2014-06-24 19:41 +0200 http://bitbucket.org/pypy/pypy/changeset/8ae7ad71f087/ Log: hg merge default diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -443,6 +443,10 @@ for enumname, enumval in zip(tp.enumerators, tp.enumvalues): if enumname not in library.__dict__: library.__dict__[enumname] = enumval + for key, val in ffi._parser._int_constants.items(): + if key not in library.__dict__: + library.__dict__[key] = val + 
copied_enums.append(True) if name in library.__dict__: return diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -24,6 +24,7 @@ _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]") _r_words = re.compile(r"\w+|\S") _parser_cache = None +_r_int_literal = re.compile(r"^0?x?[0-9a-f]+u?l?$", re.IGNORECASE) def _get_parser(): global _parser_cache @@ -99,6 +100,7 @@ self._structnode2type = weakref.WeakKeyDictionary() self._override = False self._packed = False + self._int_constants = {} def _parse(self, csource): csource, macros = _preprocess(csource) @@ -128,9 +130,10 @@ finally: if lock is not None: lock.release() - return ast, macros + # csource will be used to find buggy source text + return ast, macros, csource - def convert_pycparser_error(self, e, csource): + def _convert_pycparser_error(self, e, csource): # xxx look for ":NUM:" at the start of str(e) and try to interpret # it as a line number line = None @@ -142,6 +145,12 @@ csourcelines = csource.splitlines() if 1 <= linenum <= len(csourcelines): line = csourcelines[linenum-1] + return line + + def convert_pycparser_error(self, e, csource): + line = self._convert_pycparser_error(e, csource) + + msg = str(e) if line: msg = 'cannot parse "%s"\n%s' % (line.strip(), msg) else: @@ -160,14 +169,9 @@ self._packed = prev_packed def _internal_parse(self, csource): - ast, macros = self._parse(csource) + ast, macros, csource = self._parse(csource) # add the macros - for key, value in macros.items(): - value = value.strip() - if value != '...': - raise api.CDefError('only supports the syntax "#define ' - '%s ..." 
for now (literally)' % key) - self._declare('macro ' + key, value) + self._process_macros(macros) # find the first "__dotdotdot__" and use that as a separator # between the repeated typedefs and the real csource iterator = iter(ast.ext) @@ -175,27 +179,61 @@ if decl.name == '__dotdotdot__': break # - for decl in iterator: - if isinstance(decl, pycparser.c_ast.Decl): - self._parse_decl(decl) - elif isinstance(decl, pycparser.c_ast.Typedef): - if not decl.name: - raise api.CDefError("typedef does not declare any name", - decl) - if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) - and decl.type.type.names == ['__dotdotdot__']): - realtype = model.unknown_type(decl.name) - elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and - isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and - isinstance(decl.type.type.type, - pycparser.c_ast.IdentifierType) and - decl.type.type.type.names == ['__dotdotdot__']): - realtype = model.unknown_ptr_type(decl.name) + try: + for decl in iterator: + if isinstance(decl, pycparser.c_ast.Decl): + self._parse_decl(decl) + elif isinstance(decl, pycparser.c_ast.Typedef): + if not decl.name: + raise api.CDefError("typedef does not declare any name", + decl) + if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) + and decl.type.type.names == ['__dotdotdot__']): + realtype = model.unknown_type(decl.name) + elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and + isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and + isinstance(decl.type.type.type, + pycparser.c_ast.IdentifierType) and + decl.type.type.type.names == ['__dotdotdot__']): + realtype = model.unknown_ptr_type(decl.name) + else: + realtype = self._get_type(decl.type, name=decl.name) + self._declare('typedef ' + decl.name, realtype) else: - realtype = self._get_type(decl.type, name=decl.name) - self._declare('typedef ' + decl.name, realtype) + raise api.CDefError("unrecognized construct", decl) + except api.FFIError as e: + msg = 
self._convert_pycparser_error(e, csource) + if msg: + e.args = (e.args[0] + "\n *** Err: %s" % msg,) + raise + + def _add_constants(self, key, val): + if key in self._int_constants: + raise api.FFIError( + "multiple declarations of constant: %s" % (key,)) + self._int_constants[key] = val + + def _process_macros(self, macros): + for key, value in macros.items(): + value = value.strip() + match = _r_int_literal.search(value) + if match is not None: + int_str = match.group(0).lower().rstrip("ul") + + # "010" is not valid oct in py3 + if (int_str.startswith("0") and + int_str != "0" and + not int_str.startswith("0x")): + int_str = "0o" + int_str[1:] + + pyvalue = int(int_str, 0) + self._add_constants(key, pyvalue) + elif value == '...': + self._declare('macro ' + key, value) else: - raise api.CDefError("unrecognized construct", decl) + raise api.CDefError('only supports the syntax "#define ' + '%s ..." (literally) or "#define ' + '%s 0x1FF" for now' % (key, key)) def _parse_decl(self, decl): node = decl.type @@ -227,7 +265,7 @@ self._declare('variable ' + decl.name, tp) def parse_type(self, cdecl): - ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl) + ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2] assert not macros exprnode = ast.ext[-1].type.args.params[0] if isinstance(exprnode, pycparser.c_ast.ID): @@ -306,7 +344,8 @@ if ident == 'void': return model.void_type if ident == '__dotdotdot__': - raise api.FFIError('bad usage of "..."') + raise api.FFIError(':%d: bad usage of "..."' % + typenode.coord.line) return resolve_common_type(ident) # if isinstance(type, pycparser.c_ast.Struct): @@ -333,7 +372,8 @@ return self._get_struct_union_enum_type('union', typenode, name, nested=True) # - raise api.FFIError("bad or unsupported type declaration") + raise api.FFIError(":%d: bad or unsupported type declaration" % + typenode.coord.line) def _parse_function_type(self, typenode, funcname=None): params = list(getattr(typenode.args, 'params', [])) @@ -499,6 
+539,10 @@ if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and exprnode.op == '-'): return -self._parse_constant(exprnode.expr) + # load previously defined int constant + if (isinstance(exprnode, pycparser.c_ast.ID) and + exprnode.name in self._int_constants): + return self._int_constants[exprnode.name] # if partial_length_ok: if (isinstance(exprnode, pycparser.c_ast.ID) and @@ -506,8 +550,8 @@ self._partial_length = True return '...' # - raise api.FFIError("unsupported expression: expected a " - "simple numeric constant") + raise api.FFIError(":%d: unsupported expression: expected a " + "simple numeric constant" % exprnode.coord.line) def _build_enum_type(self, explicit_name, decls): if decls is not None: @@ -522,6 +566,7 @@ if enum.value is not None: nextenumvalue = self._parse_constant(enum.value) enumvalues.append(nextenumvalue) + self._add_constants(enum.name, nextenumvalue) nextenumvalue += 1 enumvalues = tuple(enumvalues) tp = model.EnumType(explicit_name, enumerators, enumvalues) @@ -535,3 +580,5 @@ kind = name.split(' ', 1)[0] if kind in ('typedef', 'struct', 'union', 'enum'): self._declare(name, tp) + for k, v in other._int_constants.items(): + self._add_constants(k, v) diff --git a/lib_pypy/cffi/ffiplatform.py b/lib_pypy/cffi/ffiplatform.py --- a/lib_pypy/cffi/ffiplatform.py +++ b/lib_pypy/cffi/ffiplatform.py @@ -38,6 +38,7 @@ import distutils.errors # dist = Distribution({'ext_modules': [ext]}) + dist.parse_config_files() options = dist.get_option_dict('build_ext') options['force'] = ('ffiplatform', True) options['build_lib'] = ('ffiplatform', tmpdir) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -89,43 +89,54 @@ # by generate_cpy_function_method(). 
prnt('static PyMethodDef _cffi_methods[] = {') self._generate("method") - prnt(' {"_cffi_setup", _cffi_setup, METH_VARARGS},') - prnt(' {NULL, NULL} /* Sentinel */') + prnt(' {"_cffi_setup", _cffi_setup, METH_VARARGS, NULL},') + prnt(' {NULL, NULL, 0, NULL} /* Sentinel */') prnt('};') prnt() # # standard init. modname = self.verifier.get_module_name() - if sys.version_info >= (3,): - prnt('static struct PyModuleDef _cffi_module_def = {') - prnt(' PyModuleDef_HEAD_INIT,') - prnt(' "%s",' % modname) - prnt(' NULL,') - prnt(' -1,') - prnt(' _cffi_methods,') - prnt(' NULL, NULL, NULL, NULL') - prnt('};') - prnt() - initname = 'PyInit_%s' % modname - createmod = 'PyModule_Create(&_cffi_module_def)' - errorcase = 'return NULL' - finalreturn = 'return lib' - else: - initname = 'init%s' % modname - createmod = 'Py_InitModule("%s", _cffi_methods)' % modname - errorcase = 'return' - finalreturn = 'return' + constants = self._chained_list_constants[False] + prnt('#if PY_MAJOR_VERSION >= 3') + prnt() + prnt('static struct PyModuleDef _cffi_module_def = {') + prnt(' PyModuleDef_HEAD_INIT,') + prnt(' "%s",' % modname) + prnt(' NULL,') + prnt(' -1,') + prnt(' _cffi_methods,') + prnt(' NULL, NULL, NULL, NULL') + prnt('};') + prnt() prnt('PyMODINIT_FUNC') - prnt('%s(void)' % initname) + prnt('PyInit_%s(void)' % modname) prnt('{') prnt(' PyObject *lib;') - prnt(' lib = %s;' % createmod) - prnt(' if (lib == NULL || %s < 0)' % ( - self._chained_list_constants[False],)) - prnt(' %s;' % errorcase) - prnt(' _cffi_init();') - prnt(' %s;' % finalreturn) + prnt(' lib = PyModule_Create(&_cffi_module_def);') + prnt(' if (lib == NULL)') + prnt(' return NULL;') + prnt(' if (%s < 0 || _cffi_init() < 0) {' % (constants,)) + prnt(' Py_DECREF(lib);') + prnt(' return NULL;') + prnt(' }') + prnt(' return lib;') prnt('}') + prnt() + prnt('#else') + prnt() + prnt('PyMODINIT_FUNC') + prnt('init%s(void)' % modname) + prnt('{') + prnt(' PyObject *lib;') + prnt(' lib = Py_InitModule("%s", _cffi_methods);' 
% modname) + prnt(' if (lib == NULL)') + prnt(' return;') + prnt(' if (%s < 0 || _cffi_init() < 0)' % (constants,)) + prnt(' return;') + prnt(' return;') + prnt('}') + prnt() + prnt('#endif') def load_library(self): # XXX review all usages of 'self' here! @@ -394,7 +405,7 @@ meth = 'METH_O' else: meth = 'METH_VARARGS' - self._prnt(' {"%s", _cffi_f_%s, %s},' % (name, name, meth)) + self._prnt(' {"%s", _cffi_f_%s, %s, NULL},' % (name, name, meth)) _loading_cpy_function = _loaded_noop @@ -481,8 +492,8 @@ if tp.fldnames is None: return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) - self._prnt(' {"%s", %s, METH_NOARGS},' % (layoutfuncname, - layoutfuncname)) + self._prnt(' {"%s", %s, METH_NOARGS, NULL},' % (layoutfuncname, + layoutfuncname)) def _loading_struct_or_union(self, tp, prefix, name, module): if tp.fldnames is None: @@ -589,13 +600,7 @@ 'variable type'),)) assert delayed else: - prnt(' if (LONG_MIN <= (%s) && (%s) <= LONG_MAX)' % (name, name)) - prnt(' o = PyInt_FromLong((long)(%s));' % (name,)) - prnt(' else if ((%s) <= 0)' % (name,)) - prnt(' o = PyLong_FromLongLong((long long)(%s));' % (name,)) - prnt(' else') - prnt(' o = PyLong_FromUnsignedLongLong(' - '(unsigned long long)(%s));' % (name,)) + prnt(' o = _cffi_from_c_int_const(%s);' % name) prnt(' if (o == NULL)') prnt(' return -1;') if size_too: @@ -632,13 +637,18 @@ # ---------- # enums + def _enum_funcname(self, prefix, name): + # "$enum_$1" => "___D_enum____D_1" + name = name.replace('$', '___D_') + return '_cffi_e_%s_%s' % (prefix, name) + def _generate_cpy_enum_decl(self, tp, name, prefix='enum'): if tp.partial: for enumerator in tp.enumerators: self._generate_cpy_const(True, enumerator, delayed=False) return # - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) prnt = self._prnt prnt('static int %s(PyObject *lib)' % funcname) prnt('{') @@ -760,17 +770,30 @@ #include #include -#ifdef MS_WIN32 -#include /* for 
alloca() */ -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -typedef unsigned char _Bool; +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif +#else +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif #if PY_MAJOR_VERSION < 3 @@ -795,6 +818,15 @@ #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble +#define _cffi_from_c_int_const(x) \ + (((x) > 0) ? \ + ((unsigned long long)(x) <= (unsigned long long)LONG_MAX) ? \ + PyInt_FromLong((long)(x)) : \ + PyLong_FromUnsignedLongLong((unsigned long long)(x)) : \ + ((long long)(x) >= (long long)LONG_MIN) ? \ + PyInt_FromLong((long)(x)) : \ + PyLong_FromLongLong((long long)(x))) + #define _cffi_from_c_int(x, type) \ (((type)-1) > 0 ? /* unsigned */ \ (sizeof(type) < sizeof(long) ? PyInt_FromLong(x) : \ @@ -804,14 +836,14 @@ PyLong_FromLongLong(x))) #define _cffi_to_c_int(o, type) \ - (sizeof(type) == 1 ? (((type)-1) > 0 ? _cffi_to_c_u8(o) \ - : _cffi_to_c_i8(o)) : \ - sizeof(type) == 2 ? (((type)-1) > 0 ? _cffi_to_c_u16(o) \ - : _cffi_to_c_i16(o)) : \ - sizeof(type) == 4 ? (((type)-1) > 0 ? _cffi_to_c_u32(o) \ - : _cffi_to_c_i32(o)) : \ - sizeof(type) == 8 ? (((type)-1) > 0 ? 
_cffi_to_c_u64(o) \ - : _cffi_to_c_i64(o)) : \ + (sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o) \ + : (type)_cffi_to_c_i8(o)) : \ + sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o) \ + : (type)_cffi_to_c_i16(o)) : \ + sizeof(type) == 4 ? (((type)-1) > 0 ? (type)_cffi_to_c_u32(o) \ + : (type)_cffi_to_c_i32(o)) : \ + sizeof(type) == 8 ? (((type)-1) > 0 ? (type)_cffi_to_c_u64(o) \ + : (type)_cffi_to_c_i64(o)) : \ (Py_FatalError("unsupported size for type " #type), 0)) #define _cffi_to_c_i8 \ @@ -885,25 +917,32 @@ return PyBool_FromLong(was_alive); } -static void _cffi_init(void) +static int _cffi_init(void) { - PyObject *module = PyImport_ImportModule("_cffi_backend"); - PyObject *c_api_object; + PyObject *module, *c_api_object = NULL; + module = PyImport_ImportModule("_cffi_backend"); if (module == NULL) - return; + goto failure; c_api_object = PyObject_GetAttrString(module, "_C_API"); if (c_api_object == NULL) - return; + goto failure; if (!PyCapsule_CheckExact(c_api_object)) { - Py_DECREF(c_api_object); PyErr_SetNone(PyExc_ImportError); - return; + goto failure; } memcpy(_cffi_exports, PyCapsule_GetPointer(c_api_object, "cffi"), _CFFI_NUM_EXPORTS * sizeof(void *)); + + Py_DECREF(module); Py_DECREF(c_api_object); + return 0; + + failure: + Py_XDECREF(module); + Py_XDECREF(c_api_object); + return -1; } #define _cffi_type(num) ((CTypeDescrObject *)PyList_GET_ITEM(_cffi_types, num)) diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py --- a/lib_pypy/cffi/vengine_gen.py +++ b/lib_pypy/cffi/vengine_gen.py @@ -249,10 +249,10 @@ prnt(' /* %s */' % str(e)) # cannot verify it, ignore prnt('}') self.export_symbols.append(layoutfuncname) - prnt('ssize_t %s(ssize_t i)' % (layoutfuncname,)) + prnt('intptr_t %s(intptr_t i)' % (layoutfuncname,)) prnt('{') prnt(' struct _cffi_aligncheck { char x; %s y; };' % cname) - prnt(' static ssize_t nums[] = {') + prnt(' static intptr_t nums[] = {') prnt(' sizeof(%s),' % cname) prnt(' 
offsetof(struct _cffi_aligncheck, y),') for fname, ftype, fbitsize in tp.enumfields(): @@ -276,7 +276,7 @@ return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) # - BFunc = self.ffi._typeof_locked("ssize_t(*)(ssize_t)")[0] + BFunc = self.ffi._typeof_locked("intptr_t(*)(intptr_t)")[0] function = module.load_function(BFunc, layoutfuncname) layout = [] num = 0 @@ -410,13 +410,18 @@ # ---------- # enums + def _enum_funcname(self, prefix, name): + # "$enum_$1" => "___D_enum____D_1" + name = name.replace('$', '___D_') + return '_cffi_e_%s_%s' % (prefix, name) + def _generate_gen_enum_decl(self, tp, name, prefix='enum'): if tp.partial: for enumerator in tp.enumerators: self._generate_gen_const(True, enumerator) return # - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) self.export_symbols.append(funcname) prnt = self._prnt prnt('int %s(char *out_error)' % funcname) @@ -453,7 +458,7 @@ else: BType = self.ffi._typeof_locked("char[]")[0] BFunc = self.ffi._typeof_locked("int(*)(char*)")[0] - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) function = module.load_function(BFunc, funcname) p = self.ffi.new(BType, 256) if function(p) < 0: @@ -547,20 +552,29 @@ #include #include /* XXX for ssize_t on some platforms */ -#ifdef _WIN32 -# include -# define snprintf _snprintf -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -typedef SSIZE_T ssize_t; -typedef unsigned char _Bool; +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 
int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif #else -# include +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif ''' diff --git a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py @@ -866,25 +866,25 @@ def test_enum(self): ffi = FFI(backend=self.Backend()) - ffi.cdef("enum foo { A, B, CC, D };") - assert ffi.string(ffi.cast("enum foo", 0)) == "A" - assert ffi.string(ffi.cast("enum foo", 2)) == "CC" - assert ffi.string(ffi.cast("enum foo", 3)) == "D" + ffi.cdef("enum foo { A0, B0, CC0, D0 };") + assert ffi.string(ffi.cast("enum foo", 0)) == "A0" + assert ffi.string(ffi.cast("enum foo", 2)) == "CC0" + assert ffi.string(ffi.cast("enum foo", 3)) == "D0" assert ffi.string(ffi.cast("enum foo", 4)) == "4" - ffi.cdef("enum bar { A, B=-2, CC, D, E };") - assert ffi.string(ffi.cast("enum bar", 0)) == "A" - assert ffi.string(ffi.cast("enum bar", -2)) == "B" - assert ffi.string(ffi.cast("enum bar", -1)) == "CC" - assert ffi.string(ffi.cast("enum bar", 1)) == "E" + ffi.cdef("enum bar { A1, B1=-2, CC1, D1, E1 };") + assert ffi.string(ffi.cast("enum bar", 0)) == "A1" + assert ffi.string(ffi.cast("enum bar", -2)) == "B1" + assert ffi.string(ffi.cast("enum bar", -1)) == "CC1" + assert ffi.string(ffi.cast("enum bar", 1)) == "E1" assert ffi.cast("enum bar", -2) != ffi.cast("enum bar", -2) assert ffi.cast("enum foo", 0) != ffi.cast("enum bar", 0) assert ffi.cast("enum bar", 0) != ffi.cast("int", 0) - assert repr(ffi.cast("enum bar", -1)) == "" + assert repr(ffi.cast("enum bar", -1)) == "" assert repr(ffi.cast("enum foo", 
-1)) == ( # enums are unsigned, if "") # they contain no neg value - ffi.cdef("enum baz { A=0x1000, B=0x2000 };") - assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A" - assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B" + ffi.cdef("enum baz { A2=0x1000, B2=0x2000 };") + assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A2" + assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B2" def test_enum_in_struct(self): ffi = FFI(backend=self.Backend()) @@ -1323,6 +1323,16 @@ e = ffi.cast("enum e", 0) assert ffi.string(e) == "AA" # pick the first one arbitrarily + def test_enum_refer_previous_enum_value(self): + ffi = FFI(backend=self.Backend()) + ffi.cdef("enum e { AA, BB=2, CC=4, DD=BB, EE, FF=CC, GG=FF };") + assert ffi.string(ffi.cast("enum e", 2)) == "BB" + assert ffi.string(ffi.cast("enum e", 3)) == "EE" + assert ffi.sizeof("char[DD]") == 2 + assert ffi.sizeof("char[EE]") == 3 + assert ffi.sizeof("char[FF]") == 4 + assert ffi.sizeof("char[GG]") == 4 + def test_nested_anonymous_struct(self): ffi = FFI(backend=self.Backend()) ffi.cdef(""" @@ -1544,6 +1554,7 @@ ffi2.include(ffi1) p = ffi2.cast("enum foo", 1) assert ffi2.string(p) == "FB" + assert ffi2.sizeof("char[FC]") == 2 def test_include_typedef_2(self): backend = self.Backend() @@ -1564,10 +1575,32 @@ assert ffi.alignof("struct is_packed") == 1 s = ffi.new("struct is_packed[2]") s[0].b = 42623381 - s[0].a = 'X' + s[0].a = b'X' s[1].b = -4892220 - s[1].a = 'Y' + s[1].a = b'Y' assert s[0].b == 42623381 - assert s[0].a == 'X' + assert s[0].a == b'X' assert s[1].b == -4892220 - assert s[1].a == 'Y' + assert s[1].a == b'Y' + + def test_define_integer_constant(self): + ffi = FFI(backend=self.Backend()) + ffi.cdef(""" + #define DOT_0 0 + #define DOT 100 + #define DOT_OCT 0100l + #define DOT_HEX 0x100u + #define DOT_HEX2 0X10 + #define DOT_UL 1000UL + enum foo {AA, BB=DOT, CC}; + """) + lib = ffi.dlopen(None) + assert ffi.string(ffi.cast("enum foo", 100)) == "BB" + assert lib.DOT_0 == 0 + assert lib.DOT == 100 + 
assert lib.DOT_OCT == 0o100 + assert lib.DOT_HEX == 0x100 + assert lib.DOT_HEX2 == 0x10 + assert lib.DOT_UL == 1000 + + diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/test_function.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_function.py @@ -36,13 +36,11 @@ return self._value lib_m = 'm' -has_sinf = True if sys.platform == 'win32': #there is a small chance this fails on Mingw via environ $CC import distutils.ccompiler if distutils.ccompiler.get_default_compiler() == 'msvc': lib_m = 'msvcrt' - has_sinf = False class TestFunction(object): Backend = CTypesBackend @@ -57,8 +55,8 @@ assert x == math.sin(1.23) def test_sinf(self): - if not has_sinf: - py.test.skip("sinf not available") + if sys.platform == 'win32': + py.test.skip("no sinf found in the Windows stdlib") ffi = FFI(backend=self.Backend()) ffi.cdef(""" float sinf(float x); diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py @@ -162,9 +162,10 @@ def test_define_not_supported_for_now(): ffi = FFI(backend=FakeBackend()) - e = py.test.raises(CDefError, ffi.cdef, "#define FOO 42") - assert str(e.value) == \ - 'only supports the syntax "#define FOO ..." for now (literally)' + e = py.test.raises(CDefError, ffi.cdef, '#define FOO "blah"') + assert str(e.value) == ( + 'only supports the syntax "#define FOO ..." 
(literally)' + ' or "#define FOO 0x1FF" for now') def test_unnamed_struct(): ffi = FFI(backend=FakeBackend()) diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py @@ -1,5 +1,5 @@ # Generated by pypy/tool/import_cffi.py -import py +import py, re import sys, os, math, weakref from cffi import FFI, VerificationError, VerificationMissing, model from pypy.module.test_lib_pypy.cffi_tests.support import * @@ -30,6 +30,24 @@ def setup_module(): import cffi.verifier cffi.verifier.cleanup_tmpdir() + # + # check that no $ sign is produced in the C file; it used to be the + # case that anonymous enums would produce '$enum_$1', which was + # used as part of a function name. GCC accepts such names, but it's + # apparently non-standard. + _r_comment = re.compile(r"/\*.*?\*/|//.*?$", re.DOTALL | re.MULTILINE) + _r_string = re.compile(r'\".*?\"') + def _write_source_and_check(self, file=None): + base_write_source(self, file) + if file is None: + f = open(self.sourcefilename) + data = f.read() + f.close() + data = _r_comment.sub(' ', data) + data = _r_string.sub('"skipped"', data) + assert '$' not in data + base_write_source = cffi.verifier.Verifier._write_source + cffi.verifier.Verifier._write_source = _write_source_and_check def test_module_type(): @@ -154,6 +172,9 @@ all_primitive_types = model.PrimitiveType.ALL_PRIMITIVE_TYPES +if sys.platform == 'win32': + all_primitive_types = all_primitive_types.copy() + del all_primitive_types['ssize_t'] all_integer_types = sorted(tp for tp in all_primitive_types if all_primitive_types[tp] == 'i') all_float_types = sorted(tp for tp in all_primitive_types @@ -1453,8 +1474,8 @@ assert func() == 42 def test_FILE_stored_in_stdout(): - if sys.platform == 'win32': - py.test.skip("MSVC: cannot assign to stdout") + if not sys.platform.startswith('linux'): + 
py.test.skip("likely, we cannot assign to stdout") ffi = FFI() ffi.cdef("int printf(const char *, ...); FILE *setstdout(FILE *);") lib = ffi.verify(""" @@ -1637,8 +1658,8 @@ ffi = FFI() ffi.cdef(""" int (*python_callback)(int how_many, int *values); - void *const c_callback; /* pass this ptr to C routines */ - int some_c_function(void *cb); + int (*const c_callback)(int,...); /* pass this ptr to C routines */ + int some_c_function(int(*cb)(int,...)); """) lib = ffi.verify(""" #include diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_version.py b/pypy/module/test_lib_pypy/cffi_tests/test_version.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_version.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_version.py @@ -11,7 +11,6 @@ '0.7.1': '0.7', # did not change '0.7.2': '0.7', # did not change '0.8.1': '0.8', # did not change (essentially) - '0.8.2': '0.8', # did not change } def test_version(): @@ -26,7 +25,7 @@ content = open(p).read() # v = cffi.__version__ - assert ("version = '%s'\n" % BACKEND_VERSIONS.get(v, v)) in content + assert ("version = '%s'\n" % v[:3]) in content assert ("release = '%s'\n" % v) in content def test_doc_version_file(): diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1934,42 +1934,6 @@ self._genop_call(op, arglocs, result_loc, is_call_release_gil=True) self._emit_guard_not_forced(guard_token) - def call_reacquire_gil(self, gcrootmap, save_loc): - # save the previous result (eax/xmm0) into the stack temporarily. - # XXX like with call_release_gil(), we assume that we don't need - # to save xmm0 in this case. 
- if isinstance(save_loc, RegLoc) and not save_loc.is_xmm: - self.mc.MOV_sr(WORD, save_loc.value) - # call the reopenstack() function (also reacquiring the GIL) - if gcrootmap.is_shadow_stack: - args = [] - css = 0 - else: - from rpython.memory.gctransform import asmgcroot - css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS) - if IS_X86_32: - reg = eax - elif IS_X86_64: - reg = edi - self.mc.LEA_rs(reg.value, css) - args = [reg] - self._emit_call(imm(self.reacqgil_addr), args, can_collect=False) - # - # Now that we required the GIL, we can reload a possibly modified ebp - if not gcrootmap.is_shadow_stack: - # special-case: reload ebp from the css - from rpython.memory.gctransform import asmgcroot - index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP) - self.mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp] - #else: - # for shadowstack, done for us by _reload_frame_if_necessary() - self._reload_frame_if_necessary(self.mc) - self.set_extra_stack_depth(self.mc, 0) - # - # restore the result from the stack - if isinstance(save_loc, RegLoc) and not save_loc.is_xmm: - self.mc.MOV_rs(save_loc.value, WORD) - def imm(self, v): return imm(v) From noreply at buildbot.pypy.org Tue Jun 24 19:51:33 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 19:51:33 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: For now, increase the active pinging from the stealing thread to every Message-ID: <20140624175133.C438E1D2845@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72202:447468c284c3 Date: 2014-06-24 19:50 +0200 http://bitbucket.org/pypy/pypy/changeset/447468c284c3/ Log: For now, increase the active pinging from the stealing thread to every 0.1 millisecond. diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -82,7 +82,7 @@ /* Sleep for one interval of time. 
We may be woken up earlier if 'mutex_gil' is released. */ - if (mutex_lock_timeout(&mutex_gil, 0.001)) { /* 1 ms... */ + if (mutex_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ /* We arrive here if 'mutex_gil' was recently released and we just relocked it. */ diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -219,7 +219,7 @@ static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { - DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.9)); + DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.999)); return (result != WAIT_TIMEOUT); } From noreply at buildbot.pypy.org Tue Jun 24 20:01:30 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 20:01:30 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: for clarify: we 'have the GIL' from there already Message-ID: <20140624180130.EB8081D2861@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72203:ff442cbc0012 Date: 2014-06-24 20:00 +0200 http://bitbucket.org/pypy/pypy/changeset/ff442cbc0012/ Log: for clarify: we 'have the GIL' from there already diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -106,6 +106,7 @@ errno = old_errno; } + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); #ifdef PYPY_USE_ASMGCC if (old_fastgil != 0) { @@ -122,7 +123,6 @@ #else assert(old_fastgil == 0); #endif - assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); return; } From noreply at buildbot.pypy.org Tue Jun 24 21:01:49 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 21:01:49 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: On Windows, a "mutex" does too much by linking to which thread locked it. A "semaphore" doesn't. 
Message-ID: <20140624190149.831051D23C2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72204:c00cc7d1e7c8 Date: 2014-06-24 21:01 +0200 http://bitbucket.org/pypy/pypy/changeset/c00cc7d1e7c8/ Log: On Windows, a "mutex" does too much by linking to which thread locked it. A "semaphore" doesn't. diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -196,7 +196,7 @@ /* GIL code */ /************************************************************/ -typedef HANDLE mutex_t; +typedef HANDLE mutex_t; /* a semaphore, on Windows */ static void gil_fatal(const char *msg) { fprintf(stderr, "Fatal error in the GIL: %s\n", msg); @@ -204,9 +204,9 @@ } static inline void mutex_init(mutex_t *mutex) { - *mutex = CreateMutex(NULL, 0, NULL); + *mutex = CreateSemaphore(NULL, 1, 1, NULL); if (*mutex == NULL) - gil_fatal("CreateMutex failed"); + gil_fatal("CreateSemaphore failed"); } static inline void mutex_lock(mutex_t *mutex) { @@ -214,7 +214,7 @@ } static inline void mutex_unlock(mutex_t *mutex) { - ReleaseMutex(*mutex); + ReleaseSemaphore(*mutex, 1, NULL); } static inline int mutex_lock_timeout(mutex_t *mutex, double delay) From noreply at buildbot.pypy.org Tue Jun 24 21:01:50 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 21:01:50 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: typo Message-ID: <20140624190150.C28131D23C2@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72205:8195e6a2214c Date: 2014-06-24 21:01 +0200 http://bitbucket.org/pypy/pypy/changeset/8195e6a2214c/ Log: typo diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -312,7 +312,7 @@ if old_rpy_fastgil == 0: # this case occurs if some other thread stole the GIL but # 
released it again. What occurred here is that we changed - # 'rpy_fastgil' from 0 to 1, thus successfully requiring the + # 'rpy_fastgil' from 0 to 1, thus successfully reaquiring the # GIL. pass From noreply at buildbot.pypy.org Tue Jun 24 21:08:20 2014 From: noreply at buildbot.pypy.org (arigo) Date: Tue, 24 Jun 2014 21:08:20 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: comment Message-ID: <20140624190820.EF6E21D2861@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72206:ecc82fba4e83 Date: 2014-06-24 21:07 +0200 http://bitbucket.org/pypy/pypy/changeset/ecc82fba4e83/ Log: comment diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -123,7 +123,6 @@ #else assert(old_fastgil == 0); #endif - return; } /* @@ -153,7 +152,8 @@ rpy_fastgil is still locked). Call RPyGilAcquire(). It will enqueue ourselves at the end of the 'mutex_gil_stealer' queue. If there is no other waiting thread, it will fall through both - its pthread_mutex_lock() and pthread_mutex_timedlock() now. + its mutex_lock() and mutex_lock_timeout() now. But that's + unlikely, because we tested above that 'rpy_waiting_threads > 0'. 
*/ RPyGilAcquire(); return 1; From noreply at buildbot.pypy.org Wed Jun 25 10:34:37 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 10:34:37 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Reintroduce the #includes: without them, we don't get PYPY_USE_ASMGCC here at all Message-ID: <20140625083437.6F0331D2A6C@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72207:15e5e4c34776 Date: 2014-06-25 10:33 +0200 http://bitbucket.org/pypy/pypy/changeset/15e5e4c34776/ Log: Reintroduce the #includes: without them, we don't get PYPY_USE_ASMGCC here at all diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -1,6 +1,7 @@ #ifndef __PYPY_THREAD_H #define __PYPY_THREAD_H #include +#include "common_header.h" #define RPY_TIMEOUT_T long long diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -1,3 +1,7 @@ +#ifdef PYPY_USE_ASMGCC +# include "structdef.h" +# include "forwarddecl.h" +#endif /* Idea: From noreply at buildbot.pypy.org Wed Jun 25 11:14:10 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 11:14:10 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Don't need the errno-saving logic Message-ID: <20140625091410.D1CA31C01CB@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72208:40e4dd588bd7 Date: 2014-06-25 11:13 +0200 http://bitbucket.org/pypy/pypy/changeset/40e4dd588bd7/ Log: Don't need the errno-saving logic diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -70,9 +70,7 @@ def before_external_call(): # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! 
- e = get_errno() rgil.gil_release() - set_errno(e) before_external_call._gctransformer_hint_cannot_collect_ = True before_external_call._dont_reach_me_in_del_ = True diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -56,7 +56,7 @@ void RPyGilAcquire(void) { - /* Acquires the GIL. Note: this function saves and restores 'errno'. + /* Acquires the GIL. */ long old_fastgil = lock_test_and_set(&rpy_fastgil, 1); @@ -67,7 +67,6 @@ } else { /* Otherwise, another thread is busy with the GIL. */ - int old_errno = errno; /* Register me as one of the threads that is actively waiting for the GIL. The number of such threads is found in @@ -107,8 +106,6 @@ } atomic_decrement(&rpy_waiting_threads); mutex_unlock(&mutex_gil_stealer); - - errno = old_errno; } assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); From noreply at buildbot.pypy.org Wed Jun 25 11:30:47 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 11:30:47 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: PYPY_USE_ASMGCC should be declared systematically, which requires -D. Message-ID: <20140625093047.AC1541D2A7E@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72209:5af5fbca3235 Date: 2014-06-25 11:30 +0200 http://bitbucket.org/pypy/pypy/changeset/5af5fbca3235/ Log: PYPY_USE_ASMGCC should be declared systematically, which requires -D. 
diff --git a/rpython/memory/gctransform/asmgcroot.py b/rpython/memory/gctransform/asmgcroot.py --- a/rpython/memory/gctransform/asmgcroot.py +++ b/rpython/memory/gctransform/asmgcroot.py @@ -782,7 +782,7 @@ gcrootanchor.next = gcrootanchor c_gcrootanchor = Constant(gcrootanchor, ASM_FRAMEDATA_HEAD_PTR) -eci = ExternalCompilationInfo(pre_include_bits=['#define PYPY_USE_ASMGCC']) +eci = ExternalCompilationInfo(compile_extra=['-DPYPY_USE_ASMGCC']) pypy_asm_stackwalk = rffi.llexternal('pypy_asm_stackwalk', [ASM_CALLBACK_PTR, diff --git a/rpython/translator/c/src/thread.c b/rpython/translator/c/src/thread.c --- a/rpython/translator/c/src/thread.c +++ b/rpython/translator/c/src/thread.c @@ -9,9 +9,14 @@ #include "common_header.h" #endif +#ifdef PYPY_USE_ASMGCC +# include "common_header.h" +# include "structdef.h" +# include "forwarddecl.h" +#endif + #ifdef _WIN32 #include "src/thread_nt.c" #else #include "src/thread_pthread.c" #endif - diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -1,7 +1,6 @@ #ifndef __PYPY_THREAD_H #define __PYPY_THREAD_H #include -#include "common_header.h" #define RPY_TIMEOUT_T long long diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -1,7 +1,3 @@ -#ifdef PYPY_USE_ASMGCC -# include "structdef.h" -# include "forwarddecl.h" -#endif /* Idea: From noreply at buildbot.pypy.org Wed Jun 25 11:37:43 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 11:37:43 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Generalize Message-ID: <20140625093743.896D71D2A7E@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72210:26890e0b7491 Date: 2014-06-25 11:37 +0200 http://bitbucket.org/pypy/pypy/changeset/26890e0b7491/ Log: Generalize diff --git 
a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -858,9 +858,6 @@ return [] def _visit_xchg(self, line): - # ignore the 'rpy_fastgil' atomic exchange - if 'rpy_fastgil' in line: - return [] # support the format used in VALGRIND_DISCARD_TRANSLATIONS # which is to use a marker no-op "xchgl %ebx, %ebx" match = self.r_binaryinsn.match(line) @@ -868,6 +865,10 @@ target = match.group("target") if source == target: return [] + # ignore the 'rpy_fastgil' atomic exchange, or any locked + # atomic exchange at all (involving memory) + if not source.startswith('%'): + return [] raise UnrecognizedOperation(line) def visit_call(self, line): From noreply at buildbot.pypy.org Wed Jun 25 16:42:45 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 16:42:45 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: remove nursery_real_top and now nursery only have one top Message-ID: <20140625144245.8E1CF1C01CB@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72212:48a484d3ac3b Date: 2014-06-25 10:37 -0400 http://bitbucket.org/pypy/pypy/changeset/48a484d3ac3b/ Log: remove nursery_real_top and now nursery only have one top diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -57,6 +57,7 @@ from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, llgroup from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage +from rpython.memory.gctypelayout import zero_gc_pointers from rpython.memory.gc.base import GCBase, MovingGCBase from rpython.memory.gc import env from rpython.memory.support import mangle_hash @@ -243,12 +244,6 @@ # minimal allocated size of the nursery is 2x the following # number (by default, at 
least 132KB on 32-bit and 264KB on 64-bit). "large_object": (16384+512)*WORD, - - # This is the chunk that we cleanup in the nursery. The point is - # to avoid having to trash all the caches just to zero the nursery, - # so we trade it by cleaning it bit-by-bit, as we progress through - # nursery. Has to fit at least one large object - "nursery_cleanup": 32768 * WORD, } def __init__(self, config, @@ -268,7 +263,7 @@ assert small_request_threshold % WORD == 0 self.read_from_env = read_from_env self.nursery_size = nursery_size - self.nursery_cleanup = nursery_cleanup + self.small_request_threshold = small_request_threshold self.major_collection_threshold = major_collection_threshold self.growth_rate_max = growth_rate_max @@ -291,7 +286,6 @@ self.nursery = NULL self.nursery_free = NULL self.nursery_top = NULL - self.nursery_real_top = NULL self.debug_tiny_nursery = -1 self.debug_rotating_nurseries = lltype.nullptr(NURSARRAY) self.extra_threshold = 0 @@ -389,10 +383,6 @@ if newsize < minsize: self.debug_tiny_nursery = newsize & ~(WORD-1) newsize = minsize - - nurs_cleanup = env.read_from_env('PYPY_GC_NURSERY_CLEANUP') - if nurs_cleanup > 0: - self.nursery_cleanup = nurs_cleanup # major_coll = env.read_float_from_env('PYPY_GC_MAJOR_COLLECT') if major_coll > 1.0: @@ -429,17 +419,6 @@ llarena.arena_free(self.nursery) self.nursery_size = newsize self.allocate_nursery() - # - if self.nursery_cleanup < self.nonlarge_max + 1: - self.nursery_cleanup = self.nonlarge_max + 1 - # We need exactly initial_cleanup + N*nursery_cleanup = nursery_size. - # We choose the value of initial_cleanup to be between 1x and 2x the - # value of nursery_cleanup. 
- self.initial_cleanup = self.nursery_cleanup + ( - self.nursery_size % self.nursery_cleanup) - if (r_uint(self.initial_cleanup) > r_uint(self.nursery_size) or - self.debug_tiny_nursery >= 0): - self.initial_cleanup = self.nursery_size def _nursery_memory_size(self): extra = self.nonlarge_max + 1 @@ -463,7 +442,6 @@ self.nursery_free = self.nursery # the end of the nursery: self.nursery_top = self.nursery + self.nursery_size - self.nursery_real_top = self.nursery_top # initialize the threshold self.min_heap_size = max(self.min_heap_size, self.nursery_size * self.major_collection_threshold) @@ -475,7 +453,6 @@ self.next_major_collection_threshold = self.min_heap_size self.set_major_threshold_from(0.0) ll_assert(self.extra_threshold == 0, "extra_threshold set too early") - self.initial_cleanup = self.nursery_size debug_stop("gc-set-nursery-size") @@ -537,8 +514,7 @@ # llarena.arena_protect(newnurs, self._nursery_memory_size(), False) self.nursery = newnurs - self.nursery_top = self.nursery + self.initial_cleanup - self.nursery_real_top = self.nursery + self.nursery_size + self.nursery_top = self.nursery + self.nursery_size debug_print("switching from nursery", oldnurs, "to nursery", self.nursery, "size", self.nursery_size) @@ -663,15 +639,6 @@ else: self.minor_and_major_collection() - def move_nursery_top(self, totalsize): - size = self.nursery_cleanup - ll_assert(self.nursery_real_top - self.nursery_top >= size, - "nursery_cleanup not a divisor of nursery_size - initial_cleanup") - ll_assert(llmemory.raw_malloc_usage(totalsize) <= size, - "totalsize > nursery_cleanup") - llarena.arena_reset(self.nursery_top, size, 2) - self.nursery_top += size - move_nursery_top._always_inline_ = True def collect_and_reserve(self, prev_result, totalsize): """To call when nursery_free overflows nursery_top. @@ -682,9 +649,6 @@ and finally reserve 'totalsize' bytes at the start of the now-empty nursery. 
""" - if self.nursery_top < self.nursery_real_top: - self.move_nursery_top(totalsize) - return prev_result self.minor_collection() # # If the gc_state is not STATE_SCANNING, we're in the middle of @@ -700,13 +664,7 @@ # execute_finalizers(). If it is almost full again, # we need to fix it with another call to minor_collection(). if self.nursery_free + totalsize > self.nursery_top: - # - if self.nursery_free + totalsize > self.nursery_real_top: - self.minor_collection() - # then the nursery is empty - else: - # we just need to clean up a bit more of the nursery - self.move_nursery_top(totalsize) + self.minor_collection() # result = self.nursery_free self.nursery_free = result + totalsize @@ -871,8 +829,7 @@ if self.next_major_collection_threshold < 0: # cannot trigger a full collection now, but we can ensure # that one will occur very soon - self.nursery_top = self.nursery_real_top - self.nursery_free = self.nursery_real_top + self.nursery_free = self.nursery_top def can_malloc_nonmovable(self): return True @@ -952,7 +909,7 @@ def is_in_nursery(self, addr): ll_assert(llmemory.cast_adr_to_int(addr) & 1 == 0, "odd-valued (i.e. tagged) pointer unexpected here") - return self.nursery <= addr < self.nursery_real_top + return self.nursery <= addr < self.nursery_top def appears_to_be_young(self, addr): # "is a valid addr to a young object?" @@ -972,7 +929,7 @@ if not self.is_valid_gc_object(addr): return False - if self.nursery <= addr < self.nursery_real_top: + if self.nursery <= addr < self.nursery_top: return True # addr is in the nursery # # Else, it may be in the set 'young_rawmalloced_objects' @@ -1452,11 +1409,9 @@ # All live nursery objects are out, and the rest dies. 
Fill # the nursery up to the cleanup point with zeros llarena.arena_reset(self.nursery, self.nursery_size, 0) - llarena.arena_reset(self.nursery, self.initial_cleanup, 2) self.debug_rotate_nursery() self.nursery_free = self.nursery - self.nursery_top = self.nursery + self.initial_cleanup - self.nursery_real_top = self.nursery + self.nursery_size + self.nursery_top = self.nursery + self.nursery_size # debug_print("minor collect, total memory used:", self.get_total_memory_used()) From noreply at buildbot.pypy.org Wed Jun 25 16:42:46 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 16:42:46 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: add zero_gc_pointers_inside() after gc.malloc() Message-ID: <20140625144246.DE3351C01CB@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72213:3a3b6bee087d Date: 2014-06-25 10:41 -0400 http://bitbucket.org/pypy/pypy/changeset/3a3b6bee087d/ Log: add zero_gc_pointers_inside() after gc.malloc() diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -11,7 +11,7 @@ from rpython.memory.gctypelayout import TypeLayoutBuilder from rpython.rlib.rarithmetic import LONG_BIT, is_valid_int from rpython.memory.gc import minimark, incminimark - +from rpython.memory.gctypelayout import zero_gc_pointers, zero_gc_pointers_inside WORD = LONG_BIT // 8 ADDR_ARRAY = lltype.Array(llmemory.Address) @@ -46,6 +46,7 @@ if collect_stack_root: stackroots = self.tester.stackroots a = lltype.malloc(ADDR_ARRAY, len(stackroots), flavor='raw') + zero_gc_pointers_inside(a, ADDR_ARRAY) for i in range(len(a)): a[i] = llmemory.cast_ptr_to_adr(stackroots[i]) a_base = lltype.direct_arrayitems(a) @@ -106,7 +107,10 @@ def malloc(self, TYPE, n=None): addr = self.gc.malloc(self.get_type_id(TYPE), n, zero=True) - return llmemory.cast_adr_to_ptr(addr, lltype.Ptr(TYPE)) + 
obj_ptr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(TYPE)) + #TODO: only zero fields if there is gc filed add something like has_gc_ptr() + zero_gc_pointers_inside(obj_ptr, TYPE) + return obj_ptr class DirectGCTest(BaseDirectGCTest): From noreply at buildbot.pypy.org Wed Jun 25 16:48:06 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 16:48:06 +0200 (CEST) Subject: [pypy-commit] pypy gc-pinning: duplicate branch Message-ID: <20140625144806.3EC751C01CB@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-pinning Changeset: r72214:3f8b68066272 Date: 2014-06-25 10:46 -0400 http://bitbucket.org/pypy/pypy/changeset/3f8b68066272/ Log: duplicate branch From noreply at buildbot.pypy.org Wed Jun 25 16:48:07 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 16:48:07 +0200 (CEST) Subject: [pypy-commit] pypy gc-pinning: duplicate branch Message-ID: <20140625144807.6CF1D1C01CB@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-pinning Changeset: r72215:32435d62aa33 Date: 2014-06-25 10:47 -0400 http://bitbucket.org/pypy/pypy/changeset/32435d62aa33/ Log: duplicate branch From noreply at buildbot.pypy.org Wed Jun 25 16:51:27 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 16:51:27 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Fix tests Message-ID: <20140625145127.E23341C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72216:4f7c28e34e18 Date: 2014-06-25 16:50 +0200 http://bitbucket.org/pypy/pypy/changeset/4f7c28e34e18/ Log: Fix tests diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -11,7 +11,7 @@ from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import PeriodicAsyncAction from pypy.module.thread.threadlocals import OSThreadLocals -from rpython.rlib.objectmodel import invoke_around_extcall +from rpython.rlib.objectmodel import 
invoke_around_extcall, we_are_translated from rpython.rlib.rposix import get_errno, set_errno class GILThreadLocals(OSThreadLocals): @@ -70,13 +70,15 @@ def before_external_call(): # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! - rgil.gil_release() + if we_are_translated(): + rgil.gil_release() before_external_call._gctransformer_hint_cannot_collect_ = True before_external_call._dont_reach_me_in_del_ = True def after_external_call(): e = get_errno() - rgil.gil_acquire() + if we_are_translated(): + rgil.gil_acquire() rthread.gc_thread_run() after_thread_switch() set_errno(e) @@ -94,9 +96,10 @@ # explicitly release the gil, in a way that tries to give more # priority to other threads (as opposed to continuing to run in # the same thread). - if rgil.gil_yield_thread(): - rthread.gc_thread_run() - after_thread_switch() + if we_are_translated(): + if rgil.gil_yield_thread(): + rthread.gc_thread_run() + after_thread_switch() do_yield_thread._gctransformer_hint_close_stack_ = True do_yield_thread._dont_reach_me_in_del_ = True do_yield_thread._dont_inline_ = True From noreply at buildbot.pypy.org Wed Jun 25 17:13:06 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 17:13:06 +0200 (CEST) Subject: [pypy-commit] pypy gc-two-end-nursery: change of plan the branch is no longer used Message-ID: <20140625151306.74B071C33EC@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc-two-end-nursery Changeset: r72217:d79aec73fa3c Date: 2014-06-25 10:53 -0400 http://bitbucket.org/pypy/pypy/changeset/d79aec73fa3c/ Log: change of plan the branch is no longer used From noreply at buildbot.pypy.org Wed Jun 25 17:20:58 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 17:20:58 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Revert some of the last checkins: figured out why some functions were Message-ID: <20140625152058.579E21C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo 
Branch: fast-gil Changeset: r72218:b9d854c5738e Date: 2014-06-25 17:19 +0200 http://bitbucket.org/pypy/pypy/changeset/b9d854c5738e/ Log: Revert some of the last checkins: figured out why some functions were not present at all in the testing .so --- that's because they were "static inline". diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -11,7 +11,7 @@ from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import PeriodicAsyncAction from pypy.module.thread.threadlocals import OSThreadLocals -from rpython.rlib.objectmodel import invoke_around_extcall, we_are_translated +from rpython.rlib.objectmodel import invoke_around_extcall from rpython.rlib.rposix import get_errno, set_errno class GILThreadLocals(OSThreadLocals): @@ -70,15 +70,13 @@ def before_external_call(): # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! - if we_are_translated(): - rgil.gil_release() + rgil.gil_release() before_external_call._gctransformer_hint_cannot_collect_ = True before_external_call._dont_reach_me_in_del_ = True def after_external_call(): e = get_errno() - if we_are_translated(): - rgil.gil_acquire() + rgil.gil_acquire() rthread.gc_thread_run() after_thread_switch() set_errno(e) @@ -96,10 +94,9 @@ # explicitly release the gil, in a way that tries to give more # priority to other threads (as opposed to continuing to run in # the same thread). 
- if we_are_translated(): - if rgil.gil_yield_thread(): - rthread.gc_thread_run() - after_thread_switch() + if rgil.gil_yield_thread(): + rthread.gc_thread_run() + after_thread_switch() do_yield_thread._gctransformer_hint_close_stack_ = True do_yield_thread._dont_reach_me_in_del_ = True do_yield_thread._dont_inline_ = True diff --git a/rpython/jit/backend/llsupport/callbuilder.py b/rpython/jit/backend/llsupport/callbuilder.py --- a/rpython/jit/backend/llsupport/callbuilder.py +++ b/rpython/jit/backend/llsupport/callbuilder.py @@ -1,5 +1,4 @@ from rpython.rlib.clibffi import FFI_DEFAULT_ABI -from rpython.rlib.objectmodel import we_are_translated from rpython.rlib import rgil from rpython.rtyper.lltypesystem import lltype, rffi @@ -46,10 +45,7 @@ def emit_call_release_gil(self): """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr and reacqgil_addr.""" - if we_are_translated(): - fastgil = rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil()) - else: - fastgil = "NOT TRANSLATED" + fastgil = rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil()) self.select_call_release_gil_mode() self.prepare_arguments() self.push_gcmap_for_call_release_gil() diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py --- a/rpython/jit/backend/x86/callbuilder.py +++ b/rpython/jit/backend/x86/callbuilder.py @@ -126,10 +126,10 @@ self.asm.set_extra_stack_depth(self.mc, -delta * WORD) css_value = eax # + self.mc.MOV(heap(fastgil), css_value) + # if not we_are_translated(): # for testing: we should not access self.mc.ADD(ebp, imm(1)) # ebp any more; and ignore 'fastgil' - else: - self.mc.MOV(heap(fastgil), css_value) def move_real_result_and_call_reacqgil_addr(self, fastgil): from rpython.jit.backend.x86.assembler import heap @@ -160,9 +160,7 @@ mc.LEA_rs(css_value.value, css) # mc.MOV(old_value, imm(1)) - if not we_are_translated(): - mc.MOV(old_value, css_value) # for testing: ignore 'fastgil' - elif rx86.fits_in_32bits(fastgil): + if 
rx86.fits_in_32bits(fastgil): mc.XCHG_rj(old_value.value, fastgil) else: mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil) diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -27,6 +27,8 @@ void RPyGilAllocate(void); long RPyGilYieldThread(void); void RPyGilAcquire(void); +#define RPyGilRelease _RPyGilRelease +#define RPyFetchFastGil _RPyFetchFastGil #ifdef PYPY_USE_ASMGCC # define RPY_FASTGIL_LOCKED(x) (x == 1) @@ -36,11 +38,11 @@ extern long rpy_fastgil; -static inline void RPyGilRelease(void) { +static inline void _RPyGilRelease(void) { assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); rpy_fastgil = 0; } -static inline long *RPyFetchFastGil(void) { +static inline long *_RPyFetchFastGil(void) { return &rpy_fastgil; } diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -122,17 +122,6 @@ #endif } -/* -void RPyGilRelease(void) -{ - Releases the GIL in order to do an external function call. - We assume that the common case is that the function call is - actually very short, and optimize accordingly. - - Note: this function is defined as a 'static inline' in thread.h. -} -*/ - long RPyGilYieldThread(void) { /* can be called even before RPyGilAllocate(), but in this case, @@ -155,3 +144,26 @@ RPyGilAcquire(); return 1; } + +/********** for tests only **********/ + +/* These functions are usually defined as a macros RPyXyz() in thread.h + which get translated into calls to _RpyXyz(). But for tests we need + the real functions to exists in the library as well. +*/ + +#undef RPyGilRelease +void RPyGilRelease(void) +{ + /* Releases the GIL in order to do an external function call. + We assume that the common case is that the function call is + actually very short, and optimize accordingly. 
+ */ + _RPyGilRelease(); +} + +#undef RPyFetchFastGil +long *RPyFetchFastGil(void) +{ + return _RPyFetchFastGil(); +} From noreply at buildbot.pypy.org Wed Jun 25 18:32:03 2014 From: noreply at buildbot.pypy.org (groggi) Date: Wed, 25 Jun 2014 18:32:03 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: introduce pin()/unpin() to the JIT Message-ID: <20140625163203.1747F1C33EC@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72219:cfd8cfed9b5c Date: 2014-06-24 17:01 +0200 http://bitbucket.org/pypy/pypy/changeset/cfd8cfed9b5c/ Log: introduce pin()/unpin() to the JIT diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -493,30 +493,32 @@ # XXX some of the following functions should not become residual calls # but be really compiled - rewrite_op_int_floordiv_ovf_zer = _do_builtin_call - rewrite_op_int_floordiv_ovf = _do_builtin_call - rewrite_op_int_floordiv_zer = _do_builtin_call - rewrite_op_int_mod_ovf_zer = _do_builtin_call - rewrite_op_int_mod_ovf = _do_builtin_call - rewrite_op_int_mod_zer = _do_builtin_call - rewrite_op_int_lshift_ovf = _do_builtin_call - rewrite_op_int_abs = _do_builtin_call - rewrite_op_llong_abs = _do_builtin_call - rewrite_op_llong_floordiv = _do_builtin_call - rewrite_op_llong_floordiv_zer = _do_builtin_call - rewrite_op_llong_mod = _do_builtin_call - rewrite_op_llong_mod_zer = _do_builtin_call - rewrite_op_ullong_floordiv = _do_builtin_call - rewrite_op_ullong_floordiv_zer = _do_builtin_call - rewrite_op_ullong_mod = _do_builtin_call - rewrite_op_ullong_mod_zer = _do_builtin_call - rewrite_op_gc_identityhash = _do_builtin_call - rewrite_op_gc_id = _do_builtin_call - rewrite_op_uint_mod = _do_builtin_call - rewrite_op_cast_float_to_uint = _do_builtin_call - rewrite_op_cast_uint_to_float = _do_builtin_call - rewrite_op_weakref_create = _do_builtin_call - 
rewrite_op_weakref_deref = _do_builtin_call + rewrite_op_int_floordiv_ovf_zer = _do_builtin_call + rewrite_op_int_floordiv_ovf = _do_builtin_call + rewrite_op_int_floordiv_zer = _do_builtin_call + rewrite_op_int_mod_ovf_zer = _do_builtin_call + rewrite_op_int_mod_ovf = _do_builtin_call + rewrite_op_int_mod_zer = _do_builtin_call + rewrite_op_int_lshift_ovf = _do_builtin_call + rewrite_op_int_abs = _do_builtin_call + rewrite_op_llong_abs = _do_builtin_call + rewrite_op_llong_floordiv = _do_builtin_call + rewrite_op_llong_floordiv_zer = _do_builtin_call + rewrite_op_llong_mod = _do_builtin_call + rewrite_op_llong_mod_zer = _do_builtin_call + rewrite_op_ullong_floordiv = _do_builtin_call + rewrite_op_ullong_floordiv_zer = _do_builtin_call + rewrite_op_ullong_mod = _do_builtin_call + rewrite_op_ullong_mod_zer = _do_builtin_call + rewrite_op_gc_identityhash = _do_builtin_call + rewrite_op_gc_id = _do_builtin_call + rewrite_op_gc_pin = _do_builtin_call + rewrite_op_gc_unpin = _do_builtin_call + rewrite_op_uint_mod = _do_builtin_call + rewrite_op_cast_float_to_uint = _do_builtin_call + rewrite_op_cast_uint_to_float = _do_builtin_call + rewrite_op_weakref_create = _do_builtin_call + rewrite_op_weakref_deref = _do_builtin_call rewrite_op_gc_add_memory_pressure = _do_builtin_call # ---------- diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py --- a/rpython/jit/codewriter/support.py +++ b/rpython/jit/codewriter/support.py @@ -229,6 +229,13 @@ return llop.gc_id(lltype.Signed, ptr) +def _ll_1_gc_pin(ptr): + return llop.gc_pin(lltype.Bool, ptr) + +def _ll_1_gc_unpin(ptr): + llop.gc_unpin(lltype.Void, ptr) + + @oopspec("jit.force_virtual(inst)") def _ll_1_jit_force_virtual(inst): return llop.jit_force_virtual(lltype.typeOf(inst), inst) From noreply at buildbot.pypy.org Wed Jun 25 18:32:04 2014 From: noreply at buildbot.pypy.org (groggi) Date: Wed, 25 Jun 2014 18:32:04 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: (wip) add JIT 
support for object pinning Message-ID: <20140625163204.640921C33EC@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72220:51aa811f0ec2 Date: 2014-06-24 17:41 +0200 http://bitbucket.org/pypy/pypy/changeset/51aa811f0ec2/ Log: (wip) add JIT support for object pinning diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -8,8 +8,8 @@ from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.jit.codewriter import heaptracker -from rpython.jit.metainterp.history import ConstPtr, AbstractDescr -from rpython.jit.metainterp.resoperation import rop +from rpython.jit.metainterp.history import ConstPtr, AbstractDescr, BoxPtr, ConstInt +from rpython.jit.metainterp.resoperation import rop, ResOperation from rpython.jit.backend.llsupport import symbolic, jitframe from rpython.jit.backend.llsupport.symbolic import WORD from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr @@ -92,26 +92,70 @@ def gc_malloc_unicode(self, num_elem): return self._bh_malloc_array(num_elem, self.unicode_descr) - def _record_constptrs(self, op, gcrefs_output_list): + class PinnedObjectTracker(object): + """Simple helper class to keep informations regarding the 'GcArray' + in one place that is used to double load pinned objects. 
+ """ + def __init__(self, cpu, size): + self._nextItem = 0 + self._refArrayType = lltype.GcArray(llmemory.GCREF) + self.refArrayDescr = cpu.arraydescrof(self._refArrayType) + self._refArray = lltype.malloc(self._refArrayType, size) + self.refArrayGCREF = lltype.cast_opaque_ptr(llmemory.GCREF, self._refArray) + + def add_ref(self, ref): + index = self._nextItem + self._nextItem += 1 + # + self._refArray[index] = ref + return index + + def _record_constptrs(self, op, gcrefs_output_list, pinnedObjTracker): + newops = [] for i in range(op.numargs()): v = op.getarg(i) if isinstance(v, ConstPtr) and bool(v.value): p = v.value - rgc._make_sure_does_not_move(p) - gcrefs_output_list.append(p) + if rgc._make_sure_does_not_move(p): + gcrefs_output_list.append(p) + else: + # encountered a moving pointer. Solve the problem by double + # loading the address to the pointer each run of the JITed code. + resultPtr = BoxPtr() + arrayIndex = pinnedObjTracker.add_ref(p) + loadOp = ResOperation(rop.GETARRAYITEM_GC, + [ConstPtr(pinnedObjTracker.refArrayGCREF), ConstInt(arrayIndex)], + resultPtr, + descr=pinnedObjTracker.refArrayDescr) + newops.append(loadOp) + op.setarg(i, resultPtr) + # if op.is_guard() or op.getopnum() == rop.FINISH: llref = cast_instance_to_gcref(op.getdescr()) - rgc._make_sure_does_not_move(llref) + if not rgc._make_sure_does_not_move(llref): + raise NotImplementedError("blub") # XXX handle (groggi) gcrefs_output_list.append(llref) + newops.append(op) + return newops def rewrite_assembler(self, cpu, operations, gcrefs_output_list): rewriter = GcRewriterAssembler(self, cpu) newops = rewriter.rewrite(operations) # record all GCREFs, because the GC (or Boehm) cannot see them and # keep them alive if they end up as constants in the assembler + + # XXX add comment (groggi) + # XXX handle size in a not constant way? Get it from the GC? 
(groggi) + pinnedObjTracker = self.PinnedObjectTracker(cpu, 100) + gcrefs_output_list.append(pinnedObjTracker.refArrayGCREF) + rgc._make_sure_does_not_move(pinnedObjTracker.refArrayGCREF) + + newnewops = [] # XXX better name... (groggi) + for op in newops: - self._record_constptrs(op, gcrefs_output_list) - return newops + ops = self._record_constptrs(op, gcrefs_output_list, pinnedObjTracker) + newnewops.extend(ops) + return newnewops @specialize.memo() def getframedescrs(self, cpu): From noreply at buildbot.pypy.org Wed Jun 25 18:32:05 2014 From: noreply at buildbot.pypy.org (groggi) Date: Wed, 25 Jun 2014 18:32:05 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: (wip) support tests with a nonfunctional pin feature inside rgc Message-ID: <20140625163205.913861C33EC@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72221:cbae297c7a2b Date: 2014-06-24 17:50 +0200 http://bitbucket.org/pypy/pypy/changeset/cbae297c7a2b/ Log: (wip) support tests with a nonfunctional pin feature inside rgc diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -18,6 +18,12 @@ """ pass +# for test purposes we allow objects to be pinned and use +# the following list to keep track of the pinned objects +# XXX think about possible unexpected behavior (groggi) +if not we_are_translated(): + pinned_objects = [] + def pin(obj): """If 'obj' can move, then attempt to temporarily fix it. This function returns True if and only if 'obj' could be pinned; this is @@ -40,7 +46,12 @@ Note further that pinning an object does not prevent it from being collected if it is not used anymore. """ - return False + if we_are_translated(): + return False + else: + pinned_objects.append(obj) + return True + class PinEntry(ExtRegistryEntry): _about_ = pin @@ -57,8 +68,12 @@ """Unpin 'obj', allowing it to move again. Must only be called after a call to pin(obj) returned True. 
""" - raise AssertionError("pin() always returns False, " - "so unpin() should not be called") + if we_are_translated(): + raise AssertionError("pin() always returns False, " + "so unpin() should not be called") + else: + pinned_objects.remove(obj) + class UnpinEntry(ExtRegistryEntry): _about_ = unpin @@ -145,7 +160,8 @@ on objects that are already a bit old, so have a chance to be already non-movable.""" if not we_are_translated(): - return True # XXX: check if True is the right return (groggi) + return p not in pinned_objects + # if _is_pinned(p): return False i = 0 From noreply at buildbot.pypy.org Wed Jun 25 18:32:06 2014 From: noreply at buildbot.pypy.org (groggi) Date: Wed, 25 Jun 2014 18:32:06 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: (wip) JIT support for pinned objects translates now Message-ID: <20140625163206.D41031C33EC@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72222:1b72f8089e13 Date: 2014-06-25 12:20 +0200 http://bitbucket.org/pypy/pypy/changeset/1b72f8089e13/ Log: (wip) JIT support for pinned objects translates now diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -19,6 +19,26 @@ from rpython.jit.backend.llsupport.rewrite import GcRewriterAssembler from rpython.memory.gctransform import asmgcroot +class PinnedObjectTracker(object): + """Simple helper class to keep informations regarding the 'GcArray' + in one place that is used to double load pinned objects. 
+ """ + + _ref_array_type = lltype.GcArray(llmemory.GCREF) + + def __init__(self, cpu, size): + self._next_item = 0 + self._ref_array = lltype.malloc(PinnedObjectTracker._ref_array_type, size) + self.ref_array_descr = cpu.arraydescrof(PinnedObjectTracker._ref_array_type) + self.ref_array_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, self._ref_array) + + def add_ref(self, ref): + index = self._next_item + self._next_item += 1 + # + self._ref_array[index] = ref + return index + # ____________________________________________________________ class GcLLDescription(GcCache): @@ -92,25 +112,7 @@ def gc_malloc_unicode(self, num_elem): return self._bh_malloc_array(num_elem, self.unicode_descr) - class PinnedObjectTracker(object): - """Simple helper class to keep informations regarding the 'GcArray' - in one place that is used to double load pinned objects. - """ - def __init__(self, cpu, size): - self._nextItem = 0 - self._refArrayType = lltype.GcArray(llmemory.GCREF) - self.refArrayDescr = cpu.arraydescrof(self._refArrayType) - self._refArray = lltype.malloc(self._refArrayType, size) - self.refArrayGCREF = lltype.cast_opaque_ptr(llmemory.GCREF, self._refArray) - - def add_ref(self, ref): - index = self._nextItem - self._nextItem += 1 - # - self._refArray[index] = ref - return index - - def _record_constptrs(self, op, gcrefs_output_list, pinnedObjTracker): + def _record_constptrs(self, op, gcrefs_output_list, pinned_obj_tracker): newops = [] for i in range(op.numargs()): v = op.getarg(i) @@ -119,16 +121,17 @@ if rgc._make_sure_does_not_move(p): gcrefs_output_list.append(p) else: - # encountered a moving pointer. Solve the problem by double - # loading the address to the pointer each run of the JITed code. 
- resultPtr = BoxPtr() - arrayIndex = pinnedObjTracker.add_ref(p) - loadOp = ResOperation(rop.GETARRAYITEM_GC, - [ConstPtr(pinnedObjTracker.refArrayGCREF), ConstInt(arrayIndex)], - resultPtr, - descr=pinnedObjTracker.refArrayDescr) - newops.append(loadOp) - op.setarg(i, resultPtr) + # encountered a pointer that points to a possibly moving object. + # Solve the problem by double loading the address to the object + # each run of the JITed code. + result_ptr = BoxPtr() + array_index = pinned_obj_tracker.add_ref(p) + load_op = ResOperation(rop.GETARRAYITEM_GC, + [ConstPtr(pinned_obj_tracker.ref_array_gcref), ConstInt(array_index)], + result_ptr, + descr=pinned_obj_tracker.ref_array_descr) + newops.append(load_op) + op.setarg(i, result_ptr) # if op.is_guard() or op.getopnum() == rop.FINISH: llref = cast_instance_to_gcref(op.getdescr()) @@ -138,6 +141,9 @@ newops.append(op) return newops + if not we_are_translated(): + last_pinned_object_tracker = None + def rewrite_assembler(self, cpu, operations, gcrefs_output_list): rewriter = GcRewriterAssembler(self, cpu) newops = rewriter.rewrite(operations) @@ -146,14 +152,17 @@ # XXX add comment (groggi) # XXX handle size in a not constant way? Get it from the GC? (groggi) - pinnedObjTracker = self.PinnedObjectTracker(cpu, 100) - gcrefs_output_list.append(pinnedObjTracker.refArrayGCREF) - rgc._make_sure_does_not_move(pinnedObjTracker.refArrayGCREF) + pinned_obj_tracker = PinnedObjectTracker(cpu, 100) + if not we_are_translated(): + self.last_pinned_object_tracker = pinned_obj_tracker + print "blub: %r" % self.last_pinned_object_tracker + gcrefs_output_list.append(pinned_obj_tracker.ref_array_gcref) + rgc._make_sure_does_not_move(pinned_obj_tracker.ref_array_gcref) newnewops = [] # XXX better name... 
(groggi) for op in newops: - ops = self._record_constptrs(op, gcrefs_output_list, pinnedObjTracker) + ops = self._record_constptrs(op, gcrefs_output_list, pinned_obj_tracker) newnewops.extend(ops) return newnewops diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -299,6 +299,7 @@ return ofs, size, sign unpack_fielddescr_size._always_inline_ = True + @specialize.memo() def arraydescrof(self, A): return get_array_descr(self.gc_ll_descr, A) From noreply at buildbot.pypy.org Wed Jun 25 18:32:08 2014 From: noreply at buildbot.pypy.org (groggi) Date: Wed, 25 Jun 2014 18:32:08 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: ups. remove the "blub" message Message-ID: <20140625163208.1C14E1C33EC@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72223:239b330023a8 Date: 2014-06-25 15:42 +0200 http://bitbucket.org/pypy/pypy/changeset/239b330023a8/ Log: ups. 
remove the "blub" message diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -155,7 +155,6 @@ pinned_obj_tracker = PinnedObjectTracker(cpu, 100) if not we_are_translated(): self.last_pinned_object_tracker = pinned_obj_tracker - print "blub: %r" % self.last_pinned_object_tracker gcrefs_output_list.append(pinned_obj_tracker.ref_array_gcref) rgc._make_sure_does_not_move(pinned_obj_tracker.ref_array_gcref) From noreply at buildbot.pypy.org Wed Jun 25 18:32:09 2014 From: noreply at buildbot.pypy.org (groggi) Date: Wed, 25 Jun 2014 18:32:09 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: (wip) first test for JIT's object pinning support Message-ID: <20140625163209.45A2E1C33EC@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72224:373e93cab4c5 Date: 2014-06-25 18:30 +0200 http://bitbucket.org/pypy/pypy/changeset/373e93cab4c5/ Log: (wip) first test for JIT's object pinning support diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -141,9 +141,6 @@ newops.append(op) return newops - if not we_are_translated(): - last_pinned_object_tracker = None - def rewrite_assembler(self, cpu, operations, gcrefs_output_list): rewriter = GcRewriterAssembler(self, cpu) newops = rewriter.rewrite(operations) diff --git a/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py b/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py @@ -0,0 +1,127 @@ +from test_rewrite import get_size_descr, get_array_descr, get_description, BaseFakeCPU +from rpython.jit.backend.llsupport.descr import get_size_descr,\ + get_field_descr, get_array_descr, ArrayDescr, FieldDescr,\ + 
SizeDescrWithVTable, get_interiorfield_descr +from rpython.jit.backend.llsupport.gc import GcLLDescr_boehm,\ + GcLLDescr_framework, PinnedObjectTracker +from rpython.jit.backend.llsupport import jitframe, gc +from rpython.jit.metainterp.gc import get_description +from rpython.jit.tool.oparser import parse +from rpython.jit.metainterp.optimizeopt.util import equaloplists +from rpython.jit.codewriter.heaptracker import register_known_gctype +from rpython.jit.metainterp.history import JitCellToken, FLOAT +from rpython.rtyper.lltypesystem import lltype, rclass, rffi, lltype, llmemory +from rpython.jit.backend.x86.arch import WORD +from rpython.rlib import rgc + +class Evaluator(object): + def __init__(self, scope): + self.scope = scope + def __getitem__(self, key): + return eval(key, self.scope) + + +class FakeLoopToken(object): + pass + +# The following class is based on rpython.jit.backend.llsupport.test.test_rewrite.RewriteTests. +# It's modified to be able to test the object pinning specific features. 
+class RewriteTests(object): + def check_rewrite(self, frm_operations, to_operations, **namespace): + # objects to use inside the test + A = lltype.GcArray(lltype.Signed) + adescr = get_array_descr(self.gc_ll_descr, A) + adescr.tid = 4321 + alendescr = adescr.lendescr + # + pinned_obj_type = lltype.GcStruct('PINNED_STRUCT', ('my_int', lltype.Signed)) + pinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr, pinned_obj_type, 'my_int') + pinned_obj_ptr = lltype.malloc(pinned_obj_type) + pinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, pinned_obj_ptr) + assert rgc.pin(pinned_obj_gcref) + # + ref_array_descr = self.cpu.arraydescrof(PinnedObjectTracker._ref_array_type) + # + vtable_descr = self.gc_ll_descr.fielddescr_vtable + O = lltype.GcStruct('O', ('parent', rclass.OBJECT), + ('x', lltype.Signed)) + o_vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True) + register_known_gctype(self.cpu, o_vtable, O) + # + tiddescr = self.gc_ll_descr.fielddescr_tid + wbdescr = self.gc_ll_descr.write_barrier_descr + WORD = globals()['WORD'] + # + strdescr = self.gc_ll_descr.str_descr + unicodedescr = self.gc_ll_descr.unicode_descr + strlendescr = strdescr.lendescr + unicodelendescr = unicodedescr.lendescr + + casmdescr = JitCellToken() + clt = FakeLoopToken() + clt._ll_initial_locs = [0, 8] + frame_info = lltype.malloc(jitframe.JITFRAMEINFO, flavor='raw') + clt.frame_info = frame_info + frame_info.jfi_frame_depth = 13 + frame_info.jfi_frame_size = 255 + framedescrs = self.gc_ll_descr.getframedescrs(self.cpu) + framelendescr = framedescrs.arraydescr.lendescr + jfi_frame_depth = framedescrs.jfi_frame_depth + jfi_frame_size = framedescrs.jfi_frame_size + jf_frame_info = framedescrs.jf_frame_info + signedframedescr = self.cpu.signedframedescr + floatframedescr = self.cpu.floatframedescr + casmdescr.compiled_loop_token = clt + tzdescr = None # noone cares + # + namespace.update(locals()) + # + for funcname in self.gc_ll_descr._generated_functions: + namespace[funcname] = 
self.gc_ll_descr.get_malloc_fn(funcname) + namespace[funcname + '_descr'] = getattr(self.gc_ll_descr, + '%s_descr' % funcname) + # + ops = parse(frm_operations, namespace=namespace) + operations = self.gc_ll_descr.rewrite_assembler(self.cpu, + ops.operations, + []) + # make the array containing the GCREF's accessible inside the tests. + # This must be done after we call 'rewrite_assembler'. Before that + # call 'last_pinned_object_tracker' is None or filled with some old + # value. + namespace['ref_array_gcref'] = self.gc_ll_descr.last_pinned_object_tracker.ref_array_gcref + expected = parse(to_operations % Evaluator(namespace), + namespace=namespace) + equaloplists(operations, expected.operations) + lltype.free(frame_info, flavor='raw') + +class TestFramework(RewriteTests): + def setup_method(self, meth): + class config_(object): + class translation(object): + gc = 'minimark' + gcrootfinder = 'asmgcc' + gctransformer = 'framework' + gcremovetypeptr = False + gcdescr = get_description(config_) + self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None, + really_not_translated=True) + self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = ( + lambda cpu: True) + # + class FakeCPU(BaseFakeCPU): + def sizeof(self, STRUCT): + descr = SizeDescrWithVTable(104) + descr.tid = 9315 + return descr + self.cpu = FakeCPU() + + def test_simple_getfield(self): + self.check_rewrite(""" + [] + i0 = getfield_gc(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr) + """, """ + [] + p1 = getarrayitem_gc(ConstPtr(ref_array_gcref), 0, descr=ref_array_descr) + i0 = getfield_gc(p1, descr=pinned_obj_my_int_descr) + """) From noreply at buildbot.pypy.org Wed Jun 25 18:36:39 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 18:36:39 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Partial revert of 40e4dd588bd7. 
Message-ID: <20140625163639.99D691C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72225:c43e39223544 Date: 2014-06-25 18:07 +0200 http://bitbucket.org/pypy/pypy/changeset/c43e39223544/ Log: Partial revert of 40e4dd588bd7. diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -53,6 +53,11 @@ void RPyGilAcquire(void) { /* Acquires the GIL. + + XXX Note: this function saves and restores 'errno'. This is + needed for now because it may be *followed* by reading the + 'errno', although it's kind of bogus: it should be read before + calling RPyGilAcquire(). */ long old_fastgil = lock_test_and_set(&rpy_fastgil, 1); @@ -63,6 +68,7 @@ } else { /* Otherwise, another thread is busy with the GIL. */ + int old_errno = errno; /* Register me as one of the threads that is actively waiting for the GIL. The number of such threads is found in @@ -102,6 +108,8 @@ } atomic_decrement(&rpy_waiting_threads); mutex_unlock(&mutex_gil_stealer); + + errno = old_errno; } assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); From noreply at buildbot.pypy.org Wed Jun 25 18:36:40 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 18:36:40 +0200 (CEST) Subject: [pypy-commit] pypy default: Don't invalidate already-compiled pieces of JIT code when we only Message-ID: <20140625163640.E3DC81C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72226:cc99ff68804a Date: 2014-06-25 18:36 +0200 http://bitbucket.org/pypy/pypy/changeset/cc99ff68804a/ Log: Don't invalidate already-compiled pieces of JIT code when we only *add* new global variables, but only if we *change* old globals. 
diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py --- a/pypy/objspace/std/celldict.py +++ b/pypy/objspace/std/celldict.py @@ -16,7 +16,7 @@ class ModuleCell(W_Root): - def __init__(self, w_value=None): + def __init__(self, w_value): self.w_value = w_value def __repr__(self): @@ -49,17 +49,24 @@ return self.erase({}) def mutated(self): + # A mutation means changing an existing key to point to a new value. + # A value is either a regular wrapped object, or a ModuleCell if we + # detect mutations. It means that each existing key can only trigger + # a mutation at most once. self.version = VersionTag() - def getdictvalue_no_unwrapping(self, w_dict, key): + def dictvalue_no_unwrapping(self, w_dict, key): # NB: it's important to promote self here, so that self.version is a # no-op due to the quasi-immutable field self = jit.promote(self) - return self._getdictvalue_no_unwrapping_pure(self.version, w_dict, key) + return self._dictvalue_no_unwrapping_pure(self.version, w_dict, key) @jit.elidable_promote('0,1,2') - def _getdictvalue_no_unwrapping_pure(self, version, w_dict, key): - return self.unerase(w_dict.dstorage).get(key, None) + def _dictvalue_no_unwrapping_pure(self, version, w_dict, key): + # may raise KeyError. If it does, then the JIT is prevented from + # considering this function as elidable. This is what lets us add + # new keys to the dictionary without changing the version. 
+ return self.unerase(w_dict.dstorage)[key] def setitem(self, w_dict, w_key, w_value): space = self.space @@ -70,17 +77,20 @@ w_dict.setitem(w_key, w_value) def setitem_str(self, w_dict, key, w_value): - cell = self.getdictvalue_no_unwrapping(w_dict, key) - if isinstance(cell, ModuleCell): - cell.w_value = w_value - return - if cell is not None: + try: + cell = self.dictvalue_no_unwrapping(w_dict, key) + except KeyError: + pass + else: + if isinstance(cell, ModuleCell): + cell.w_value = w_value + return # If the new value and the current value are the same, don't # create a level of indirection, or mutate the version. if self.space.is_w(w_value, cell): return w_value = ModuleCell(w_value) - self.mutated() + self.mutated() self.unerase(w_dict.dstorage)[key] = w_value def setdefault(self, w_dict, w_key, w_default): @@ -130,7 +140,10 @@ return w_dict.getitem(w_key) def getitem_str(self, w_dict, key): - cell = self.getdictvalue_no_unwrapping(w_dict, key) + try: + cell = self.dictvalue_no_unwrapping(w_dict, key) + except KeyError: + return None return unwrap_cell(cell) def w_keys(self, w_dict): diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -21,27 +21,27 @@ w_key = self.FakeString(key) d.setitem(w_key, 1) v2 = strategy.version - assert v1 is not v2 + assert v1 is v2 # doesn't change when adding new keys assert d.getitem(w_key) == 1 - assert d.strategy.getdictvalue_no_unwrapping(d, key) == 1 + assert d.strategy.dictvalue_no_unwrapping(d, key) == 1 d.setitem(w_key, 2) v3 = strategy.version assert v2 is not v3 assert d.getitem(w_key) == 2 - assert d.strategy.getdictvalue_no_unwrapping(d, key).w_value == 2 + assert d.strategy.dictvalue_no_unwrapping(d, key).w_value == 2 d.setitem(w_key, 3) v4 = strategy.version assert v3 is v4 assert d.getitem(w_key) == 3 - assert d.strategy.getdictvalue_no_unwrapping(d, key).w_value == 3 + assert 
d.strategy.dictvalue_no_unwrapping(d, key).w_value == 3 d.delitem(w_key) v5 = strategy.version assert v5 is not v4 assert d.getitem(w_key) is None - assert d.strategy.getdictvalue_no_unwrapping(d, key) is None + py.test.raises(KeyError, d.strategy.dictvalue_no_unwrapping, d, key) def test_same_key_set_twice(self): strategy = ModuleDictStrategy(space) @@ -52,7 +52,7 @@ x = object() d.setitem("a", x) v2 = strategy.version - assert v1 is not v2 + assert v1 is v2 d.setitem("a", x) v3 = strategy.version assert v2 is v3 From noreply at buildbot.pypy.org Wed Jun 25 18:50:42 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 18:50:42 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: modify class _uninitialized 's attribute _TYPE to TYPE Message-ID: <20140625165042.1F8201D2A6C@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72227:6ae62c5e4941 Date: 2014-06-25 12:49 -0400 http://bitbucket.org/pypy/pypy/changeset/6ae62c5e4941/ Log: modify class _uninitialized 's attribute _TYPE to TYPE diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -57,7 +57,6 @@ from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, llgroup from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage -from rpython.memory.gctypelayout import zero_gc_pointers from rpython.memory.gc.base import GCBase, MovingGCBase from rpython.memory.gc import env from rpython.memory.support import mangle_hash diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -46,7 +46,6 @@ if collect_stack_root: stackroots = self.tester.stackroots a = lltype.malloc(ADDR_ARRAY, len(stackroots), flavor='raw') - zero_gc_pointers_inside(a, ADDR_ARRAY) 
for i in range(len(a)): a[i] = llmemory.cast_ptr_to_adr(stackroots[i]) a_base = lltype.direct_arrayitems(a) diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -47,7 +47,7 @@ class _uninitialized(object): def __init__(self, TYPE): - self.TYPE = TYPE + self._TYPE = TYPE def __repr__(self): return '<Uninitialized %r>'%(self.TYPE,) From noreply at buildbot.pypy.org Wed Jun 25 19:29:16 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 19:29:16 +0200 (CEST) Subject: [pypy-commit] cffi default: A test for checking the multithreaded safety of errno (and GetLastError Message-ID: <20140625172916.DF4BE1D2A6C@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1530:2db43585e0b2 Date: 2014-06-25 19:28 +0200 http://bitbucket.org/cffi/cffi/changeset/2db43585e0b2/ Log: A test for checking the multithreaded safety of errno (and GetLastError on Windows). diff --git a/testing/test_verify.py b/testing/test_verify.py --- a/testing/test_verify.py +++ b/testing/test_verify.py @@ -1905,3 +1905,60 @@ p = lib.f2(42) x = lib.f1(p) assert x == 42 + +def _run_in_multiple_threads(test1): + test1() + import sys + try: + import thread + except ImportError: + import _thread as thread + errors = [] + def wrapper(lock): + try: + test1() + except: + errors.append(sys.exc_info()) + lock.release() + locks = [] + for i in range(10): + _lock = thread.allocate_lock() + _lock.acquire() + thread.start_new_thread(wrapper, (_lock,)) + locks.append(_lock) + for _lock in locks: + _lock.acquire() + if errors: + raise errors[0][1] + +def test_errno_working_even_with_pypys_jit(): + ffi = FFI() + ffi.cdef("int f(int);") + lib = ffi.verify(""" + #include <errno.h> + int f(int x) { return (errno = errno + x); } + """) + @_run_in_multiple_threads + def test1(): + ffi.errno = 0 + for i in range(10000): + e = lib.f(1) + assert e == i + 1 + assert ffi.errno == e + for i in 
range(10000): + ffi.errno = i + e = lib.f(42) + assert e == i + 42 + +def test_getlasterror_working_even_with_pypys_jit(): + if sys.platform != 'win32': + py.test.skip("win32-only test") + ffi = FFI() + ffi.cdef("void SetLastError(DWORD);") + lib = ffi.dlopen("Kernel32.dll") + @_run_in_multiple_threads + def test1(): + for i in range(10000): + n = (1 << 29) + i + lib.SetLastError(n) + assert ffi.getwinerror()[0] == n From noreply at buildbot.pypy.org Wed Jun 25 19:34:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 19:34:09 +0200 (CEST) Subject: [pypy-commit] pypy default: import cffi/2db43585e0b2 Message-ID: <20140625173409.4EB641D2A7B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72228:4841c3bead14 Date: 2014-06-25 19:33 +0200 http://bitbucket.org/pypy/pypy/changeset/4841c3bead14/ Log: import cffi/2db43585e0b2 diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py @@ -1906,3 +1906,60 @@ p = lib.f2(42) x = lib.f1(p) assert x == 42 + +def _run_in_multiple_threads(test1): + test1() + import sys + try: + import thread + except ImportError: + import _thread as thread + errors = [] + def wrapper(lock): + try: + test1() + except: + errors.append(sys.exc_info()) + lock.release() + locks = [] + for i in range(10): + _lock = thread.allocate_lock() + _lock.acquire() + thread.start_new_thread(wrapper, (_lock,)) + locks.append(_lock) + for _lock in locks: + _lock.acquire() + if errors: + raise errors[0][1] + +def test_errno_working_even_with_pypys_jit(): + ffi = FFI() + ffi.cdef("int f(int);") + lib = ffi.verify(""" + #include <errno.h> + int f(int x) { return (errno = errno + x); } + """) + @_run_in_multiple_threads + def test1(): + ffi.errno = 0 + for i in range(10000): + e = lib.f(1) + assert e == i + 1 + assert ffi.errno == e + for i in range(10000): + 
ffi.errno = i + e = lib.f(42) + assert e == i + 42 + +def test_getlasterror_working_even_with_pypys_jit(): + if sys.platform != 'win32': + py.test.skip("win32-only test") + ffi = FFI() + ffi.cdef("void SetLastError(DWORD);") + lib = ffi.dlopen("Kernel32.dll") + @_run_in_multiple_threads + def test1(): + for i in range(10000): + n = (1 << 29) + i + lib.SetLastError(n) + assert ffi.getwinerror()[0] == n From noreply at buildbot.pypy.org Wed Jun 25 19:34:10 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 19:34:10 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: hg merge default Message-ID: <20140625173410.972EA1D2A7B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72229:ed9493101461 Date: 2014-06-25 19:33 +0200 http://bitbucket.org/pypy/pypy/changeset/ed9493101461/ Log: hg merge default diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py @@ -1906,3 +1906,60 @@ p = lib.f2(42) x = lib.f1(p) assert x == 42 + +def _run_in_multiple_threads(test1): + test1() + import sys + try: + import thread + except ImportError: + import _thread as thread + errors = [] + def wrapper(lock): + try: + test1() + except: + errors.append(sys.exc_info()) + lock.release() + locks = [] + for i in range(10): + _lock = thread.allocate_lock() + _lock.acquire() + thread.start_new_thread(wrapper, (_lock,)) + locks.append(_lock) + for _lock in locks: + _lock.acquire() + if errors: + raise errors[0][1] + +def test_errno_working_even_with_pypys_jit(): + ffi = FFI() + ffi.cdef("int f(int);") + lib = ffi.verify(""" + #include <errno.h> + int f(int x) { return (errno = errno + x); } + """) + @_run_in_multiple_threads + def test1(): + ffi.errno = 0 + for i in range(10000): + e = lib.f(1) + assert e == i + 1 + assert ffi.errno == e + for i in range(10000): + ffi.errno = i + e = 
lib.f(42) + assert e == i + 42 + +def test_getlasterror_working_even_with_pypys_jit(): + if sys.platform != 'win32': + py.test.skip("win32-only test") + ffi = FFI() + ffi.cdef("void SetLastError(DWORD);") + lib = ffi.dlopen("Kernel32.dll") + @_run_in_multiple_threads + def test1(): + for i in range(10000): + n = (1 << 29) + i + lib.SetLastError(n) + assert ffi.getwinerror()[0] == n diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py --- a/pypy/objspace/std/celldict.py +++ b/pypy/objspace/std/celldict.py @@ -16,7 +16,7 @@ class ModuleCell(W_Root): - def __init__(self, w_value=None): + def __init__(self, w_value): self.w_value = w_value def __repr__(self): @@ -49,17 +49,24 @@ return self.erase({}) def mutated(self): + # A mutation means changing an existing key to point to a new value. + # A value is either a regular wrapped object, or a ModuleCell if we + # detect mutations. It means that each existing key can only trigger + # a mutation at most once. self.version = VersionTag() - def getdictvalue_no_unwrapping(self, w_dict, key): + def dictvalue_no_unwrapping(self, w_dict, key): # NB: it's important to promote self here, so that self.version is a # no-op due to the quasi-immutable field self = jit.promote(self) - return self._getdictvalue_no_unwrapping_pure(self.version, w_dict, key) + return self._dictvalue_no_unwrapping_pure(self.version, w_dict, key) @jit.elidable_promote('0,1,2') - def _getdictvalue_no_unwrapping_pure(self, version, w_dict, key): - return self.unerase(w_dict.dstorage).get(key, None) + def _dictvalue_no_unwrapping_pure(self, version, w_dict, key): + # may raise KeyError. If it does, then the JIT is prevented from + # considering this function as elidable. This is what lets us add + # new keys to the dictionary without changing the version. 
+ return self.unerase(w_dict.dstorage)[key] def setitem(self, w_dict, w_key, w_value): space = self.space @@ -70,17 +77,20 @@ w_dict.setitem(w_key, w_value) def setitem_str(self, w_dict, key, w_value): - cell = self.getdictvalue_no_unwrapping(w_dict, key) - if isinstance(cell, ModuleCell): - cell.w_value = w_value - return - if cell is not None: + try: + cell = self.dictvalue_no_unwrapping(w_dict, key) + except KeyError: + pass + else: + if isinstance(cell, ModuleCell): + cell.w_value = w_value + return # If the new value and the current value are the same, don't # create a level of indirection, or mutate the version. if self.space.is_w(w_value, cell): return w_value = ModuleCell(w_value) - self.mutated() + self.mutated() self.unerase(w_dict.dstorage)[key] = w_value def setdefault(self, w_dict, w_key, w_default): @@ -130,7 +140,10 @@ return w_dict.getitem(w_key) def getitem_str(self, w_dict, key): - cell = self.getdictvalue_no_unwrapping(w_dict, key) + try: + cell = self.dictvalue_no_unwrapping(w_dict, key) + except KeyError: + return None return unwrap_cell(cell) def w_keys(self, w_dict): diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -21,27 +21,27 @@ w_key = self.FakeString(key) d.setitem(w_key, 1) v2 = strategy.version - assert v1 is not v2 + assert v1 is v2 # doesn't change when adding new keys assert d.getitem(w_key) == 1 - assert d.strategy.getdictvalue_no_unwrapping(d, key) == 1 + assert d.strategy.dictvalue_no_unwrapping(d, key) == 1 d.setitem(w_key, 2) v3 = strategy.version assert v2 is not v3 assert d.getitem(w_key) == 2 - assert d.strategy.getdictvalue_no_unwrapping(d, key).w_value == 2 + assert d.strategy.dictvalue_no_unwrapping(d, key).w_value == 2 d.setitem(w_key, 3) v4 = strategy.version assert v3 is v4 assert d.getitem(w_key) == 3 - assert d.strategy.getdictvalue_no_unwrapping(d, key).w_value == 3 + assert 
d.strategy.dictvalue_no_unwrapping(d, key).w_value == 3 d.delitem(w_key) v5 = strategy.version assert v5 is not v4 assert d.getitem(w_key) is None - assert d.strategy.getdictvalue_no_unwrapping(d, key) is None + py.test.raises(KeyError, d.strategy.dictvalue_no_unwrapping, d, key) def test_same_key_set_twice(self): strategy = ModuleDictStrategy(space) @@ -52,7 +52,7 @@ x = object() d.setitem("a", x) v2 = strategy.version - assert v1 is not v2 + assert v1 is v2 d.setitem("a", x) v3 = strategy.version assert v2 is v3 From noreply at buildbot.pypy.org Wed Jun 25 19:48:07 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 19:48:07 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: On Windows, save the LastError in addition to the errno Message-ID: <20140625174807.088E51D2A7B@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72230:a0d7c3cc50e7 Date: 2014-06-25 19:47 +0200 http://bitbucket.org/pypy/pypy/changeset/a0d7c3cc50e7/ Log: On Windows, save the LastError in addition to the errno diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -68,7 +68,7 @@ } else { /* Otherwise, another thread is busy with the GIL. */ - int old_errno = errno; + SAVE_ERRNO(); /* Register me as one of the threads that is actively waiting for the GIL. 
The number of such threads is found in @@ -109,7 +109,7 @@ atomic_decrement(&rpy_waiting_threads); mutex_unlock(&mutex_gil_stealer); - errno = old_errno; + RESTORE_ERRNO(); } assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -227,4 +227,9 @@ #define atomic_increment(ptr) InterlockedIncrement(ptr) #define atomic_decrement(ptr) InterlockedDecrement(ptr) +#define SAVE_ERRNO() int saved_errno = errno; \ + DWORD saved_lasterr = GetLastError() +#define RESTORE_ERRNO() errno = saved_errno; \ + SetLastError(saved_lasterr) + #include "src/thread_gil.c" diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -517,4 +517,7 @@ #define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) +#define SAVE_ERRNO() int saved_errno = errno +#define RESTORE_ERRNO() errno = saved_errno + #include "src/thread_gil.c" From noreply at buildbot.pypy.org Wed Jun 25 20:20:05 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 20:20:05 +0200 (CEST) Subject: [pypy-commit] pypy default: backout cc99ff68804a: any built-in lookup first misses a global Message-ID: <20140625182005.A72071D2A6C@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72231:8f38ba1f23f0 Date: 2014-06-25 20:19 +0200 http://bitbucket.org/pypy/pypy/changeset/8f38ba1f23f0/ Log: backout cc99ff68804a: any built-in lookup first misses a global ModuleDict! We need to be more subtle... 
diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py --- a/pypy/objspace/std/celldict.py +++ b/pypy/objspace/std/celldict.py @@ -16,7 +16,7 @@ class ModuleCell(W_Root): - def __init__(self, w_value): + def __init__(self, w_value=None): self.w_value = w_value def __repr__(self): @@ -49,24 +49,17 @@ return self.erase({}) def mutated(self): - # A mutation means changing an existing key to point to a new value. - # A value is either a regular wrapped object, or a ModuleCell if we - # detect mutations. It means that each existing key can only trigger - # a mutation at most once. self.version = VersionTag() - def dictvalue_no_unwrapping(self, w_dict, key): + def getdictvalue_no_unwrapping(self, w_dict, key): # NB: it's important to promote self here, so that self.version is a # no-op due to the quasi-immutable field self = jit.promote(self) - return self._dictvalue_no_unwrapping_pure(self.version, w_dict, key) + return self._getdictvalue_no_unwrapping_pure(self.version, w_dict, key) @jit.elidable_promote('0,1,2') - def _dictvalue_no_unwrapping_pure(self, version, w_dict, key): - # may raise KeyError. If it does, then the JIT is prevented from - # considering this function as elidable. This is what lets us add - # new keys to the dictionary without changing the version. 
- return self.unerase(w_dict.dstorage)[key] + def _getdictvalue_no_unwrapping_pure(self, version, w_dict, key): + return self.unerase(w_dict.dstorage).get(key, None) def setitem(self, w_dict, w_key, w_value): space = self.space @@ -77,20 +70,17 @@ w_dict.setitem(w_key, w_value) def setitem_str(self, w_dict, key, w_value): - try: - cell = self.dictvalue_no_unwrapping(w_dict, key) - except KeyError: - pass - else: - if isinstance(cell, ModuleCell): - cell.w_value = w_value - return + cell = self.getdictvalue_no_unwrapping(w_dict, key) + if isinstance(cell, ModuleCell): + cell.w_value = w_value + return + if cell is not None: # If the new value and the current value are the same, don't # create a level of indirection, or mutate the version. if self.space.is_w(w_value, cell): return w_value = ModuleCell(w_value) - self.mutated() + self.mutated() self.unerase(w_dict.dstorage)[key] = w_value def setdefault(self, w_dict, w_key, w_default): @@ -140,10 +130,7 @@ return w_dict.getitem(w_key) def getitem_str(self, w_dict, key): - try: - cell = self.dictvalue_no_unwrapping(w_dict, key) - except KeyError: - return None + cell = self.getdictvalue_no_unwrapping(w_dict, key) return unwrap_cell(cell) def w_keys(self, w_dict): diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -21,27 +21,27 @@ w_key = self.FakeString(key) d.setitem(w_key, 1) v2 = strategy.version - assert v1 is v2 # doesn't change when adding new keys + assert v1 is not v2 assert d.getitem(w_key) == 1 - assert d.strategy.dictvalue_no_unwrapping(d, key) == 1 + assert d.strategy.getdictvalue_no_unwrapping(d, key) == 1 d.setitem(w_key, 2) v3 = strategy.version assert v2 is not v3 assert d.getitem(w_key) == 2 - assert d.strategy.dictvalue_no_unwrapping(d, key).w_value == 2 + assert d.strategy.getdictvalue_no_unwrapping(d, key).w_value == 2 d.setitem(w_key, 3) v4 = strategy.version 
assert v3 is v4 assert d.getitem(w_key) == 3 - assert d.strategy.dictvalue_no_unwrapping(d, key).w_value == 3 + assert d.strategy.getdictvalue_no_unwrapping(d, key).w_value == 3 d.delitem(w_key) v5 = strategy.version assert v5 is not v4 assert d.getitem(w_key) is None - py.test.raises(KeyError, d.strategy.dictvalue_no_unwrapping, d, key) + assert d.strategy.getdictvalue_no_unwrapping(d, key) is None def test_same_key_set_twice(self): strategy = ModuleDictStrategy(space) @@ -52,7 +52,7 @@ x = object() d.setitem("a", x) v2 = strategy.version - assert v1 is v2 + assert v1 is not v2 d.setitem("a", x) v3 = strategy.version assert v2 is v3 From noreply at buildbot.pypy.org Wed Jun 25 20:30:54 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Wed, 25 Jun 2014 20:30:54 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: change the malloc_zero_filled flag to false Message-ID: <20140625183054.F1F801D2A7E@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72232:10d6e0089dee Date: 2014-06-25 14:22 -0400 http://bitbucket.org/pypy/pypy/changeset/10d6e0089dee/ Log: change the malloc_zero_filled flag to false diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -169,7 +169,7 @@ inline_simple_malloc_varsize = True needs_write_barrier = True prebuilt_gc_objects_are_static_roots = False - malloc_zero_filled = True # xxx experiment with False + malloc_zero_filled = False # xxx experiment with False gcflag_extra = GCFLAG_EXTRA # All objects start with a HDR, i.e. 
with a field 'tid' which contains diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -48,6 +48,7 @@ class _uninitialized(object): def __init__(self, TYPE): self._TYPE = TYPE + self.TYPE = TYPE def __repr__(self): return '<Uninitialized %r>'%(self.TYPE,) From noreply at buildbot.pypy.org Wed Jun 25 21:35:49 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 21:35:49 +0200 (CEST) Subject: [pypy-commit] pypy fast-gil: Close branch, ready for merge (os/x and windows not tested) Message-ID: <20140625193549.CFE831D34B9@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: fast-gil Changeset: r72233:d38a82268cb0 Date: 2014-06-25 21:31 +0200 http://bitbucket.org/pypy/pypy/changeset/d38a82268cb0/ Log: Close branch, ready for merge (os/x and windows not tested) From noreply at buildbot.pypy.org Wed Jun 25 21:35:51 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 21:35:51 +0200 (CEST) Subject: [pypy-commit] pypy default: hg merge fast-gil Message-ID: <20140625193551.B90C21D34B9@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72234:dd8a43cdd8d5 Date: 2014-06-25 21:33 +0200 http://bitbucket.org/pypy/pypy/changeset/dd8a43cdd8d5/ Log: hg merge fast-gil A faster way to handle the GIL, particularly in JIT code. The GIL is now a composite of two concepts: a global number (it's just set from 1 to 0 and back around CALL_RELEASE_GIL), and a real mutex. If there are threads waiting to acquire the GIL, one of them is actively checking the global number every 0.1 ms to 1 ms. diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -7,7 +7,7 @@ # all but one will be blocked. The other threads get a chance to run # from time to time, using the periodic action GILReleaseAction. 
-from rpython.rlib import rthread +from rpython.rlib import rthread, rgil from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import PeriodicAsyncAction from pypy.module.thread.threadlocals import OSThreadLocals @@ -25,8 +25,7 @@ use_bytecode_counter=True) def _initialize_gil(self, space): - if not rthread.gil_allocate(): - raise wrap_thread_error(space, "can't allocate GIL") + rgil.gil_allocate() def setup_threads(self, space): """Enable threads in the object space, if they haven't already been.""" @@ -71,15 +70,13 @@ def before_external_call(): # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! - e = get_errno() - rthread.gil_release() - set_errno(e) + rgil.gil_release() before_external_call._gctransformer_hint_cannot_collect_ = True before_external_call._dont_reach_me_in_del_ = True def after_external_call(): e = get_errno() - rthread.gil_acquire() + rgil.gil_acquire() rthread.gc_thread_run() after_thread_switch() set_errno(e) @@ -97,7 +94,7 @@ # explicitly release the gil, in a way that tries to give more # priority to other threads (as opposed to continuing to run in # the same thread). 
- if rthread.gil_yield_thread(): + if rgil.gil_yield_thread(): rthread.gc_thread_run() after_thread_switch() do_yield_thread._gctransformer_hint_close_stack_ = True diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -303,28 +303,39 @@ @staticmethod @rgc.no_collect - def _release_gil_asmgcc(css): - # similar to trackgcroot.py:pypy_asm_stackwalk, first part - from rpython.memory.gctransform import asmgcroot - new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css) - next = asmgcroot.gcrootanchor.next - new.next = next - new.prev = asmgcroot.gcrootanchor - asmgcroot.gcrootanchor.next = new - next.prev = new - # and now release the GIL - before = rffi.aroundstate.before - if before: - before() + def _reacquire_gil_asmgcc(css, old_rpy_fastgil): + # Before doing an external call, 'rpy_fastgil' is initialized to + # be equal to css. This function is called if we find out after + # the call that it is no longer equal to css. See description + # in translator/c/src/thread_pthread.c. - @staticmethod - @rgc.no_collect - def _reacquire_gil_asmgcc(css): - # first reacquire the GIL - after = rffi.aroundstate.after - if after: - after() - # similar to trackgcroot.py:pypy_asm_stackwalk, second part + if old_rpy_fastgil == 0: + # this case occurs if some other thread stole the GIL but + # released it again. What occurred here is that we changed + # 'rpy_fastgil' from 0 to 1, thus successfully reaquiring the + # GIL. + pass + + elif old_rpy_fastgil == 1: + # 'rpy_fastgil' was (and still is) locked by someone else. + # We need to wait for the regular mutex. + after = rffi.aroundstate.after + if after: + after() + else: + # stole the GIL from a different thread that is also + # currently in an external call from the jit. Attach + # the 'old_rpy_fastgil' into the chained list. 
+ from rpython.memory.gctransform import asmgcroot + oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil) + next = asmgcroot.gcrootanchor.next + oth.next = next + oth.prev = asmgcroot.gcrootanchor + asmgcroot.gcrootanchor.next = oth + next.prev = oth + + # similar to trackgcroot.py:pypy_asm_stackwalk, second part: + # detach the 'css' from the chained list from rpython.memory.gctransform import asmgcroot old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css) prev = old.prev @@ -334,42 +345,28 @@ @staticmethod @rgc.no_collect - def _release_gil_shadowstack(): - before = rffi.aroundstate.before - if before: - before() - - @staticmethod - @rgc.no_collect def _reacquire_gil_shadowstack(): + # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode, + # 'rpy_fastgil' contains only zero or non-zero, and this is only + # called when the old value stored in 'rpy_fastgil' was non-zero + # (i.e. still locked, must wait with the regular mutex) after = rffi.aroundstate.after if after: after() - @staticmethod - def _no_op(): - pass - - _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void)) - _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP], - lltype.Void)) + _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void)) + _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed], + lltype.Void)) def _build_release_gil(self, gcrootmap): - if gcrootmap is None: - releasegil_func = llhelper(self._NOARG_FUNC, self._no_op) - reacqgil_func = llhelper(self._NOARG_FUNC, self._no_op) - elif gcrootmap.is_shadow_stack: - releasegil_func = llhelper(self._NOARG_FUNC, - self._release_gil_shadowstack) - reacqgil_func = llhelper(self._NOARG_FUNC, + if gcrootmap is None or gcrootmap.is_shadow_stack: + reacqgil_func = llhelper(self._REACQGIL0_FUNC, self._reacquire_gil_shadowstack) + self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func) else: - releasegil_func = llhelper(self._CLOSESTACK_FUNC, - self._release_gil_asmgcc) - 
reacqgil_func = llhelper(self._CLOSESTACK_FUNC, + reacqgil_func = llhelper(self._REACQGIL2_FUNC, self._reacquire_gil_asmgcc) - self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func) - self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func) + self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func) def _is_asmgcc(self): gcrootmap = self.cpu.gc_ll_descr.gcrootmap diff --git a/rpython/jit/backend/llsupport/callbuilder.py b/rpython/jit/backend/llsupport/callbuilder.py --- a/rpython/jit/backend/llsupport/callbuilder.py +++ b/rpython/jit/backend/llsupport/callbuilder.py @@ -1,4 +1,7 @@ from rpython.rlib.clibffi import FFI_DEFAULT_ABI +from rpython.rlib import rgil +from rpython.rtyper.lltypesystem import lltype, rffi + class AbstractCallBuilder(object): @@ -42,20 +45,21 @@ def emit_call_release_gil(self): """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr and reacqgil_addr.""" + fastgil = rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil()) self.select_call_release_gil_mode() self.prepare_arguments() self.push_gcmap_for_call_release_gil() - self.call_releasegil_addr_and_move_real_arguments() + self.call_releasegil_addr_and_move_real_arguments(fastgil) self.emit_raw_call() self.restore_stack_pointer() - self.move_real_result_and_call_reacqgil_addr() + self.move_real_result_and_call_reacqgil_addr(fastgil) self.pop_gcmap() self.load_result() - def call_releasegil_addr_and_move_real_arguments(self): + def call_releasegil_addr_and_move_real_arguments(self, fastgil): raise NotImplementedError - def move_real_result_and_call_reacqgil_addr(self): + def move_real_result_and_call_reacqgil_addr(self, fastgil): raise NotImplementedError def select_call_release_gil_mode(self): diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py --- a/rpython/jit/backend/llsupport/test/test_gc_integration.py +++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py @@ -2,6 +2,7 @@ """ 
Tests for register allocation for common constructs """ +import py import re from rpython.jit.metainterp.history import TargetToken, BasicFinalDescr,\ JitCellToken, BasicFailDescr, AbstractDescr @@ -780,6 +781,9 @@ assert rffi.cast(JITFRAMEPTR, cpu.gc_ll_descr.write_barrier_on_frame_called) == frame def test_call_release_gil(self): + py.test.skip("xxx fix this test: the code is now assuming that " + "'before' is just rgil.release_gil(), and 'after' is " + "only needed if 'rpy_fastgil' was not changed.") # note that we can't test floats here because when untranslated # people actually wreck xmm registers cpu = self.cpu diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py --- a/rpython/jit/backend/x86/callbuilder.py +++ b/rpython/jit/backend/x86/callbuilder.py @@ -25,9 +25,6 @@ # arguments, we need to decrease esp temporarily stack_max = PASS_ON_MY_FRAME - # set by save_result_value() - tmpresloc = None - def __init__(self, assembler, fnloc, arglocs, resloc=eax, restype=INT, ressize=WORD): AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs, @@ -41,7 +38,6 @@ self.current_esp = 0 # 0 or (usually) negative, counted in bytes def select_call_release_gil_mode(self): - """Overridden in CallBuilder64""" AbstractCallBuilder.select_call_release_gil_mode(self) if self.asm._is_asmgcc(): from rpython.memory.gctransform import asmgcroot @@ -69,12 +65,10 @@ if self.ressize == 0: return # void result # use the code in load_from_mem to do the zero- or sign-extension - srcloc = self.tmpresloc - if srcloc is None: - if self.restype == FLOAT: - srcloc = xmm0 - else: - srcloc = eax + if self.restype == FLOAT: + srcloc = xmm0 + else: + srcloc = eax if self.ressize >= WORD and self.resloc is srcloc: return # no need for any MOV if self.ressize == 1 and isinstance(srcloc, RegLoc): @@ -100,13 +94,14 @@ self.asm.set_extra_stack_depth(self.mc, 0) self.asm.pop_gcmap(self.mc) - def call_releasegil_addr_and_move_real_arguments(self): - 
initial_esp = self.current_esp - self.save_register_arguments() + def call_releasegil_addr_and_move_real_arguments(self, fastgil): + from rpython.jit.backend.x86.assembler import heap # if not self.asm._is_asmgcc(): - # the helper takes no argument + # shadowstack: change 'rpy_fastgil' to 0 (it should be + # non-zero right now). self.change_extra_stack_depth = False + css_value = imm(0) else: from rpython.memory.gctransform import asmgcroot # build a 'css' structure on the stack: 2 words for the linkage, @@ -120,73 +115,95 @@ index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP) self.mc.MOV_sr(index_of_ebp, ebp.value) # MOV [css.ebp], EBP # Save the "return address": we pretend that it's css - if IS_X86_32: - reg = eax - elif IS_X86_64: - reg = edi - self.mc.LEA_rs(reg.value, css) # LEA reg, [css] + self.mc.LEA_rs(eax.value, css) # LEA eax, [css] frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR) - self.mc.MOV_sr(frame_ptr, reg.value) # MOV [css.frame], reg + self.mc.MOV_sr(frame_ptr, eax.value) # MOV [css.frame], eax # Set up jf_extra_stack_depth to pretend that the return address # was at css, and so our stack frame is supposedly shorter by # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1 self.change_extra_stack_depth = True self.asm.set_extra_stack_depth(self.mc, -delta * WORD) - # Call the closestack() function (also releasing the GIL) - # with 'reg' as argument - if IS_X86_32: - self.subtract_esp_aligned(1) - self.mc.MOV_sr(0, reg.value) - #else: - # on x86_64, reg is edi so that it is already correct + css_value = eax # - self.mc.CALL(imm(self.asm.releasegil_addr)) + self.mc.MOV(heap(fastgil), css_value) # if not we_are_translated(): # for testing: we should not access - self.mc.ADD(ebp, imm(1)) # ebp any more + self.mc.ADD(ebp, imm(1)) # ebp any more; and ignore 'fastgil' + + def move_real_result_and_call_reacqgil_addr(self, fastgil): + from rpython.jit.backend.x86.assembler import heap + from 
rpython.jit.backend.x86 import rx86 # - self.restore_register_arguments() - self.restore_stack_pointer(initial_esp) - - def save_register_arguments(self): - """Overridden in CallBuilder64""" - - def restore_register_arguments(self): - """Overridden in CallBuilder64""" - - def move_real_result_and_call_reacqgil_addr(self): - # save the result we just got (in eax/eax+edx/st(0)/xmm0) - self.save_result_value() - # call the reopenstack() function (also reacquiring the GIL) + # check if we need to call the reacqgil() function or not + # (to acquiring the GIL, remove the asmgcc head from + # the chained list, etc.) + mc = self.mc + restore_edx = False if not self.asm._is_asmgcc(): - css = 0 # the helper takes no argument + css = 0 + css_value = imm(0) + old_value = ecx else: from rpython.memory.gctransform import asmgcroot css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS) if IS_X86_32: - reg = eax + assert css >= 16 + if self.restype == 'L': # long long result: eax/edx + mc.MOV_sr(12, edx.value) + restore_edx = True + css_value = edx + old_value = ecx elif IS_X86_64: - reg = edi - self.mc.LEA_rs(reg.value, css) + css_value = edi + old_value = esi + mc.LEA_rs(css_value.value, css) + # + mc.MOV(old_value, imm(1)) + if rx86.fits_in_32bits(fastgil): + mc.XCHG_rj(old_value.value, fastgil) + else: + mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil) + mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0)) + mc.CMP(old_value, css_value) + mc.J_il8(rx86.Conditions['E'], 0) + je_location = mc.get_relative_pos() + # + # Yes, we need to call the reacqgil() function + self.save_result_value_reacq() + if self.asm._is_asmgcc(): if IS_X86_32: - self.mc.MOV_sr(0, reg.value) + mc.MOV_sr(4, old_value.value) + mc.MOV_sr(0, css_value.value) + # on X86_64, they are already in the right registers + mc.CALL(imm(self.asm.reacqgil_addr)) + self.restore_result_value_reacq() # - self.mc.CALL(imm(self.asm.reacqgil_addr)) + # patch the JE above + offset = mc.get_relative_pos() - je_location + 
assert 0 < offset <= 127 + mc.overwrite(je_location-1, chr(offset)) # - if not we_are_translated(): # for testing: now we can accesss - self.mc.SUB(ebp, imm(1)) # ebp again + if restore_edx: + mc.MOV_rs(edx.value, 12) # restore this + # + if not we_are_translated(): # for testing: now we can accesss + mc.SUB(ebp, imm(1)) # ebp again # # Now that we required the GIL, we can reload a possibly modified ebp if self.asm._is_asmgcc(): # special-case: reload ebp from the css from rpython.memory.gctransform import asmgcroot index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP) - self.mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp] + mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp] #else: # for shadowstack, done for us by _reload_frame_if_necessary() - def save_result_value(self): + def save_result_value_reacq(self): + """Overridden in CallBuilder32 and CallBuilder64""" + raise NotImplementedError + + def restore_result_value_reacq(self): """Overridden in CallBuilder32 and CallBuilder64""" raise NotImplementedError @@ -239,58 +256,71 @@ resloc = self.resloc if resloc is not None and resloc.is_float(): # a float or a long long return - if self.tmpresloc is None: - if self.restype == 'L': # long long - # move eax/edx -> xmm0 - self.mc.MOVD32_xr(resloc.value^1, edx.value) - self.mc.MOVD32_xr(resloc.value, eax.value) - self.mc.PUNPCKLDQ_xx(resloc.value, resloc.value^1) - else: - # float: we have to go via the stack - self.mc.FSTPL_s(0) - self.mc.MOVSD_xs(resloc.value, 0) + if self.restype == 'L': # long long + # move eax/edx -> xmm0 + self.mc.MOVD32_xr(resloc.value^1, edx.value) + self.mc.MOVD32_xr(resloc.value, eax.value) + self.mc.PUNPCKLDQ_xx(resloc.value, resloc.value^1) else: - self.mc.MOVSD(resloc, self.tmpresloc) + # float: we have to go via the stack + self.mc.FSTPL_s(0) + self.mc.MOVSD_xs(resloc.value, 0) # elif self.restype == 'S': # singlefloat return: must convert ST(0) to a 32-bit singlefloat # and load it into self.resloc. 
mess mess mess - if self.tmpresloc is None: - self.mc.FSTPS_s(0) - self.mc.MOV_rs(resloc.value, 0) - else: - self.mc.MOV(resloc, self.tmpresloc) + self.mc.FSTPS_s(0) + self.mc.MOV_rs(resloc.value, 0) else: CallBuilderX86.load_result(self) - def save_result_value(self): - # Temporarily save the result value into [ESP+4]. We use "+4" - # in order to leave the word at [ESP+0] free, in case it's needed + def save_result_value_reacq(self): + # Temporarily save the result value into [ESP+8]. We use "+8" + # in order to leave the two initial words free, in case it's needed. + # Also note that in this 32-bit case, a long long return value is + # in eax/edx, but we already saved the value of edx in + # move_real_result_and_call_reacqgil_addr(). if self.ressize == 0: # void return return if self.resloc.is_float(): # a float or a long long return - self.tmpresloc = RawEspLoc(4, FLOAT) if self.restype == 'L': - self.mc.MOV_sr(4, eax.value) # long long - self.mc.MOV_sr(8, edx.value) + self.mc.MOV_sr(8, eax.value) # long long + #self.mc.MOV_sr(12, edx.value) -- already done by the caller else: - self.mc.FSTPL_s(4) # float return + self.mc.FSTPL_s(8) # float return else: - self.tmpresloc = RawEspLoc(4, INT) if self.restype == 'S': - self.mc.FSTPS_s(4) + self.mc.FSTPS_s(8) else: assert self.restype == INT assert self.ressize <= WORD - self.mc.MOV_sr(4, eax.value) + self.mc.MOV_sr(8, eax.value) + + def restore_result_value_reacq(self): + # Opposite of save_result_value_reacq() + if self.ressize == 0: # void return + return + if self.resloc.is_float(): + # a float or a long long return + if self.restype == 'L': + self.mc.MOV_rs(eax.value, 8) # long long + #self.mc.MOV_rs(edx.value, 12) -- will be done by the caller + else: + self.mc.FLDL_s(8) # float return + else: + if self.restype == 'S': + self.mc.FLDS_s(8) + else: + assert self.restype == INT + assert self.ressize <= WORD + self.mc.MOV_rs(eax.value, 8) class CallBuilder64(CallBuilderX86): ARGUMENTS_GPR = [edi, esi, edx, ecx, r8, 
r9] ARGUMENTS_XMM = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7] - DONT_MOVE_GPR = [] _ALL_CALLEE_SAVE_GPR = [ebx, r12, r13, r14, r15] next_arg_gpr = 0 @@ -303,13 +333,6 @@ res = self.ARGUMENTS_GPR[i] except IndexError: return None - if hint in self.DONT_MOVE_GPR: - for j in range(i): - if hint is self.ARGUMENTS_GPR[j]: - break - else: - self.ARGUMENTS_GPR[i] = hint - res = hint return res def _unused_xmm(self): @@ -320,51 +343,6 @@ except IndexError: return None - def _permute_to_prefer_unused_registers(self, lst): - # permute 'lst' so that it starts with registers that are not - # in 'self.already_used', and ends with registers that are. - N = len(lst) - i = 0 - while i < N: - reg = lst[i] - if reg in self.already_used: - # move this reg to the end, and decrement N - N -= 1 - assert N >= i - lst[N], lst[i] = lst[i], lst[N] - else: - i += 1 - - def select_call_release_gil_mode(self): - CallBuilderX86.select_call_release_gil_mode(self) - # We have to copy the arguments around a bit more in this mode, - # but on the other hand we don't need prepare_arguments() moving - # them in precisely the final registers. Here we look around for - # unused registers that may be more likely usable. 
- from rpython.jit.backend.x86.regalloc import X86_64_RegisterManager - from rpython.jit.backend.x86.regalloc import X86_64_XMMRegisterManager - self.already_used = {} - for loc in self.arglocs: - self.already_used[loc] = None - # - lst = X86_64_RegisterManager.save_around_call_regs[:] - self._permute_to_prefer_unused_registers(lst) - # - extra = [] - for reg in self.asm._regalloc.rm.free_regs: - if (reg not in self.already_used and - reg in self._ALL_CALLEE_SAVE_GPR): - extra.append(reg) - self.free_callee_save_gprs = extra - lst = extra + lst - # - self.ARGUMENTS_GPR = lst[:len(self.ARGUMENTS_GPR)] - self.DONT_MOVE_GPR = self._ALL_CALLEE_SAVE_GPR - # - lst = X86_64_XMMRegisterManager.save_around_call_regs[:] - self._permute_to_prefer_unused_registers(lst) - self.ARGUMENTS_XMM = lst[:len(self.ARGUMENTS_XMM)] - def prepare_arguments(self): src_locs = [] dst_locs = [] @@ -444,78 +422,44 @@ assert 0 # should not occur on 64-bit def load_result(self): - if self.restype == 'S' and self.tmpresloc is None: + if self.restype == 'S': # singlefloat return: use MOVD to load the target register # from the lower 32 bits of XMM0 self.mc.MOVD32(self.resloc, xmm0) else: CallBuilderX86.load_result(self) - def save_result_value(self): + def save_result_value_reacq(self): # Temporarily save the result value into [ESP]. 
if self.ressize == 0: # void return return # if self.restype == FLOAT: # and not 'S' self.mc.MOVSD_sx(0, xmm0.value) - self.tmpresloc = RawEspLoc(0, FLOAT) return # - if len(self.free_callee_save_gprs) == 0: - self.tmpresloc = RawEspLoc(0, INT) - else: - self.tmpresloc = self.free_callee_save_gprs[0] - # if self.restype == 'S': # singlefloat return: use MOVD to store the lower 32 bits - # of XMM0 into the tmpresloc (register or [ESP]) - self.mc.MOVD32(self.tmpresloc, xmm0) + # of XMM0 into [ESP] + self.mc.MOVD32_sx(0, xmm0.value) else: assert self.restype == INT - self.mc.MOV(self.tmpresloc, eax) + self.mc.MOV_sr(0, eax.value) - def save_register_arguments(self): - # Save the argument registers, which are given by self.ARGUMENTS_xxx. - n_gpr = min(self.next_arg_gpr, len(self.ARGUMENTS_GPR)) - n_xmm = min(self.next_arg_xmm, len(self.ARGUMENTS_XMM)) - n_saved_regs = n_gpr + n_xmm - for i in range(n_gpr): - if self.ARGUMENTS_GPR[i] in self._ALL_CALLEE_SAVE_GPR: - n_saved_regs -= 1 # don't need to save it - self.subtract_esp_aligned(n_saved_regs) + def restore_result_value_reacq(self): + # Opposite of save_result_value_reacq() + if self.ressize == 0: # void return + return # - n = 0 - for i in range(n_gpr): - if self.ARGUMENTS_GPR[i] not in self._ALL_CALLEE_SAVE_GPR: - self.mc.MOV_sr(n * WORD, self.ARGUMENTS_GPR[i].value) - n += 1 - for i in range(n_xmm): - self.mc.MOVSD_sx(n * WORD, self.ARGUMENTS_XMM[i].value) - n += 1 - assert n == n_saved_regs - self.n_saved_regs = n_saved_regs - - def restore_register_arguments(self): - # Restore the saved values into the *real* registers used for calls - # --- which are not self.ARGUMENTS_xxx! 
- n_gpr = min(self.next_arg_gpr, len(self.ARGUMENTS_GPR)) - n_xmm = min(self.next_arg_xmm, len(self.ARGUMENTS_XMM)) + if self.restype == FLOAT: # and not 'S' + self.mc.MOVSD_xs(xmm0.value, 0) + return # - n = 0 - for i in range(n_gpr): - tgtvalue = CallBuilder64.ARGUMENTS_GPR[i].value - if self.ARGUMENTS_GPR[i] not in self._ALL_CALLEE_SAVE_GPR: - self.mc.MOV_rs(tgtvalue, n * WORD) - n += 1 - else: - self.mc.MOV_rr(tgtvalue, self.ARGUMENTS_GPR[i].value) - for i in range(n_xmm): - self.mc.MOVSD_xs(CallBuilder64.ARGUMENTS_XMM[i].value, n * WORD) - n += 1 - assert n == self.n_saved_regs - # - if isinstance(self.fnloc, RegLoc): # fix this register - self.fnloc = CallBuilder64.ARGUMENTS_GPR[n_gpr - 1] + if self.restype == 'S': + self.mc.MOVD32_xs(xmm0.value, 0) + else: + assert self.restype == INT + self.mc.MOV_rs(eax.value, 0) if IS_X86_32: diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py --- a/rpython/jit/backend/x86/rx86.py +++ b/rpython/jit/backend/x86/rx86.py @@ -561,7 +561,7 @@ # XXX: Only here for testing purposes..."as" happens the encode the # registers in the opposite order that we would otherwise do in a # register-register exchange. - #XCHG_rr = insn(rex_w, '\x87', register(1), register(2,8), '\xC0') + XCHG_rr = insn(rex_w, '\x87', register(1), register(2,8), '\xC0') JMP_l = insn('\xE9', relative(1)) JMP_r = insn(rex_nw, '\xFF', orbyte(4<<3), register(1), '\xC0') @@ -589,6 +589,8 @@ FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??) FSTPL_s = insn('\xDD', orbyte(3<<3), stack_sp(1)) # rffi.DOUBLE ('as' wants L??) 
FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat + FLDL_s = insn('\xDD', orbyte(0<<3), stack_sp(1)) + FLDS_s = insn('\xD9', orbyte(0<<3), stack_sp(1)) # ------------------------------ Random mess ----------------------- RDTSC = insn('\x0F\x31') @@ -626,8 +628,10 @@ MOVDQ_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2)) MOVD32_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0') + MOVD32_sx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), stack_sp(1)) MOVD32_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0') MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2)) + MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2)) PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b')) @@ -751,7 +755,7 @@ define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)], regtype='XMM') -#define_modrm_modes('XCHG_r*', [rex_w, '\x87', register(1, 8)]) +define_modrm_modes('XCHG_r*', [rex_w, '\x87', register(1, 8)]) define_modrm_modes('ADDSD_x*', ['\xF2', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM') define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM') diff --git a/rpython/jit/backend/x86/test/test_callbuilder.py b/rpython/jit/backend/x86/test/test_callbuilder.py --- a/rpython/jit/backend/x86/test/test_callbuilder.py +++ b/rpython/jit/backend/x86/test/test_callbuilder.py @@ -18,16 +18,14 @@ self._log.append(('mov', src, dst)) -def test_base_case(): +def test_base_case(call_release_gil_mode=False): asm = FakeAssembler() cb = callbuilder.CallBuilder64(asm, ImmedLoc(12345), [ebx, ebx]) + if call_release_gil_mode: + cb.select_call_release_gil_mode() cb.prepare_arguments() assert asm._log == [('mov', ebx, edi), ('mov', ebx, esi)] -def test_bug_call_release_gil(): - asm = FakeAssembler() - cb = callbuilder.CallBuilder64(asm, ImmedLoc(12345), [ebx, ebx]) - 
cb.select_call_release_gil_mode() - cb.prepare_arguments() - assert asm._log == [('mov', ebx, ecx)] +def test_call_release_gil(): + test_base_case(call_release_gil_mode=True) diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py --- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py +++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py @@ -194,7 +194,8 @@ for args in args_lists: suffix = "" if (argmodes and not self.is_xmm_insn - and not instrname.startswith('FSTP')): + and not instrname.startswith('FSTP') + and not instrname.startswith('FLD')): suffix = suffixes[self.WORD] # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer # operands when converting to/from floating point, so we need to diff --git a/rpython/memory/gctransform/asmgcroot.py b/rpython/memory/gctransform/asmgcroot.py --- a/rpython/memory/gctransform/asmgcroot.py +++ b/rpython/memory/gctransform/asmgcroot.py @@ -2,6 +2,7 @@ copygraph, SpaceOperation, checkgraph) from rpython.rlib.debug import ll_assert from rpython.rlib.nonconst import NonConstant +from rpython.rlib import rgil from rpython.rtyper.annlowlevel import llhelper from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.lltypesystem.lloperation import llop @@ -356,16 +357,19 @@ initialframedata = anchor.address[1] stackscount = 0 while initialframedata != anchor: # while we have not looped back - self.fill_initial_frame(curframe, initialframedata) - # Loop over all the frames in the stack - while self.walk_to_parent_frame(curframe, otherframe): - swap = curframe - curframe = otherframe # caller becomes callee - otherframe = swap + self.walk_frames(curframe, otherframe, initialframedata) # Then proceed to the next piece of stack initialframedata = initialframedata.address[1] stackscount += 1 # + # for the JIT: rpy_fastgil may contain an extra framedata + rpy_fastgil = rgil.gil_fetch_fastgil().signed[0] + if 
rpy_fastgil != 1: + ll_assert(rpy_fastgil != 0, "walk_stack_from doesn't have the GIL") + initialframedata = rffi.cast(llmemory.Address, rpy_fastgil) + self.walk_frames(curframe, otherframe, initialframedata) + stackscount += 1 + # expected = rffi.stackcounter.stacks_counter if NonConstant(0): rffi.stackcounter.stacks_counter += 42 # hack to force it @@ -374,6 +378,14 @@ lltype.free(otherframe, flavor='raw') lltype.free(curframe, flavor='raw') + def walk_frames(self, curframe, otherframe, initialframedata): + self.fill_initial_frame(curframe, initialframedata) + # Loop over all the frames in the stack + while self.walk_to_parent_frame(curframe, otherframe): + swap = curframe + curframe = otherframe # caller becomes callee + otherframe = swap + def fill_initial_frame(self, curframe, initialframedata): # Read the information provided by initialframedata initialframedata += 2*sizeofaddr #skip the prev/next words at the start @@ -770,7 +782,7 @@ gcrootanchor.next = gcrootanchor c_gcrootanchor = Constant(gcrootanchor, ASM_FRAMEDATA_HEAD_PTR) -eci = ExternalCompilationInfo(pre_include_bits=['#define PYPY_USE_ASMGCC']) +eci = ExternalCompilationInfo(compile_extra=['-DPYPY_USE_ASMGCC']) pypy_asm_stackwalk = rffi.llexternal('pypy_asm_stackwalk', [ASM_CALLBACK_PTR, diff --git a/rpython/rlib/rgil.py b/rpython/rlib/rgil.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rgil.py @@ -0,0 +1,38 @@ +import py +from rpython.conftest import cdir +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.rtyper.lltypesystem import lltype, llmemory, rffi + +# these functions manipulate directly the GIL, whose definition does not +# escape the C code itself +translator_c_dir = py.path.local(cdir) + +eci = ExternalCompilationInfo( + includes = ['src/thread.h'], + separate_module_files = [translator_c_dir / 'src' / 'thread.c'], + include_dirs = [translator_c_dir], + export_symbols = ['RPyGilAllocate', 'RPyGilYieldThread', 'RPyGilRelease', + 'RPyGilAcquire', 
'RPyFetchFastGil']) + +llexternal = rffi.llexternal + + +gil_allocate = llexternal('RPyGilAllocate', [], lltype.Void, + _nowrapper=True, sandboxsafe=True, + compilation_info=eci) + +gil_yield_thread = llexternal('RPyGilYieldThread', [], lltype.Signed, + _nowrapper=True, sandboxsafe=True, + compilation_info=eci) + +gil_release = llexternal('RPyGilRelease', [], lltype.Void, + _nowrapper=True, sandboxsafe=True, + compilation_info=eci) + +gil_acquire = llexternal('RPyGilAcquire', [], lltype.Void, + _nowrapper=True, sandboxsafe=True, + compilation_info=eci) + +gil_fetch_fastgil = llexternal('RPyFetchFastGil', [], llmemory.Address, + _nowrapper=True, sandboxsafe=True, + compilation_info=eci) diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -19,8 +19,7 @@ include_dirs = [translator_c_dir], export_symbols = ['RPyThreadGetIdent', 'RPyThreadLockInit', 'RPyThreadAcquireLock', 'RPyThreadAcquireLockTimed', - 'RPyThreadReleaseLock', 'RPyGilAllocate', - 'RPyGilYieldThread', 'RPyGilRelease', 'RPyGilAcquire', + 'RPyThreadReleaseLock', 'RPyThreadGetStackSize', 'RPyThreadSetStackSize', 'RPyOpaqueDealloc_ThreadLock', 'RPyThreadAfterFork'] @@ -76,16 +75,6 @@ [TLOCKP], lltype.Void, _nowrapper=True) -# these functions manipulate directly the GIL, whose definition does not -# escape the C code itself -gil_allocate = llexternal('RPyGilAllocate', [], lltype.Signed, - _nowrapper=True) -gil_yield_thread = llexternal('RPyGilYieldThread', [], lltype.Signed, - _nowrapper=True) -gil_release = llexternal('RPyGilRelease', [], lltype.Void, - _nowrapper=True) -gil_acquire = llexternal('RPyGilAcquire', [], lltype.Void, - _nowrapper=True) def allocate_lock(): return Lock(allocate_ll_lock()) diff --git a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -858,13 +858,17 @@ return [] def _visit_xchg(self, 
line): - # only support the format used in VALGRIND_DISCARD_TRANSLATIONS + # support the format used in VALGRIND_DISCARD_TRANSLATIONS # which is to use a marker no-op "xchgl %ebx, %ebx" match = self.r_binaryinsn.match(line) source = match.group("source") target = match.group("target") if source == target: return [] + # ignore the 'rpy_fastgil' atomic exchange, or any locked + # atomic exchange at all (involving memory) + if not source.startswith('%'): + return [] raise UnrecognizedOperation(line) def visit_call(self, line): diff --git a/rpython/translator/c/src/mem.c b/rpython/translator/c/src/mem.c --- a/rpython/translator/c/src/mem.c +++ b/rpython/translator/c/src/mem.c @@ -115,6 +115,11 @@ got += 1; fd = ((void* *) (((char *)fd) + sizeof(void*)))[0]; } + if (rpy_fastgil != 1) { + RPyAssert(rpy_fastgil != 0, + "pypy_check_stack_count doesn't have the GIL"); + got++; /* <= the extra one currently stored in rpy_fastgil */ + } RPyAssert(got == stacks_counter - 1, "bad stacks_counter or non-closed stacks around"); # endif diff --git a/rpython/translator/c/src/thread.c b/rpython/translator/c/src/thread.c --- a/rpython/translator/c/src/thread.c +++ b/rpython/translator/c/src/thread.c @@ -9,9 +9,14 @@ #include "common_header.h" #endif +#ifdef PYPY_USE_ASMGCC +# include "common_header.h" +# include "structdef.h" +# include "forwarddecl.h" +#endif + #ifdef _WIN32 #include "src/thread_nt.c" #else #include "src/thread_pthread.c" #endif - diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -24,9 +24,26 @@ #endif /* !_WIN32 */ -long RPyGilAllocate(void); +void RPyGilAllocate(void); long RPyGilYieldThread(void); -void RPyGilRelease(void); void RPyGilAcquire(void); +#define RPyGilRelease _RPyGilRelease +#define RPyFetchFastGil _RPyFetchFastGil + +#ifdef PYPY_USE_ASMGCC +# define RPY_FASTGIL_LOCKED(x) (x == 1) +#else +# define RPY_FASTGIL_LOCKED(x) (x != 0) +#endif + 
+extern long rpy_fastgil; + +static inline void _RPyGilRelease(void) { + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + rpy_fastgil = 0; +} +static inline long *_RPyFetchFastGil(void) { + return &rpy_fastgil; +} #endif diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c new file mode 100644 --- /dev/null +++ b/rpython/translator/c/src/thread_gil.c @@ -0,0 +1,177 @@ + +/* Idea: + + - "The GIL" is a composite concept. There are two locks, and "the + GIL is locked" when both are locked. + + - The first lock is a simple global variable 'rpy_fastgil'. With + shadowstack, we use the most portable definition: 0 means unlocked + and != 0 means locked. With asmgcc, 0 means unlocked but only 1 + means locked. A different value means unlocked too, but the value + is used by the JIT to contain the stack top for stack root scanning. + + - The second lock is a regular mutex. In the fast path, it is never + unlocked. Remember that "the GIL is unlocked" means that either + the first or the second lock is unlocked. It should never be the + case that both are unlocked at the same time. + + - Let's call "thread 1" the thread with the GIL. Whenever it does an + external function call, it sets 'rpy_fastgil' to 0 (unlocked). + This is the cheapest way to release the GIL. When it returns from + the function call, this thread attempts to atomically change + 'rpy_fastgil' to 1. In the common case where it works, thread 1 + has got the GIL back and so continues to run. + + - Say "thread 2" is eagerly waiting for thread 1 to become blocked in + some long-running call. Regularly, it checks if 'rpy_fastgil' is 0 + and tries to atomically change it to 1. If it succeeds, it means + that the GIL was not previously locked. Thread 2 has now got the GIL. + + - If there are more than 2 threads, the rest is really sleeping by + waiting on the 'mutex_gil_stealer' held by thread 2. 
+ + - An additional mechanism is used for when thread 1 wants to + explicitly yield the GIL to thread 2: it does so by releasing + 'mutex_gil' (which is otherwise not released) but keeping the + value of 'rpy_fastgil' to 1. +*/ + +long rpy_fastgil = 1; +long rpy_waiting_threads = -42; /* GIL not initialized */ +static mutex_t mutex_gil_stealer; +static mutex_t mutex_gil; + +void RPyGilAllocate(void) +{ + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + mutex_init(&mutex_gil_stealer); + mutex_init(&mutex_gil); + mutex_lock(&mutex_gil); + rpy_waiting_threads = 0; +} + +void RPyGilAcquire(void) +{ + /* Acquires the GIL. + + XXX Note: this function saves and restores 'errno'. This is + needed for now because it may be *followed* by reading the + 'errno', although it's kind of bogus: it should be read before + calling RPyGilAcquire(). + */ + long old_fastgil = lock_test_and_set(&rpy_fastgil, 1); + + if (!RPY_FASTGIL_LOCKED(old_fastgil)) { + /* The fastgil was not previously locked: success. + 'mutex_gil' should still be locked at this point. + */ + } + else { + /* Otherwise, another thread is busy with the GIL. */ + SAVE_ERRNO(); + + /* Register me as one of the threads that is actively waiting + for the GIL. The number of such threads is found in + rpy_waiting_threads. */ + assert(rpy_waiting_threads >= 0); + atomic_increment(&rpy_waiting_threads); + + /* Enter the waiting queue from the end. Assuming a roughly + first-in-first-out order, this will nicely give the threads + a round-robin chance. + */ + mutex_lock(&mutex_gil_stealer); + + /* We are now the stealer thread. Steals! */ + while (1) { + /* Sleep for one interval of time. We may be woken up earlier + if 'mutex_gil' is released. + */ + if (mutex_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ + /* We arrive here if 'mutex_gil' was recently released + and we just relocked it. + */ + old_fastgil = 0; + break; + } + + /* Busy-looping here. Try to look again if 'rpy_fastgil' is + released. 
+ */ + if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) { + old_fastgil = lock_test_and_set(&rpy_fastgil, 1); + if (!RPY_FASTGIL_LOCKED(old_fastgil)) + /* yes, got a non-held value! Now we hold it. */ + break; + } + /* Otherwise, loop back. */ + } + atomic_decrement(&rpy_waiting_threads); + mutex_unlock(&mutex_gil_stealer); + + RESTORE_ERRNO(); + } + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + +#ifdef PYPY_USE_ASMGCC + if (old_fastgil != 0) { + /* this case only occurs from the JIT compiler */ + struct pypy_ASM_FRAMEDATA_HEAD0 *new = + (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil; + struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD; + struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next; + new->as_next = next; + new->as_prev = root; + root->as_next = new; + next->as_prev = new; + } +#else + assert(old_fastgil == 0); +#endif +} + +long RPyGilYieldThread(void) +{ + /* can be called even before RPyGilAllocate(), but in this case, + 'rpy_waiting_threads' will be -42. */ + assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); + if (rpy_waiting_threads <= 0) + return 0; + + /* Explicitly release the 'mutex_gil'. + */ + mutex_unlock(&mutex_gil); + + /* Now nobody has got the GIL, because 'mutex_gil' is released (but + rpy_fastgil is still locked). Call RPyGilAcquire(). It will + enqueue ourselves at the end of the 'mutex_gil_stealer' queue. + If there is no other waiting thread, it will fall through both + its mutex_lock() and mutex_lock_timeout() now. But that's + unlikely, because we tested above that 'rpy_waiting_threads > 0'. + */ + RPyGilAcquire(); + return 1; +} + +/********** for tests only **********/ + +/* These functions are usually defined as a macros RPyXyz() in thread.h + which get translated into calls to _RpyXyz(). But for tests we need + the real functions to exists in the library as well. +*/ + +#undef RPyGilRelease +void RPyGilRelease(void) +{ + /* Releases the GIL in order to do an external function call. 
+ We assume that the common case is that the function call is + actually very short, and optimize accordingly. + */ + _RPyGilRelease(); +} + +#undef RPyFetchFastGil +long *RPyFetchFastGil(void) +{ + return _RPyFetchFastGil(); +} diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -196,50 +196,40 @@ /* GIL code */ /************************************************************/ -static volatile LONG pending_acquires = -1; -static CRITICAL_SECTION mutex_gil; -static HANDLE cond_gil; +typedef HANDLE mutex_t; /* a semaphore, on Windows */ -long RPyGilAllocate(void) -{ - pending_acquires = 0; - InitializeCriticalSection(&mutex_gil); - EnterCriticalSection(&mutex_gil); - cond_gil = CreateEvent (NULL, FALSE, FALSE, NULL); - return 1; +static void gil_fatal(const char *msg) { + fprintf(stderr, "Fatal error in the GIL: %s\n", msg); + abort(); } -long RPyGilYieldThread(void) -{ - /* can be called even before RPyGilAllocate(), but in this case, - pending_acquires will be -1 */ - if (pending_acquires <= 0) - return 0; - InterlockedIncrement(&pending_acquires); - PulseEvent(cond_gil); - - /* hack: the three following lines do a pthread_cond_wait(), and - normally specifying a timeout of INFINITE would be fine. But the - first and second operations are not done atomically, so there is a - (small) risk that PulseEvent misses the WaitForSingleObject(). - In this case the process will just sleep a few milliseconds. 
*/ - LeaveCriticalSection(&mutex_gil); - WaitForSingleObject(cond_gil, 15); - EnterCriticalSection(&mutex_gil); - - InterlockedDecrement(&pending_acquires); - return 1; +static inline void mutex_init(mutex_t *mutex) { + *mutex = CreateSemaphore(NULL, 1, 1, NULL); + if (*mutex == NULL) + gil_fatal("CreateSemaphore failed"); } -void RPyGilRelease(void) -{ - LeaveCriticalSection(&mutex_gil); - PulseEvent(cond_gil); +static inline void mutex_lock(mutex_t *mutex) { + WaitForSingleObject(*mutex, INFINITE); } -void RPyGilAcquire(void) +static inline void mutex_unlock(mutex_t *mutex) { + ReleaseSemaphore(*mutex, 1, NULL); +} + +static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { - InterlockedIncrement(&pending_acquires); - EnterCriticalSection(&mutex_gil); - InterlockedDecrement(&pending_acquires); + DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.999)); + return (result != WAIT_TIMEOUT); } + +#define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value) +#define atomic_increment(ptr) InterlockedIncrement(ptr) +#define atomic_decrement(ptr) InterlockedDecrement(ptr) + +#define SAVE_ERRNO() int saved_errno = errno; \ + DWORD saved_lasterr = GetLastError() +#define RESTORE_ERRNO() errno = saved_errno; \ + SetLastError(saved_lasterr) + +#include "src/thread_gil.c" diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -472,29 +472,7 @@ /* GIL code */ /************************************************************/ -#ifdef __llvm__ -# define HAS_ATOMIC_ADD -#endif - -#ifdef __GNUC__ -# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) -# define HAS_ATOMIC_ADD -# endif -#endif - -#ifdef HAS_ATOMIC_ADD -# define atomic_add __sync_fetch_and_add -#else -# if defined(__amd64__) -# define atomic_add(ptr, value) asm volatile ("lock addq %0, %1" \ - : : "ri"(value), "m"(*(ptr)) :
"memory") -# elif defined(__i386__) -# define atomic_add(ptr, value) asm volatile ("lock addl %0, %1" \ - : : "ri"(value), "m"(*(ptr)) : "memory") -# else -# error "Please use gcc >= 4.1 or write a custom 'asm' for your CPU." -# endif -#endif +#include <time.h> #define ASSERT_STATUS(call) \ if (call != 0) { \ @@ -502,88 +480,44 @@ abort(); \ } -static void _debug_print(const char *msg) +static inline void timespec_add(struct timespec *t, double incr) { -#if 0 - int col = (int)pthread_self(); - col = 31 + ((col / 8) % 8); - fprintf(stderr, "\033[%dm%s\033[0m", col, msg); -#endif + /* assumes that "incr" is not too large, less than 1 second */ + long nsec = t->tv_nsec + (long)(incr * 1000000000.0); + if (nsec >= 1000000000) { + t->tv_sec += 1; + nsec -= 1000000000; + assert(nsec < 1000000000); + } + t->tv_nsec = nsec; } -static volatile long pending_acquires = -1; -static pthread_mutex_t mutex_gil; -static pthread_cond_t cond_gil; +typedef pthread_mutex_t mutex_t; -static void assert_has_the_gil(void) -{ -#ifdef RPY_ASSERT - assert(pthread_mutex_trylock(&mutex_gil) != 0); - assert(pending_acquires >= 0); -#endif +static inline void mutex_init(mutex_t *mutex) { + ASSERT_STATUS(pthread_mutex_init(mutex, pthread_mutexattr_default)); } - -long RPyGilAllocate(void) -{ - int status, error = 0; - _debug_print("RPyGilAllocate\n"); - pending_acquires = -1; - - status = pthread_mutex_init(&mutex_gil, - pthread_mutexattr_default); - CHECK_STATUS("pthread_mutex_init[GIL]"); - - status = pthread_cond_init(&cond_gil, - pthread_condattr_default); - CHECK_STATUS("pthread_cond_init[GIL]"); - - if (error == 0) { - pending_acquires = 0; - RPyGilAcquire(); - } - return (error == 0); +static inline void mutex_lock(mutex_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(mutex)); } - -long RPyGilYieldThread(void) -{ - /* can be called even before RPyGilAllocate(), but in this case, - pending_acquires will be -1 */ -#ifdef RPY_ASSERT - if (pending_acquires >= 0) - assert_has_the_gil(); -#endif - if
(pending_acquires <= 0) +static inline void mutex_unlock(mutex_t *mutex) { + ASSERT_STATUS(pthread_mutex_unlock(mutex)); +} +static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { + struct timespec t; + clock_gettime(CLOCK_REALTIME, &t); + timespec_add(&t, delay); + int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); + if (error_from_timedlock == ETIMEDOUT) return 0; - atomic_add(&pending_acquires, 1L); - _debug_print("{"); - ASSERT_STATUS(pthread_cond_signal(&cond_gil)); - ASSERT_STATUS(pthread_cond_wait(&cond_gil, &mutex_gil)); - _debug_print("}"); - atomic_add(&pending_acquires, -1L); - assert_has_the_gil(); + ASSERT_STATUS(error_from_timedlock); return 1; } +#define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value) +#define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) +#define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) -void RPyGilRelease(void) -{ - _debug_print("RPyGilRelease\n"); -#ifdef RPY_ASSERT - assert(pending_acquires >= 0); -#endif - assert_has_the_gil(); - ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil)); - ASSERT_STATUS(pthread_cond_signal(&cond_gil)); -} +#define SAVE_ERRNO() int saved_errno = errno +#define RESTORE_ERRNO() errno = saved_errno -void RPyGilAcquire(void) -{ - _debug_print("about to RPyGilAcquire...\n"); -#ifdef RPY_ASSERT - assert(pending_acquires >= 0); -#endif - atomic_add(&pending_acquires, 1L); - ASSERT_STATUS(pthread_mutex_lock(&mutex_gil)); - atomic_add(&pending_acquires, -1L); - assert_has_the_gil(); - _debug_print("RPyGilAcquire\n"); -} +#include "src/thread_gil.c" From noreply at buildbot.pypy.org Wed Jun 25 21:35:53 2014 From: noreply at buildbot.pypy.org (arigo) Date: Wed, 25 Jun 2014 21:35:53 +0200 (CEST) Subject: [pypy-commit] pypy default: Document branch Message-ID: <20140625193553.0A8691D34B9@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72235:c76af9b7eea9 Date: 2014-06-25 21:35 +0200 http://bitbucket.org/pypy/pypy/changeset/c76af9b7eea9/ Log: 
Document branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -30,3 +30,12 @@ x86-64, the JIT backend has a special optimization that lets it emit directly a single MOV from a %gs- or %fs-based address. It seems actually to give a good boost in performance. + +.. branch: fast-gil +A faster way to handle the GIL, particularly in JIT code. The GIL is +now a composite of two concepts: a global number (it's just set from +1 to 0 and back around CALL_RELEASE_GIL), and a real mutex. If there +are threads waiting to acquire the GIL, one of them is actively +checking the global number every 0.1 ms to 1 ms. Overall, JIT loops +full of external function calls now run a bit faster (if no thread was +started yet), or a *lot* faster (if threads were started already). From noreply at buildbot.pypy.org Thu Jun 26 00:57:42 2014 From: noreply at buildbot.pypy.org (ltratt) Date: Thu, 26 Jun 2014 00:57:42 +0200 (CEST) Subject: [pypy-commit] pypy default: The UNIX file type is opaque. Message-ID: <20140625225742.62C121D2A7E@cobra.cs.uni-duesseldorf.de> Author: Laurence Tratt Branch: Changeset: r72236:324c9d701969 Date: 2014-06-25 23:55 +0100 http://bitbucket.org/pypy/pypy/changeset/324c9d701969/ Log: The UNIX file type is opaque. Unless an opaque pointer is used, RPython generates code which can call C functions which are really macros which expand to horrible things. Put another way: without this, things don't work on OpenBSD. 
diff --git a/rpython/rlib/rfile.py b/rpython/rlib/rfile.py --- a/rpython/rlib/rfile.py +++ b/rpython/rlib/rfile.py @@ -32,32 +32,32 @@ config = platform.configure(CConfig) OFF_T = config['off_t'] -FILE = lltype.Struct('FILE') # opaque type maybe +FILEP = rffi.COpaquePtr("FILE") -c_open = llexternal('fopen', [rffi.CCHARP, rffi.CCHARP], lltype.Ptr(FILE)) -c_close = llexternal('fclose', [lltype.Ptr(FILE)], rffi.INT, releasegil=False) +c_open = llexternal('fopen', [rffi.CCHARP, rffi.CCHARP], FILEP) +c_close = llexternal('fclose', [FILEP], rffi.INT, releasegil=False) c_fwrite = llexternal('fwrite', [rffi.CCHARP, rffi.SIZE_T, rffi.SIZE_T, - lltype.Ptr(FILE)], rffi.SIZE_T) + FILEP], rffi.SIZE_T) c_fread = llexternal('fread', [rffi.CCHARP, rffi.SIZE_T, rffi.SIZE_T, - lltype.Ptr(FILE)], rffi.SIZE_T) -c_feof = llexternal('feof', [lltype.Ptr(FILE)], rffi.INT) -c_ferror = llexternal('ferror', [lltype.Ptr(FILE)], rffi.INT) -c_clearerror = llexternal('clearerr', [lltype.Ptr(FILE)], lltype.Void) -c_fseek = llexternal('fseek', [lltype.Ptr(FILE), rffi.LONG, rffi.INT], + FILEP], rffi.SIZE_T) +c_feof = llexternal('feof', [FILEP], rffi.INT) +c_ferror = llexternal('ferror', [FILEP], rffi.INT) +c_clearerror = llexternal('clearerr', [FILEP], lltype.Void) +c_fseek = llexternal('fseek', [FILEP, rffi.LONG, rffi.INT], rffi.INT) -c_tmpfile = llexternal('tmpfile', [], lltype.Ptr(FILE)) -c_fileno = llexternal(fileno, [lltype.Ptr(FILE)], rffi.INT) +c_tmpfile = llexternal('tmpfile', [], FILEP) +c_fileno = llexternal(fileno, [FILEP], rffi.INT) c_fdopen = llexternal(('_' if os.name == 'nt' else '') + 'fdopen', - [rffi.INT, rffi.CCHARP], lltype.Ptr(FILE)) -c_ftell = llexternal('ftell', [lltype.Ptr(FILE)], rffi.LONG) -c_fflush = llexternal('fflush', [lltype.Ptr(FILE)], rffi.INT) + [rffi.INT, rffi.CCHARP], FILEP) +c_ftell = llexternal('ftell', [FILEP], rffi.LONG) +c_fflush = llexternal('fflush', [FILEP], rffi.INT) c_ftruncate = llexternal(ftruncate, [rffi.INT, OFF_T], rffi.INT, macro=True) -c_fgets = 
llexternal('fgets', [rffi.CCHARP, rffi.INT, lltype.Ptr(FILE)], +c_fgets = llexternal('fgets', [rffi.CCHARP, rffi.INT, FILEP], rffi.CCHARP) -c_popen = llexternal('popen', [rffi.CCHARP, rffi.CCHARP], lltype.Ptr(FILE)) -c_pclose = llexternal('pclose', [lltype.Ptr(FILE)], rffi.INT, releasegil=False) +c_popen = llexternal('popen', [rffi.CCHARP, rffi.CCHARP], FILEP) +c_pclose = llexternal('pclose', [FILEP], rffi.INT, releasegil=False) BASE_BUF_SIZE = 4096 BASE_LINE_SIZE = 100 @@ -157,7 +157,7 @@ ll_f = self.ll_file if ll_f: # double close is allowed - self.ll_file = lltype.nullptr(FILE) + self.ll_file = lltype.nullptr(FILEP.TO) res = self._do_close(ll_f) if res == -1: errno = rposix.get_errno() From noreply at buildbot.pypy.org Thu Jun 26 10:19:33 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 10:19:33 +0200 (CEST) Subject: [pypy-commit] pypy default: Shot in the dark, but this might help on *BSD platforms. Message-ID: <20140626081933.9CE671C12F0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72237:288d756d83a5 Date: 2014-06-26 10:18 +0200 http://bitbucket.org/pypy/pypy/changeset/288d756d83a5/ Log: Shot in the dark, but this might help on *BSD platforms. diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -332,6 +332,9 @@ p->locked = was_locked; p = next; } + /* Also reinitialize the 'mutex_gil' mutexes, and resets the + number of other waiting threads to zero. 
*/ + RPyGilAllocate(); } int RPyThreadLockInit(struct RPyOpaque_ThreadLock *lock) From noreply at buildbot.pypy.org Thu Jun 26 10:58:11 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 10:58:11 +0200 (CEST) Subject: [pypy-commit] pypy default: Fix the variable name: no longer necessarily a string Message-ID: <20140626085811.D61031D236F@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72238:2e180d0ed7a4 Date: 2014-06-26 10:57 +0200 http://bitbucket.org/pypy/pypy/changeset/2e180d0ed7a4/ Log: Fix the variable name: no longer necessarily a string diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py --- a/pypy/module/_cffi_backend/ctypefunc.py +++ b/pypy/module/_cffi_backend/ctypefunc.py @@ -156,8 +156,8 @@ data = rffi.ptradd(buffer, cif_descr.exchange_args[i]) flag = get_mustfree_flag(data) if flag == 1: - raw_string = rffi.cast(rffi.CCHARPP, data)[0] - lltype.free(raw_string, flavor='raw') + raw_cdata = rffi.cast(rffi.CCHARPP, data)[0] + lltype.free(raw_cdata, flavor='raw') lltype.free(buffer, flavor='raw') return w_res From noreply at buildbot.pypy.org Thu Jun 26 12:38:36 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 12:38:36 +0200 (CEST) Subject: [pypy-commit] pypy jit-get-errno: A branch to add (again Linux-only) JIT support for get_errno() and set_errno(). Message-ID: <20140626103836.D04AF1D2D55@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: jit-get-errno Changeset: r72239:75b357f02e82 Date: 2014-06-26 10:59 +0200 http://bitbucket.org/pypy/pypy/changeset/75b357f02e82/ Log: A branch to add (again Linux-only) JIT support for get_errno() and set_errno(). 
From noreply at buildbot.pypy.org Thu Jun 26 12:38:38 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 12:38:38 +0200 (CEST) Subject: [pypy-commit] pypy jit-get-errno: Add the oopspecs Message-ID: <20140626103838.1F9E11D2D55@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: jit-get-errno Changeset: r72240:029578092143 Date: 2014-06-26 11:18 +0200 http://bitbucket.org/pypy/pypy/changeset/029578092143/ Log: Add the oopspecs diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -23,6 +23,8 @@ OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array OS_DICT_LOOKUP = 4 # ll_dict_lookup OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get + OS_GET_ERRNO = 6 # rposix.get_errno + OS_SET_ERRNO = 7 # rposix.set_errno # OS_STR_CONCAT = 22 # "stroruni.concat" OS_STR_SLICE = 23 # "stroruni.slice" diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -438,6 +438,8 @@ elif oopspec_name.endswith('dict.lookup'): # also ordereddict.lookup prepare = self._handle_dict_lookup_call + elif oopspec_name.startswith('rposix.'): + prepare = self._handle_rposix_call else: prepare = self.prepare_builtin_call try: @@ -1898,6 +1900,16 @@ else: raise NotImplementedError(oopspec_name) + def _handle_rposix_call(self, op, oopspec_name, args): + if oopspec_name == 'rposix.get_errno': + return self._handle_oopspec_call(op, args, EffectInfo.OS_GET_ERRNO, + EffectInfo.EF_CANNOT_RAISE) + elif oopspec_name == 'rposix.set_errno': + return self._handle_oopspec_call(op, args, EffectInfo.OS_SET_ERRNO, + EffectInfo.EF_CANNOT_RAISE) + else: + raise NotImplementedError(oopspec_name) + def rewrite_op_jit_force_quasi_immutable(self, op): v_inst, c_fieldname = op.args descr1 = self.cpu.fielddescrof(v_inst.concretetype.TO, diff --git 
a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -148,6 +148,8 @@ EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR), EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void), EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR), + EI.OS_GET_ERRNO: ([], INT), + EI.OS_SET_ERRNO: ([INT], lltype.Void), } argtypes = argtypes[oopspecindex] assert argtypes[0] == [v.concretetype for v in op.args[1:]] @@ -156,7 +158,9 @@ assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE elif oopspecindex == EI.OS_RAW_MALLOC_VARSIZE_CHAR: assert extraeffect == EI.EF_CAN_RAISE - elif oopspecindex == EI.OS_RAW_FREE: + elif oopspecindex in (EI.OS_RAW_FREE, + EI.OS_GET_ERRNO, + EI.OS_SET_ERRNO): assert extraeffect == EI.EF_CANNOT_RAISE elif oopspecindex == EI.OS_THREADLOCALREF_GET: assert extraeffect == EI.EF_LOOPINVARIANT @@ -1320,6 +1324,38 @@ assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET assert op0.result == v2 +def test_get_errno(): + # test that the oopspec is present and correctly transformed + from rpython.rlib import rposix + FUNC = lltype.FuncType([], lltype.Signed) + func = lltype.functionptr(FUNC, 'get_errno', _callable=rposix.get_errno) + v3 = varoftype(lltype.Signed) + op = SpaceOperation('direct_call', [const(func)], v3) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op1 = tr.rewrite_operation(op) + assert op1.opname == 'residual_call_r_i' + assert op1.args[0].value == func + assert op1.args[1] == ListOfKind('ref', []) + assert op1.args[2] == 'calldescr-%d' % effectinfo.EffectInfo.OS_GET_ERRNO + assert op1.result == v3 + +def test_set_errno(): + # test that the oopspec is present and correctly transformed + from rpython.rlib import rposix + FUNC = lltype.FuncType([lltype.Signed], lltype.Void) + func = lltype.functionptr(FUNC, 'set_errno', _callable=rposix.set_errno) + v1 = varoftype(lltype.Signed) + v3 = 
varoftype(lltype.Void) + op = SpaceOperation('direct_call', [const(func), v1], v3) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op1 = tr.rewrite_operation(op) + assert op1.opname == 'residual_call_ir_v' + assert op1.args[0].value == func + assert op1.args[1] == ListOfKind('int', [v1]) + assert op1.args[2] == ListOfKind('ref', []) + assert op1.args[3] == 'calldescr-%d' % effectinfo.EffectInfo.OS_SET_ERRNO + assert op1.result == v3 + def test_unknown_operation(): op = SpaceOperation('foobar', [], varoftype(lltype.Void)) tr = Transformer() diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -98,9 +98,11 @@ # the default wrapper for set_errno is not suitable for use in critical places # like around GIL handling logic, so we provide our own wrappers. +@jit.oopspec("rposix.get_errno()") def get_errno(): return intmask(_get_errno()) +@jit.oopspec("rposix.set_errno(errno)") def set_errno(errno): _set_errno(rffi.cast(INT, errno)) From noreply at buildbot.pypy.org Thu Jun 26 12:38:39 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 12:38:39 +0200 (CEST) Subject: [pypy-commit] pypy jit-get-errno: Speed up the reads and writes of 'errno'. Message-ID: <20140626103839.937341D2D55@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: jit-get-errno Changeset: r72241:c8b2c8ee757a Date: 2014-06-26 12:35 +0200 http://bitbucket.org/pypy/pypy/changeset/c8b2c8ee757a/ Log: Speed up the reads and writes of 'errno'.
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -3,7 +3,7 @@ from rpython.rlib.jit import JitDriver, unroll_parameters, set_param from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote -from rpython.rlib import jit_hooks +from rpython.rlib import jit_hooks, rposix from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rthread import ThreadLocalReference from rpython.jit.backend.detect_cpu import getcpuclass @@ -24,6 +24,7 @@ # - full optimizer # - floats neg and abs # - threadlocalref_get + # - get_errno, set_errno class Frame(object): _virtualizable_ = ['i'] @@ -64,6 +65,8 @@ if k - abs(j): raise ValueError if k - abs(-j): raise ValueError if t.get().nine != 9: raise ValueError + rposix.set_errno(total) + if rposix.get_errno() != total: raise ValueError return chr(total % 253) # from rpython.rtyper.lltypesystem import lltype, rffi diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2325,12 +2325,38 @@ ed = effectinfo.extradescrs[0] assert isinstance(ed, ThreadLocalRefDescr) addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) + # 'addr1' is the address is the current thread, but we assume that + # it is a thread-local at a constant offset from %fs/%gs. 
addr0 = stmtlocal.threadlocal_base() addr = addr1 - addr0 assert rx86.fits_in_32bits(addr) mc = self.mc - mc.writechar(stmtlocal.SEGMENT_TL) # prefix - mc.MOV_rj(resloc.value, addr) + mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs + mc.MOV_rj(resloc.value, addr) # memory read + + def get_set_errno(self, op, loc, issue_a_write): + # this function is only called on Linux + from rpython.jit.backend.x86 import stmtlocal + addr = stmtlocal.get_errno_tl() + assert rx86.fits_in_32bits(addr) + mc = self.mc + mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs + # !!important: the *next* instruction must be the one using 'addr'!! + if issue_a_write: + if isinstance(loc, RegLoc): + mc.MOV32_jr(addr, loc.value) # memory write from reg + else: + assert isinstance(loc, ImmedLoc) + newvalue = loc.value + newvalue = rffi.cast(rffi.INT, newvalue) + newvalue = rffi.cast(lltype.Signed, newvalue) + mc.MOV32_ji(addr, newvalue) # memory write immediate + else: + assert isinstance(loc, RegLoc) + if IS_X86_32: + mc.MOV_rj(loc.value, addr) # memory read + elif IS_X86_64: + mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -693,6 +693,7 @@ self.perform_math(op, [loc0], loc0) TLREF_SUPPORT = sys.platform.startswith('linux') + ERRNO_SUPPORT = sys.platform.startswith('linux') def _consider_threadlocalref_get(self, op): if self.TLREF_SUPPORT: @@ -701,6 +702,22 @@ else: self._consider_call(op) + def _consider_get_errno(self, op): + if self.ERRNO_SUPPORT: + resloc = self.force_allocate_reg(op.result) + self.assembler.get_set_errno(op, resloc, issue_a_write=False) + else: + self._consider_call(op) + + def _consider_set_errno(self, op): + if self.ERRNO_SUPPORT: + # op.getarg(0) is the function set_errno; op.getarg(1) is + # the new 
errno value + loc0 = self.rm.make_sure_var_in_reg(op.getarg(1)) + self.assembler.get_set_errno(op, loc0, issue_a_write=True) + else: + self._consider_call(op) + def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): # we need to save registers on the stack: # @@ -780,6 +797,10 @@ return self._consider_math_sqrt(op) if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: return self._consider_threadlocalref_get(op) + if oopspecindex == EffectInfo.OS_GET_ERRNO: + return self._consider_get_errno(op) + if oopspecindex == EffectInfo.OS_SET_ERRNO: + return self._consider_set_errno(op) self._consider_call(op) def consider_call_may_force(self, op, guard_op): diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py --- a/rpython/jit/backend/x86/stmtlocal.py +++ b/rpython/jit/backend/x86/stmtlocal.py @@ -21,6 +21,10 @@ asm("%s" : "=r"(result)); return result; } +static long pypy__get_errno_tl(void) +{ + return ((long)&errno) - pypy__threadlocal_base(); +} ''' % _instruction]) @@ -30,3 +34,10 @@ compilation_info=eci, _nowrapper=True, ) #transactionsafe=True) + +get_errno_tl = rffi.llexternal( + 'pypy__get_errno_tl', + [], lltype.Signed, + compilation_info=eci, + _nowrapper=True, + ) #transactionsafe=True) From noreply at buildbot.pypy.org Thu Jun 26 12:38:40 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 12:38:40 +0200 (CEST) Subject: [pypy-commit] pypy jit-get-errno: Ready for merge Message-ID: <20140626103840.C95711D2D55@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: jit-get-errno Changeset: r72242:a7f15cf97f53 Date: 2014-06-26 12:36 +0200 http://bitbucket.org/pypy/pypy/changeset/a7f15cf97f53/ Log: Ready for merge From noreply at buildbot.pypy.org Thu Jun 26 12:38:42 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 12:38:42 +0200 (CEST) Subject: [pypy-commit] pypy default: hg merge jit-get-errno Message-ID: <20140626103842.273531D2D55@cobra.cs.uni-duesseldorf.de> Author: 
Armin Rigo Branch: Changeset: r72243:37c876808e80 Date: 2014-06-26 12:36 +0200 http://bitbucket.org/pypy/pypy/changeset/37c876808e80/ Log: hg merge jit-get-errno Optimize the errno handling in the JIT, notably around external function calls. Linux-only. diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -3,7 +3,7 @@ from rpython.rlib.jit import JitDriver, unroll_parameters, set_param from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote -from rpython.rlib import jit_hooks +from rpython.rlib import jit_hooks, rposix from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rthread import ThreadLocalReference from rpython.jit.backend.detect_cpu import getcpuclass @@ -24,6 +24,7 @@ # - full optimizer # - floats neg and abs # - threadlocalref_get + # - get_errno, set_errno class Frame(object): _virtualizable_ = ['i'] @@ -64,6 +65,8 @@ if k - abs(j): raise ValueError if k - abs(-j): raise ValueError if t.get().nine != 9: raise ValueError + rposix.set_errno(total) + if rposix.get_errno() != total: raise ValueError return chr(total % 253) # from rpython.rtyper.lltypesystem import lltype, rffi diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2325,12 +2325,38 @@ ed = effectinfo.extradescrs[0] assert isinstance(ed, ThreadLocalRefDescr) addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) + # 'addr1' is the address is the current thread, but we assume that + # it is a thread-local at a constant offset from %fs/%gs. 
addr0 = stmtlocal.threadlocal_base() addr = addr1 - addr0 assert rx86.fits_in_32bits(addr) mc = self.mc - mc.writechar(stmtlocal.SEGMENT_TL) # prefix - mc.MOV_rj(resloc.value, addr) + mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs + mc.MOV_rj(resloc.value, addr) # memory read + + def get_set_errno(self, op, loc, issue_a_write): + # this function is only called on Linux + from rpython.jit.backend.x86 import stmtlocal + addr = stmtlocal.get_errno_tl() + assert rx86.fits_in_32bits(addr) + mc = self.mc + mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs + # !!important: the *next* instruction must be the one using 'addr'!! + if issue_a_write: + if isinstance(loc, RegLoc): + mc.MOV32_jr(addr, loc.value) # memory write from reg + else: + assert isinstance(loc, ImmedLoc) + newvalue = loc.value + newvalue = rffi.cast(rffi.INT, newvalue) + newvalue = rffi.cast(lltype.Signed, newvalue) + mc.MOV32_ji(addr, newvalue) # memory write immediate + else: + assert isinstance(loc, RegLoc) + if IS_X86_32: + mc.MOV_rj(loc.value, addr) # memory read + elif IS_X86_64: + mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -693,6 +693,7 @@ self.perform_math(op, [loc0], loc0) TLREF_SUPPORT = sys.platform.startswith('linux') + ERRNO_SUPPORT = sys.platform.startswith('linux') def _consider_threadlocalref_get(self, op): if self.TLREF_SUPPORT: @@ -701,6 +702,22 @@ else: self._consider_call(op) + def _consider_get_errno(self, op): + if self.ERRNO_SUPPORT: + resloc = self.force_allocate_reg(op.result) + self.assembler.get_set_errno(op, resloc, issue_a_write=False) + else: + self._consider_call(op) + + def _consider_set_errno(self, op): + if self.ERRNO_SUPPORT: + # op.getarg(0) is the function set_errno; op.getarg(1) is + # the new 
errno value + loc0 = self.rm.make_sure_var_in_reg(op.getarg(1)) + self.assembler.get_set_errno(op, loc0, issue_a_write=True) + else: + self._consider_call(op) + def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): # we need to save registers on the stack: # @@ -780,6 +797,10 @@ return self._consider_math_sqrt(op) if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: return self._consider_threadlocalref_get(op) + if oopspecindex == EffectInfo.OS_GET_ERRNO: + return self._consider_get_errno(op) + if oopspecindex == EffectInfo.OS_SET_ERRNO: + return self._consider_set_errno(op) self._consider_call(op) def consider_call_may_force(self, op, guard_op): diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py --- a/rpython/jit/backend/x86/stmtlocal.py +++ b/rpython/jit/backend/x86/stmtlocal.py @@ -21,6 +21,10 @@ asm("%s" : "=r"(result)); return result; } +static long pypy__get_errno_tl(void) +{ + return ((long)&errno) - pypy__threadlocal_base(); +} ''' % _instruction]) @@ -30,3 +34,10 @@ compilation_info=eci, _nowrapper=True, ) #transactionsafe=True) + +get_errno_tl = rffi.llexternal( + 'pypy__get_errno_tl', + [], lltype.Signed, + compilation_info=eci, + _nowrapper=True, + ) #transactionsafe=True) diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -23,6 +23,8 @@ OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array OS_DICT_LOOKUP = 4 # ll_dict_lookup OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get + OS_GET_ERRNO = 6 # rposix.get_errno + OS_SET_ERRNO = 7 # rposix.set_errno # OS_STR_CONCAT = 22 # "stroruni.concat" OS_STR_SLICE = 23 # "stroruni.slice" diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -438,6 +438,8 @@ elif oopspec_name.endswith('dict.lookup'): # also 
ordereddict.lookup prepare = self._handle_dict_lookup_call + elif oopspec_name.startswith('rposix.'): + prepare = self._handle_rposix_call else: prepare = self.prepare_builtin_call try: @@ -1898,6 +1900,16 @@ else: raise NotImplementedError(oopspec_name) + def _handle_rposix_call(self, op, oopspec_name, args): + if oopspec_name == 'rposix.get_errno': + return self._handle_oopspec_call(op, args, EffectInfo.OS_GET_ERRNO, + EffectInfo.EF_CANNOT_RAISE) + elif oopspec_name == 'rposix.set_errno': + return self._handle_oopspec_call(op, args, EffectInfo.OS_SET_ERRNO, + EffectInfo.EF_CANNOT_RAISE) + else: + raise NotImplementedError(oopspec_name) + def rewrite_op_jit_force_quasi_immutable(self, op): v_inst, c_fieldname = op.args descr1 = self.cpu.fielddescrof(v_inst.concretetype.TO, diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -148,6 +148,8 @@ EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR), EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void), EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR), + EI.OS_GET_ERRNO: ([], INT), + EI.OS_SET_ERRNO: ([INT], lltype.Void), } argtypes = argtypes[oopspecindex] assert argtypes[0] == [v.concretetype for v in op.args[1:]] @@ -156,7 +158,9 @@ assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE elif oopspecindex == EI.OS_RAW_MALLOC_VARSIZE_CHAR: assert extraeffect == EI.EF_CAN_RAISE - elif oopspecindex == EI.OS_RAW_FREE: + elif oopspecindex in (EI.OS_RAW_FREE, + EI.OS_GET_ERRNO, + EI.OS_SET_ERRNO): assert extraeffect == EI.EF_CANNOT_RAISE elif oopspecindex == EI.OS_THREADLOCALREF_GET: assert extraeffect == EI.EF_LOOPINVARIANT @@ -1320,6 +1324,38 @@ assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET assert op0.result == v2 +def test_get_errno(): + # test that the oopspec is present and correctly transformed + from rpython.rlib import rposix + FUNC = lltype.FuncType([], 
lltype.Signed) + func = lltype.functionptr(FUNC, 'get_errno', _callable=rposix.get_errno) + v3 = varoftype(lltype.Signed) + op = SpaceOperation('direct_call', [const(func)], v3) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op1 = tr.rewrite_operation(op) + assert op1.opname == 'residual_call_r_i' + assert op1.args[0].value == func + assert op1.args[1] == ListOfKind('ref', []) + assert op1.args[2] == 'calldescr-%d' % effectinfo.EffectInfo.OS_GET_ERRNO + assert op1.result == v3 + +def test_set_errno(): + # test that the oopspec is present and correctly transformed + from rpython.rlib import rposix + FUNC = lltype.FuncType([lltype.Signed], lltype.Void) + func = lltype.functionptr(FUNC, 'set_errno', _callable=rposix.set_errno) + v1 = varoftype(lltype.Signed) + v3 = varoftype(lltype.Void) + op = SpaceOperation('direct_call', [const(func), v1], v3) + tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) + op1 = tr.rewrite_operation(op) + assert op1.opname == 'residual_call_ir_v' + assert op1.args[0].value == func + assert op1.args[1] == ListOfKind('int', [v1]) + assert op1.args[2] == ListOfKind('ref', []) + assert op1.args[3] == 'calldescr-%d' % effectinfo.EffectInfo.OS_SET_ERRNO + assert op1.result == v3 + def test_unknown_operation(): op = SpaceOperation('foobar', [], varoftype(lltype.Void)) tr = Transformer() diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -98,9 +98,11 @@ # the default wrapper for set_errno is not suitable for use in critical places # like around GIL handling logic, so we provide our own wrappers. 
+@jit.oopspec("rposix.get_errno()") def get_errno(): return intmask(_get_errno()) +@jit.oopspec("rposix.set_errno(errno)") def set_errno(errno): _set_errno(rffi.cast(INT, errno)) From noreply at buildbot.pypy.org Thu Jun 26 12:38:43 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 12:38:43 +0200 (CEST) Subject: [pypy-commit] pypy default: Document branch Message-ID: <20140626103843.748DB1D2D55@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72244:b756870f257e Date: 2014-06-26 12:37 +0200 http://bitbucket.org/pypy/pypy/changeset/b756870f257e/ Log: Document branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -39,3 +39,7 @@ checking the global number every 0.1 ms to 1 ms. Overall, JIT loops full of external function calls now run a bit faster (if no thread was started yet), or a *lot* faster (if threads were started already). + +.. branch: jit-get-errno +Optimize the errno handling in the JIT, notably around external +function calls. Linux-only. From noreply at buildbot.pypy.org Thu Jun 26 14:30:28 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 14:30:28 +0200 (CEST) Subject: [pypy-commit] pypy default: Update comment Message-ID: <20140626123028.4A08C1C33EC@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72245:8a9c64ba35dc Date: 2014-06-26 14:29 +0200 http://bitbucket.org/pypy/pypy/changeset/8a9c64ba35dc/ Log: Update comment diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -54,10 +54,10 @@ { /* Acquires the GIL. - XXX Note: this function saves and restores 'errno'. This is - needed for now because it may be *followed* by reading the - 'errno', although it's kind of bogus: it should be read before - calling RPyGilAcquire(). 
+ Note: in the slow path, this function saves and restores 'errno'. + This is needed for now because it may be *followed* by reading + the 'errno'. It's a bit strange, because we could read the errno + before calling RPyGilAcquire(), but it's simpler this way. */ long old_fastgil = lock_test_and_set(&rpy_fastgil, 1); From noreply at buildbot.pypy.org Thu Jun 26 20:57:00 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Thu, 26 Jun 2014 20:57:00 +0200 (CEST) Subject: [pypy-commit] pypy scalar-operations: fix performance of ufunc(scalar, scalar) Message-ID: <20140626185700.D8F3C1C0026@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: scalar-operations Changeset: r72246:0620f0c12772 Date: 2014-06-26 18:10 +0100 http://bitbucket.org/pypy/pypy/changeset/0620f0c12772/ Log: fix performance of ufunc(scalar, scalar) diff --git a/pypy/module/micronumpy/base.py b/pypy/module/micronumpy/base.py --- a/pypy/module/micronumpy/base.py +++ b/pypy/module/micronumpy/base.py @@ -18,7 +18,12 @@ pass -class W_NDimArray(W_Root): +class W_NumpyObject(W_Root): + """Base class for ndarrays and scalars (aka boxes).""" + _attrs_ = [] + + +class W_NDimArray(W_NumpyObject): __metaclass__ = extendabletype def __init__(self, implementation): diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py --- a/pypy/module/micronumpy/boxes.py +++ b/pypy/module/micronumpy/boxes.py @@ -1,4 +1,3 @@ -from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec from pypy.interpreter.mixedmodule import MixedModule @@ -14,7 +13,7 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.tool.sourcetools import func_with_new_name from pypy.module.micronumpy import constants as NPY -from pypy.module.micronumpy.base import W_NDimArray +from pypy.module.micronumpy.base import W_NDimArray, W_NumpyObject from pypy.module.micronumpy.concrete import VoidBoxStorage from 
pypy.module.micronumpy.flagsobj import W_FlagsObject @@ -126,7 +125,7 @@ return ret -class W_GenericBox(W_Root): +class W_GenericBox(W_NumpyObject): _attrs_ = ['w_flags'] def descr__new__(space, w_subtype, __args__): @@ -136,6 +135,12 @@ def get_dtype(self, space): return self._get_dtype(space) + def is_scalar(self): + return True + + def get_scalar_value(self): + return self + def item(self, space): return self.get_dtype(space).itemtype.to_builtin_type(space, self) diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -385,10 +385,15 @@ else: [w_lhs, w_rhs] = args_w w_out = None - w_lhs = convert_to_array(space, w_lhs) - w_rhs = convert_to_array(space, w_rhs) - w_ldtype = w_lhs.get_dtype() - w_rdtype = w_rhs.get_dtype() + if (isinstance(w_lhs, boxes.W_GenericBox) and + isinstance(w_rhs, boxes.W_GenericBox)): + w_ldtype = w_lhs.get_dtype(space) + w_rdtype = w_rhs.get_dtype(space) + else: + w_lhs = convert_to_array(space, w_lhs) + w_rhs = convert_to_array(space, w_rhs) + w_ldtype = w_lhs.get_dtype() + w_rdtype = w_rhs.get_dtype() if w_ldtype.is_str() and w_rdtype.is_str() and \ self.comparison_func: pass @@ -451,6 +456,8 @@ else: out = arr return out + assert isinstance(w_lhs, W_NDimArray) + assert isinstance(w_rhs, W_NDimArray) new_shape = shape_agreement(space, w_lhs.get_shape(), w_rhs) new_shape = shape_agreement(space, new_shape, out, broadcast_down=False) return loop.call2(space, new_shape, self.func, calc_dtype, From noreply at buildbot.pypy.org Thu Jun 26 22:06:12 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Thu, 26 Jun 2014 22:06:12 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: remove _clear() from gc functions and operations and passed test_gc_transform Message-ID: <20140626200612.BA9911D2A7B@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72247:9f27ec7bf56e Date: 2014-06-26 16:03 
-0400 http://bitbucket.org/pypy/pypy/changeset/9f27ec7bf56e/ Log: remove _clear() from gc functions and operations and passed test_gc_transform diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -410,7 +410,7 @@ if self.DEBUG: self._random_usage_of_xmm_registers() type_id = rffi.cast(llgroup.HALFWORD, 0) # missing here - return llop1.do_malloc_fixedsize_clear(llmemory.GCREF, + return llop1.do_malloc_fixedsize(llmemory.GCREF, type_id, size, False, False, False) @@ -423,7 +423,7 @@ assert num_elem >= 0, 'num_elem should be >= 0' type_id = llop.extract_ushort(llgroup.HALFWORD, tid) check_typeid(type_id) - return llop1.do_malloc_varsize_clear( + return llop1.do_malloc_varsize( llmemory.GCREF, type_id, num_elem, self.standard_array_basesize, itemsize, self.standard_array_length_ofs) @@ -437,7 +437,7 @@ occur e.g. with arrays of floats on Win32.""" type_id = llop.extract_ushort(llgroup.HALFWORD, tid) check_typeid(type_id) - return llop1.do_malloc_varsize_clear( + return llop1.do_malloc_varsize( llmemory.GCREF, type_id, num_elem, basesize, itemsize, lengthofs) self.generate_function('malloc_array_nonstandard', @@ -455,7 +455,7 @@ def malloc_str(length): type_id = llop.extract_ushort(llgroup.HALFWORD, str_type_id) - return llop1.do_malloc_varsize_clear( + return llop1.do_malloc_varsize( llmemory.GCREF, type_id, length, str_basesize, str_itemsize, str_ofs_length) @@ -464,7 +464,7 @@ def malloc_unicode(length): type_id = llop.extract_ushort(llgroup.HALFWORD, unicode_type_id) - return llop1.do_malloc_varsize_clear( + return llop1.do_malloc_varsize( llmemory.GCREF, type_id, length, unicode_basesize, unicode_itemsize, unicode_ofs_length) @@ -479,7 +479,7 @@ self._random_usage_of_xmm_registers() type_id = llop.extract_ushort(llgroup.HALFWORD, tid) check_typeid(type_id) - return llop1.do_malloc_fixedsize_clear(llmemory.GCREF, + return 
llop1.do_malloc_fixedsize(llmemory.GCREF, type_id, size, False, False, False) self.generate_function('malloc_big_fixedsize', malloc_big_fixedsize, @@ -490,7 +490,7 @@ llop1 = self.llop1 type_id = llop.extract_ushort(llgroup.HALFWORD, sizedescr.tid) check_typeid(type_id) - return llop1.do_malloc_fixedsize_clear(llmemory.GCREF, + return llop1.do_malloc_fixedsize(llmemory.GCREF, type_id, sizedescr.size, False, False, False) @@ -499,7 +499,7 @@ llop1 = self.llop1 type_id = llop.extract_ushort(llgroup.HALFWORD, arraydescr.tid) check_typeid(type_id) - return llop1.do_malloc_varsize_clear(llmemory.GCREF, + return llop1.do_malloc_varsize(llmemory.GCREF, type_id, num_elem, arraydescr.basesize, arraydescr.itemsize, diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -520,7 +520,7 @@ debug_stop("gc-debug") - def malloc_fixedsize_clear(self, typeid, size, + def malloc_fixedsize(self, typeid, size, needs_finalizer=False, is_finalizer_light=False, contains_weakptr=False): @@ -572,7 +572,7 @@ return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF) - def malloc_varsize_clear(self, typeid, length, size, itemsize, + def malloc_varsize(self, typeid, length, size, itemsize, offset_to_length): size_gc_header = self.gcheaderbuilder.size_gc_header nonvarsize = size_gc_header + size diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -273,16 +273,17 @@ s_gcref = SomePtr(llmemory.GCREF) gcdata = self.gcdata translator = self.translator + if hasattr(GCClass, 'malloc_fixedsize_clear'): + malloc_fixedsize_clear_meth = GCClass.malloc_fixedsize_clear.im_func + self.malloc_fixedsize_clear_ptr = getfn( + malloc_fixedsize_clear_meth, + [s_gc, s_typeid16, + annmodel.SomeInteger(nonneg=True), + annmodel.SomeBool(), + annmodel.SomeBool(), + 
annmodel.SomeBool()], s_gcref, + inline = False) - malloc_fixedsize_clear_meth = GCClass.malloc_fixedsize_clear.im_func - self.malloc_fixedsize_clear_ptr = getfn( - malloc_fixedsize_clear_meth, - [s_gc, s_typeid16, - annmodel.SomeInteger(nonneg=True), - annmodel.SomeBool(), - annmodel.SomeBool(), - annmodel.SomeBool()], s_gcref, - inline = False) if hasattr(GCClass, 'malloc_fixedsize'): malloc_fixedsize_meth = GCClass.malloc_fixedsize.im_func self.malloc_fixedsize_ptr = getfn( @@ -296,10 +297,16 @@ else: malloc_fixedsize_meth = None self.malloc_fixedsize_ptr = self.malloc_fixedsize_clear_ptr - self.malloc_varsize_clear_ptr = getfn( - GCClass.malloc_varsize_clear.im_func, - [s_gc, s_typeid16] - + [annmodel.SomeInteger(nonneg=True) for i in range(4)], s_gcref) + if hasattr(GCClass, 'malloc_varsize'): + self.malloc_varsize_ptr = getfn( + GCClass.malloc_varsize.im_func, + [s_gc, s_typeid16] + + [annmodel.SomeInteger(nonneg=True) for i in range(4)], s_gcref) + else: + self.malloc_varsize_ptr = getfn( + GCClass.malloc_varsize_clear.im_func, + [s_gc, s_typeid16] + + [annmodel.SomeInteger(nonneg=True) for i in range(4)], s_gcref) self.collect_ptr = getfn(GCClass.collect.im_func, [s_gc, annmodel.SomeInteger()], annmodel.s_None) self.can_move_ptr = getfn(GCClass.can_move.im_func, @@ -358,24 +365,28 @@ # in some GCs we can also inline the common case of # malloc_varsize(typeid, length, (3 constant sizes), True, False) + self.malloc_varsize_fast_ptr = None if getattr(GCClass, 'inline_simple_malloc_varsize', False): # make a copy of this function so that it gets annotated # independently and the constants are folded inside - malloc_varsize_clear_fast = func_with_new_name( - GCClass.malloc_varsize_clear.im_func, - "malloc_varsize_clear_fast") + if hasattr(GCClass, 'malloc_varsize'): + malloc_varsize_fast = func_with_new_name( + GCClass.malloc_varsize.im_func, + "malloc_varsize_fast") + elif hasattr(GCClass, 'malloc_varsize_clear'): + malloc_varsize_fast = func_with_new_name( + 
GCClass.malloc_varsize_clear.im_func, + "malloc_varsize_clear_fast") s_False = annmodel.SomeBool() s_False.const = False - self.malloc_varsize_clear_fast_ptr = getfn( - malloc_varsize_clear_fast, + self.malloc_varsize_fast_ptr = getfn( + malloc_varsize_fast, [s_gc, s_typeid16, - annmodel.SomeInteger(nonneg=True), - annmodel.SomeInteger(nonneg=True), - annmodel.SomeInteger(nonneg=True), - annmodel.SomeInteger(nonneg=True)], s_gcref, + annmodel.SomeInteger(nonneg=True), + annmodel.SomeInteger(nonneg=True), + annmodel.SomeInteger(nonneg=True), + annmodel.SomeInteger(nonneg=True)], s_gcref, inline = True) - else: - self.malloc_varsize_clear_fast_ptr = None if getattr(GCClass, 'malloc_varsize_nonmovable', False): malloc_nonmovable = func_with_new_name( @@ -654,14 +665,15 @@ has_light_finalizer) if not op.opname.endswith('_varsize') and not flags.get('varsize'): - #malloc_ptr = self.malloc_fixedsize_ptr zero = flags.get('zero', False) if (self.malloc_fast_ptr is not None and not c_has_finalizer.value and (self.malloc_fast_is_clearing or not zero)): malloc_ptr = self.malloc_fast_ptr - elif zero: - malloc_ptr = self.malloc_fixedsize_clear_ptr + + #elif zero: + #malloc_ptr = self.malloc_fixedsize_clear_ptr + else: malloc_ptr = self.malloc_fixedsize_ptr args = [self.c_const_gc, c_type_id, c_size, @@ -681,10 +693,10 @@ malloc_ptr = self.malloc_varsize_nonmovable_ptr args = [self.c_const_gc, c_type_id, v_length] else: - if self.malloc_varsize_clear_fast_ptr is not None: - malloc_ptr = self.malloc_varsize_clear_fast_ptr + if self.malloc_varsize_fast_ptr is not None: + malloc_ptr = self.malloc_varsize_fast_ptr else: - malloc_ptr = self.malloc_varsize_clear_ptr + malloc_ptr = self.malloc_varsize_ptr args = [self.c_const_gc, c_type_id, v_length, c_size, c_varitemsize, c_ofstolength] livevars = self.push_roots(hop) @@ -830,28 +842,28 @@ hop.genop("direct_call", [self.root_walker.gc_start_fresh_new_state_ptr]) - def gct_do_malloc_fixedsize_clear(self, hop): + def 
gct_do_malloc_fixedsize(self, hop): # used by the JIT (see rpython.jit.backend.llsupport.gc) op = hop.spaceop [v_typeid, v_size, v_has_finalizer, v_has_light_finalizer, v_contains_weakptr] = op.args livevars = self.push_roots(hop) hop.genop("direct_call", - [self.malloc_fixedsize_clear_ptr, self.c_const_gc, + [self.malloc_fixedsize_ptr, self.c_const_gc, v_typeid, v_size, v_has_finalizer, v_has_light_finalizer, v_contains_weakptr], resultvar=op.result) self.pop_roots(hop, livevars) - def gct_do_malloc_varsize_clear(self, hop): + def gct_do_malloc_varsize(self, hop): # used by the JIT (see rpython.jit.backend.llsupport.gc) op = hop.spaceop [v_typeid, v_length, v_size, v_itemsize, v_offset_to_length] = op.args livevars = self.push_roots(hop) hop.genop("direct_call", - [self.malloc_varsize_clear_ptr, self.c_const_gc, + [self.malloc_varsize_ptr, self.c_const_gc, v_typeid, v_length, v_size, v_itemsize, v_offset_to_length], resultvar=op.result) diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -450,8 +450,8 @@ 'jit_conditional_call': LLOp(), 'get_exception_addr': LLOp(), 'get_exc_value_addr': LLOp(), - 'do_malloc_fixedsize_clear':LLOp(canmallocgc=True), - 'do_malloc_varsize_clear': LLOp(canmallocgc=True), + 'do_malloc_fixedsize':LLOp(canmallocgc=True), + 'do_malloc_varsize': LLOp(canmallocgc=True), 'get_write_barrier_failing_case': LLOp(sideeffects=False), 'get_write_barrier_from_array_failing_case': LLOp(sideeffects=False), 'gc_get_type_info_group': LLOp(sideeffects=False), From noreply at buildbot.pypy.org Thu Jun 26 22:38:20 2014 From: noreply at buildbot.pypy.org (arigo) Date: Thu, 26 Jun 2014 22:38:20 +0200 (CEST) Subject: [pypy-commit] pypy.org extradoc: update the values Message-ID: <20140626203820.33DD31D2A7E@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: extradoc Changeset: r513:68e36f039aa9 Date: 
2014-06-26 22:38 +0200 http://bitbucket.org/pypy/pypy.org/changeset/68e36f039aa9/ Log: update the values diff --git a/don1.html b/don1.html --- a/don1.html +++ b/don1.html @@ -9,13 +9,13 @@ - $51688 of $105000 (49.2%) + $51969 of $105000 (49.5%)
    diff --git a/don4.html b/don4.html --- a/don4.html +++ b/don4.html @@ -9,7 +9,7 @@ @@ -17,7 +17,7 @@ 2nd call: - $2829 of $80000 (3.5%) + $2959 of $80000 (3.7%)
    From noreply at buildbot.pypy.org Thu Jun 26 22:39:29 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Thu, 26 Jun 2014 22:39:29 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: fix the test_direct tests Message-ID: <20140626203929.156911C0026@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72248:8b1c02cea49c Date: 2014-06-26 16:37 -0400 http://bitbucket.org/pypy/pypy/changeset/8b1c02cea49c/ Log: fix the test_direct tests diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -149,14 +149,14 @@ assert not needs_finalizer itemsize = self.varsize_item_sizes(typeid) offset_to_length = self.varsize_offset_to_length(typeid) - if zero or not hasattr(self, 'malloc_varsize'): + if zero and not hasattr(self, 'malloc_varsize'): malloc_varsize = self.malloc_varsize_clear else: malloc_varsize = self.malloc_varsize ref = malloc_varsize(typeid, length, size, itemsize, offset_to_length) else: - if zero or not hasattr(self, 'malloc_fixedsize'): + if zero and not hasattr(self, 'malloc_fixedsize'): malloc_fixedsize = self.malloc_fixedsize_clear else: malloc_fixedsize = self.malloc_fixedsize diff --git a/rpython/memory/test/test_transformed_gc.py b/rpython/memory/test/test_transformed_gc.py --- a/rpython/memory/test/test_transformed_gc.py +++ b/rpython/memory/test/test_transformed_gc.py @@ -767,7 +767,7 @@ def g(): r = lltype.malloc(P) r.x = 1 - p = llop.do_malloc_fixedsize_clear(llmemory.GCREF) # placeholder + p = llop.do_malloc_fixedsize(llmemory.GCREF) # placeholder p = lltype.cast_opaque_ptr(lltype.Ptr(P), p) p.x = r.x return p.x @@ -794,10 +794,10 @@ type_id = layoutbuilder.get_type_id(P) # - # now fix the do_malloc_fixedsize_clear in the graph of g + # now fix the do_malloc_fixedsize in the graph of g graph = graphof(translator, g) for op in graph.startblock.operations: - if op.opname == 'do_malloc_fixedsize_clear': + if op.opname 
== 'do_malloc_fixedsize': op.args = [Constant(type_id, llgroup.HALFWORD), Constant(llmemory.sizeof(P), lltype.Signed), Constant(False, lltype.Bool), # has_finalizer @@ -815,7 +815,7 @@ def define_do_malloc_operations_in_call(cls): P = lltype.GcStruct('P', ('x', lltype.Signed)) def g(): - llop.do_malloc_fixedsize_clear(llmemory.GCREF) # placeholder + llop.do_malloc_fixedsize(llmemory.GCREF) # placeholder def f(): q = lltype.malloc(P) q.x = 1 @@ -831,10 +831,10 @@ layoutbuilder = cls.ensure_layoutbuilder(translator) type_id = layoutbuilder.get_type_id(P) # - # now fix the do_malloc_fixedsize_clear in the graph of g + # now fix the do_malloc_fixedsize in the graph of g graph = graphof(translator, g) for op in graph.startblock.operations: - if op.opname == 'do_malloc_fixedsize_clear': + if op.opname == 'do_malloc_fixedsize': op.args = [Constant(type_id, llgroup.HALFWORD), Constant(llmemory.sizeof(P), lltype.Signed), Constant(False, lltype.Bool), # has_finalizer From noreply at buildbot.pypy.org Fri Jun 27 00:03:20 2014 From: noreply at buildbot.pypy.org (wenzhuman) Date: Fri, 27 Jun 2014 00:03:20 +0200 (CEST) Subject: [pypy-commit] pypy gc_no_cleanup_nursery: remove the zero flag and passed all the tests under rpython/memory/test Message-ID: <20140626220320.72BF61D236F@cobra.cs.uni-duesseldorf.de> Author: wenzhuman Branch: gc_no_cleanup_nursery Changeset: r72249:96df1fcb42a2 Date: 2014-06-26 17:56 -0400 http://bitbucket.org/pypy/pypy/changeset/96df1fcb42a2/ Log: remove the zero flag and passed all the tests under rpython/memory/test diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -133,6 +133,8 @@ """For testing. The interface used by the gctransformer is the four malloc_[fixed,var]size[_clear]() functions. """ + #TODO:check if the zero flag is unuseful now. 
If so, remove it + # Rules about fallbacks in case of missing malloc methods: # * malloc_fixedsize_clear() and malloc_varsize_clear() are mandatory # * malloc_fixedsize() and malloc_varsize() fallback to the above @@ -149,14 +151,14 @@ assert not needs_finalizer itemsize = self.varsize_item_sizes(typeid) offset_to_length = self.varsize_offset_to_length(typeid) - if zero and not hasattr(self, 'malloc_varsize'): + if not hasattr(self, 'malloc_varsize'): malloc_varsize = self.malloc_varsize_clear else: malloc_varsize = self.malloc_varsize ref = malloc_varsize(typeid, length, size, itemsize, offset_to_length) else: - if zero and not hasattr(self, 'malloc_fixedsize'): + if not hasattr(self, 'malloc_fixedsize'): malloc_fixedsize = self.malloc_fixedsize_clear else: malloc_fixedsize = self.malloc_fixedsize diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -665,15 +665,12 @@ has_light_finalizer) if not op.opname.endswith('_varsize') and not flags.get('varsize'): + #TODO:check if it's safe to remove the zero-flag zero = flags.get('zero', False) if (self.malloc_fast_ptr is not None and not c_has_finalizer.value and (self.malloc_fast_is_clearing or not zero)): malloc_ptr = self.malloc_fast_ptr - - #elif zero: - #malloc_ptr = self.malloc_fixedsize_clear_ptr - else: malloc_ptr = self.malloc_fixedsize_ptr args = [self.c_const_gc, c_type_id, c_size, From noreply at buildbot.pypy.org Fri Jun 27 07:41:02 2014 From: noreply at buildbot.pypy.org (mattip) Date: Fri, 27 Jun 2014 07:41:02 +0200 (CEST) Subject: [pypy-commit] pypy default: attempt fixes for win32 Message-ID: <20140627054102.132CD1C305D@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: Changeset: r72250:a2dd3da64cac Date: 2014-06-27 08:40 +0300 http://bitbucket.org/pypy/pypy/changeset/a2dd3da64cac/ Log: attempt fixes for win32 diff --git a/rpython/translator/c/src/thread.h 
b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -12,6 +12,7 @@ #ifdef _WIN32 #include "thread_nt.h" +#define inline _inline #else /* We should check if unistd.h defines _POSIX_THREADS, but sometimes diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -223,7 +223,12 @@ return (result != WAIT_TIMEOUT); } +#ifdef _M_IA64 +/* On Itanium, use 'acquire' memory ordering semantics */ #define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value) +#else +#define lock_test_and_set(ptr, value) InterlockedExchange(ptr, value) +#endif #define atomic_increment(ptr) InterlockedIncrement(ptr) #define atomic_decrement(ptr) InterlockedDecrement(ptr) From noreply at buildbot.pypy.org Fri Jun 27 11:59:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 27 Jun 2014 11:59:03 +0200 (CEST) Subject: [pypy-commit] pypy default: Test and fix for llexternal(macro=True) seen by the JIT. Message-ID: <20140627095903.A0AE81C345F@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72251:5fe7c3c02218 Date: 2014-06-27 11:58 +0200 http://bitbucket.org/pypy/pypy/changeset/5fe7c3c02218/ Log: Test and fix for llexternal(macro=True) seen by the JIT. 
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -10,6 +10,9 @@ from rpython.jit.backend.test.support import CCompiledMixin from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.config.config import ConfigError +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.rtyper.lltypesystem import lltype, rffi + class TranslationTest(CCompiledMixin): CPUClass = getcpuclass() @@ -25,6 +28,7 @@ # - floats neg and abs # - threadlocalref_get # - get_errno, set_errno + # - llexternal with macro=True class Frame(object): _virtualizable_ = ['i'] @@ -36,9 +40,15 @@ pass t = ThreadLocalReference(Foo) - @dont_look_inside - def myabs(x): - return abs(x) + eci = ExternalCompilationInfo(post_include_bits=[''' +#define pypy_my_fabs(x) fabs(x) +''']) + myabs1 = rffi.llexternal('pypy_my_fabs', [lltype.Float], + lltype.Float, macro=True, releasegil=False, + compilation_info=eci) + myabs2 = rffi.llexternal('pypy_my_fabs', [lltype.Float], + lltype.Float, macro=True, releasegil=True, + compilation_info=eci) jitdriver = JitDriver(greens = [], reds = ['total', 'frame', 'j'], @@ -61,7 +71,7 @@ frame.i -= 1 j *= -0.712 if j + (-j): raise ValueError - k = myabs(j) + k = myabs1(myabs2(j)) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError if t.get().nine != 9: raise ValueError @@ -69,7 +79,6 @@ if rposix.get_errno() != total: raise ValueError return chr(total % 253) # - from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rlib.libffi import types, CDLL, ArgChain from rpython.rlib.test.test_clibffi import get_libm_name libm_name = get_libm_name(sys.platform) diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ 
-95,6 +95,8 @@ name, macro, ext_type, compilation_info) else: _callable = ll2ctypes.LL2CtypesCallable(ext_type, calling_conv) + else: + assert macro is None, "'macro' is useless if you specify '_callable'" if elidable_function: _callable._elidable_function_ = True kwds = {} @@ -172,7 +174,13 @@ call_external_function._dont_inline_ = True call_external_function._annspecialcase_ = 'specialize:ll' call_external_function._gctransformer_hint_close_stack_ = True - call_external_function._call_aroundstate_target_ = funcptr + # + # '_call_aroundstate_target_' is used by the JIT to generate a + # CALL_RELEASE_GIL directly to 'funcptr'. This doesn't work if + # 'funcptr' might be a C macro, though. + if macro is None: + call_external_function._call_aroundstate_target_ = funcptr + # call_external_function = func_with_new_name(call_external_function, 'ccall_' + name) # don't inline, as a hack to guarantee that no GC pointer is alive @@ -180,7 +188,16 @@ else: # if we don't have to invoke the aroundstate, we can just call # the low-level function pointer carelessly - call_external_function = funcptr + if macro is None: + call_external_function = funcptr + else: + # ...well, unless it's a macro, in which case we still have + # to hide it from the JIT... 
+ @jit.dont_look_inside + def call_external_function(*args): + return funcptr(*args) + call_external_function = func_with_new_name(call_external_function, + 'ccall_' + name) unrolling_arg_tps = unrolling_iterable(enumerate(args)) def wrapper(*args): From noreply at buildbot.pypy.org Fri Jun 27 14:09:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 27 Jun 2014 14:09:03 +0200 (CEST) Subject: [pypy-commit] pypy default: Add sanity checks: an empty path would generate the option "-L" or "-I", Message-ID: <20140627120903.E25B11C0542@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72252:8a35207a7342 Date: 2014-06-27 14:08 +0200 http://bitbucket.org/pypy/pypy/changeset/8a35207a7342/ Log: Add sanity checks: an empty path would generate the option "-L" or "-I", which alone would break the command-line completely, by eating whatever the next option is. diff --git a/rpython/translator/platform/posix.py b/rpython/translator/platform/posix.py --- a/rpython/translator/platform/posix.py +++ b/rpython/translator/platform/posix.py @@ -22,9 +22,11 @@ return ['-l%s' % lib for lib in libraries] def _libdirs(self, library_dirs): + assert '' not in library_dirs return ['-L%s' % ldir for ldir in library_dirs] def _includedirs(self, include_dirs): + assert '' not in include_dirs return ['-I%s' % idir for idir in include_dirs] def _linkfiles(self, link_files): From noreply at buildbot.pypy.org Fri Jun 27 15:01:39 2014 From: noreply at buildbot.pypy.org (ltratt) Date: Fri, 27 Jun 2014 15:01:39 +0200 (CEST) Subject: [pypy-commit] pypy default: htons and friends are macros on OpenBSD. Message-ID: <20140627130139.4B6E81D27E0@cobra.cs.uni-duesseldorf.de> Author: Laurence Tratt Branch: Changeset: r72253:cad6c535d3a5 Date: 2014-06-27 13:59 +0100 http://bitbucket.org/pypy/pypy/changeset/cad6c535d3a5/ Log: htons and friends are macros on OpenBSD. 
diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -493,10 +493,16 @@ getnameinfo = external('getnameinfo', [sockaddr_ptr, socklen_t, CCHARP, size_t, CCHARP, size_t, rffi.INT], rffi.INT) -htonl = external('htonl', [rffi.UINT], rffi.UINT, releasegil=False) -htons = external('htons', [rffi.USHORT], rffi.USHORT, releasegil=False) -ntohl = external('ntohl', [rffi.UINT], rffi.UINT, releasegil=False) -ntohs = external('ntohs', [rffi.USHORT], rffi.USHORT, releasegil=False) +if sys.platform.startswith("openbsd"): + htonl = external('htonl', [rffi.UINT], rffi.UINT, releasegil=False, macro=True) + htons = external('htons', [rffi.USHORT], rffi.USHORT, releasegil=False, macro=True) + ntohl = external('ntohl', [rffi.UINT], rffi.UINT, releasegil=False, macro=True) + ntohs = external('ntohs', [rffi.USHORT], rffi.USHORT, releasegil=False, macro=True) +else: + htonl = external('htonl', [rffi.UINT], rffi.UINT, releasegil=False) + htons = external('htons', [rffi.USHORT], rffi.USHORT, releasegil=False) + ntohl = external('ntohl', [rffi.UINT], rffi.UINT, releasegil=False) + ntohs = external('ntohs', [rffi.USHORT], rffi.USHORT, releasegil=False) if _POSIX: inet_aton = external('inet_aton', [CCHARP, lltype.Ptr(in_addr)], From noreply at buildbot.pypy.org Fri Jun 27 22:35:21 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 27 Jun 2014 22:35:21 +0200 (CEST) Subject: [pypy-commit] pypy default: Update FAQ Message-ID: <20140627203521.3EDF31C0542@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72254:66bf42b5d051 Date: 2014-06-27 22:34 +0200 http://bitbucket.org/pypy/pypy/changeset/66bf42b5d051/ Log: Update FAQ diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -465,9 +465,13 @@ This is documented (here__ and here__). 
It needs 4 GB of RAM to run "rpython targetpypystandalone" on top of PyPy, a bit more when running -on CPython. If you have less than 4 GB it will just swap forever (or -fail if you don't have enough swap). On 32-bit, divide the numbers by -two. +on top of CPython. If you have less than 4 GB free, it will just swap +forever (or fail if you don't have enough swap). And we mean *free:* +if the machine has 4 GB *in total,* then it will swap. + +On 32-bit, divide the numbers by two. (We didn't try recently, but in +the past it was possible to compile a 32-bit version on a 2 GB Linux +machine with nothing else running: no Gnome/KDE, for example.) .. __: http://pypy.org/download.html#building-from-source .. __: https://pypy.readthedocs.org/en/latest/getting-started-python.html#translating-the-pypy-python-interpreter From noreply at buildbot.pypy.org Fri Jun 27 23:13:59 2014 From: noreply at buildbot.pypy.org (arigo) Date: Fri, 27 Jun 2014 23:13:59 +0200 (CEST) Subject: [pypy-commit] pypy default: Untested, fix attempt for OS/X Message-ID: <20140627211359.194F51C31CF@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72255:d617093b49a1 Date: 2014-06-27 23:12 +0200 http://bitbucket.org/pypy/pypy/changeset/d617093b49a1/ Log: Untested, fix attempt for OS/X diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -508,7 +508,14 @@ } static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { struct timespec t; +#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 clock_gettime(CLOCK_REALTIME, &t); +#else + struct timeval tv; + RPY_GETTIMEOFDAY(&tv); + t.tv_sec = tv.tv_sec; + t.tv_nsec = tv.tv_usec * 1000 + 999; +#endif timespec_add(&t, delay); int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); if (error_from_timedlock == ETIMEDOUT) From noreply at buildbot.pypy.org Sat Jun 28 01:33:58 2014 From: noreply 
at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 01:33:58 +0200 (CEST) Subject: [pypy-commit] pypy default: Some Posix systems don't have pthread_mutex_timedlock() but only Message-ID: <20140627233358.486CD1C0542@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72256:36093fab641a Date: 2014-06-28 01:33 +0200 http://bitbucket.org/pypy/pypy/changeset/36093fab641a/ Log: Some Posix systems don't have pthread_mutex_timedlock() but only pthread_cond_timedwait(). Jump through hoops to optionally adapt the logic to the latter case. diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -38,15 +38,14 @@ long rpy_fastgil = 1; long rpy_waiting_threads = -42; /* GIL not initialized */ -static mutex_t mutex_gil_stealer; -static mutex_t mutex_gil; +static mutex1_t mutex_gil_stealer; +static mutex2_t mutex_gil; void RPyGilAllocate(void) { assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); - mutex_init(&mutex_gil_stealer); - mutex_init(&mutex_gil); - mutex_lock(&mutex_gil); + mutex1_init(&mutex_gil_stealer); + mutex2_init_locked(&mutex_gil); rpy_waiting_threads = 0; } @@ -80,14 +79,15 @@ first-in-first-out order, this will nicely give the threads a round-robin chance. */ - mutex_lock(&mutex_gil_stealer); + mutex1_lock(&mutex_gil_stealer); + mutex2_loop_start(&mutex_gil); /* We are now the stealer thread. Steals! */ while (1) { /* Sleep for one interval of time. We may be woken up earlier if 'mutex_gil' is released. */ - if (mutex_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ + if (mutex2_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ /* We arrive here if 'mutex_gil' was recently released and we just relocked it. */ @@ -107,7 +107,8 @@ /* Otherwise, loop back. 
*/ } atomic_decrement(&rpy_waiting_threads); - mutex_unlock(&mutex_gil_stealer); + mutex2_loop_stop(&mutex_gil); + mutex1_unlock(&mutex_gil_stealer); RESTORE_ERRNO(); } @@ -140,7 +141,7 @@ /* Explicitly release the 'mutex_gil'. */ - mutex_unlock(&mutex_gil); + mutex2_unlock(&mutex_gil); /* Now nobody has got the GIL, because 'mutex_gil' is released (but rpy_fastgil is still locked). Call RPyGilAcquire(). It will diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -196,33 +196,46 @@ /* GIL code */ /************************************************************/ -typedef HANDLE mutex_t; /* a semaphore, on Windows */ +typedef HANDLE mutex2_t; /* a semaphore, on Windows */ static void gil_fatal(const char *msg) { fprintf(stderr, "Fatal error in the GIL: %s\n", msg); abort(); } -static inline void mutex_init(mutex_t *mutex) { +static inline void mutex2_init(mutex2_t *mutex) { *mutex = CreateSemaphore(NULL, 1, 1, NULL); if (*mutex == NULL) gil_fatal("CreateSemaphore failed"); } -static inline void mutex_lock(mutex_t *mutex) { +static inline void mutex2_lock(mutex2_t *mutex) { WaitForSingleObject(*mutex, INFINITE); } -static inline void mutex_unlock(mutex_t *mutex) { +static inline void mutex2_unlock(mutex2_t *mutex) { ReleaseSemaphore(*mutex, 1, NULL); } -static inline int mutex_lock_timeout(mutex_t *mutex, double delay) +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex2_init(mutex); + mutex2_lock(mutex); +} + +static inline void mutex2_loop_start(mutex2_t *mutex) { } +static inline void mutex2_loop_stop(mutex2_t *mutex) { } + +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.999)); return (result != WAIT_TIMEOUT); } +#define mutex1_t mutex2_t +#define mutex1_init mutex2_init +#define mutex1_lock mutex2_lock +#define mutex1_unlock 
mutex2_unlock + #ifdef _M_IA64 /* On Itanium, use 'acquire' memory ordering semantics */ #define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value) diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -479,7 +479,7 @@ #define ASSERT_STATUS(call) \ if (call != 0) { \ - fprintf(stderr, "Fatal error: " #call "\n"); \ + perror("Fatal error: " #call); \ abort(); \ } @@ -495,27 +495,38 @@ t->tv_nsec = nsec; } -typedef pthread_mutex_t mutex_t; +typedef pthread_mutex_t mutex1_t; -static inline void mutex_init(mutex_t *mutex) { +static inline void mutex1_init(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_init(mutex, pthread_mutexattr_default)); } -static inline void mutex_lock(mutex_t *mutex) { +static inline void mutex1_lock(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_lock(mutex)); } -static inline void mutex_unlock(mutex_t *mutex) { +static inline void mutex1_unlock(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_unlock(mutex)); } -static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { + +/************************************************************/ +#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 +/************************************************************/ + +#define mutex2_t mutex1_t +#define mutex2_init mutex1_init +#define mutex2_lock mutex1_lock +#define mutex2_unlock mutex1_unlock + +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex2_init(mutex); + mutex2_lock(mutex); +} + +static inline void mutex2_loop_start(mutex2_t *mutex) { } +static inline void mutex2_loop_stop(mutex2_t *mutex) { } + +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { struct timespec t; -#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 clock_gettime(CLOCK_REALTIME, &t); -#else - struct timeval tv; - RPY_GETTIMEOFDAY(&tv); - t.tv_sec = tv.tv_sec; - t.tv_nsec = tv.tv_usec * 1000 + 
999; -#endif timespec_add(&t, delay); int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); if (error_from_timedlock == ETIMEDOUT) @@ -523,6 +534,58 @@ ASSERT_STATUS(error_from_timedlock); return 1; } + +/************************************************************/ +#else +/************************************************************/ + +typedef struct { + char locked; + pthread_mutex_t mut; + pthread_cond_t cond; +} mutex2_t; + +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex->locked = 1; + ASSERT_STATUS(pthread_mutex_init(&mutex->mut, pthread_mutexattr_default)); + ASSERT_STATUS(pthread_cond_init(&mutex->cond, pthread_condattr_default)); +} +static inline void mutex2_unlock(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(&mutex->mut)); + mutex->locked = 0; + ASSERT_STATUS(pthread_mutex_unlock(&mutex->mut)); + ASSERT_STATUS(pthread_cond_signal(&mutex->cond)); +} +static inline void mutex2_loop_start(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(&mutex->mut)); +} +static inline void mutex2_loop_stop(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_unlock(&mutex->mut)); +} +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { + if (mutex->locked) { + struct timespec t; + struct timeval tv; + RPY_GETTIMEOFDAY(&tv); + t.tv_sec = tv.tv_sec; + t.tv_nsec = tv.tv_usec * 1000 + 999; + timespec_add(&t, delay); + int error_from_timedwait = pthread_cond_timedwait( + &mutex->cond, &mutex->mut, &t); + if (error_from_timedwait != ETIMEDOUT) { + ASSERT_STATUS(error_from_timedwait); + } + } + int result = !mutex->locked; + mutex->locked = 1; + return result; +} + +/************************************************************/ +#endif /* _POSIX_TIMERS */ +/************************************************************/ + + #define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value) #define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) From noreply 
at buildbot.pypy.org Sat Jun 28 01:35:29 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 01:35:29 +0200 (CEST) Subject: [pypy-commit] pypy default: Add comment Message-ID: <20140627233529.733571C305D@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72257:00bee81d1159 Date: 2014-06-28 01:35 +0200 http://bitbucket.org/pypy/pypy/changeset/00bee81d1159/ Log: Add comment diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -510,6 +510,10 @@ /************************************************************/ #if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 /************************************************************/ +/* NB. the test above should cover two features: clock_gettime() and + pthread_mutex_timedlock(). It's unclear that there is a measurable + benefit in using pthread_mutex_timedlock(), but there is certainly + one in using clock_gettime(). */ #define mutex2_t mutex1_t #define mutex2_init mutex1_init From noreply at buildbot.pypy.org Sat Jun 28 01:42:02 2014 From: noreply at buildbot.pypy.org (alex_gaynor) Date: Sat, 28 Jun 2014 01:42:02 +0200 (CEST) Subject: [pypy-commit] pypy default: Removed the usage of RSocket as a mixin. Message-ID: <20140627234202.4A3D61C305D@cobra.cs.uni-duesseldorf.de> Author: Alex Gaynor Branch: Changeset: r72258:c5b97e40c361 Date: 2014-06-27 16:40 -0700 http://bitbucket.org/pypy/pypy/changeset/c5b97e40c361/ Log: Removed the usage of RSocket as a mixin. 
This has several advantages: * It removes very confusing subclassing, in favor of simple composition * It lets us mark the the finalizers as light_finalizers diff --git a/pypy/module/_socket/__init__.py b/pypy/module/_socket/__init__.py --- a/pypy/module/_socket/__init__.py +++ b/pypy/module/_socket/__init__.py @@ -6,8 +6,8 @@ } interpleveldefs = { - 'SocketType': 'interp_socket.W_RSocket', - 'socket' : 'interp_socket.W_RSocket', + 'SocketType': 'interp_socket.W_Socket', + 'socket' : 'interp_socket.W_Socket', 'error' : 'interp_socket.get_error(space, "error")', 'herror' : 'interp_socket.get_error(space, "herror")', 'gaierror' : 'interp_socket.get_error(space, "gaierror")', diff --git a/pypy/module/_socket/interp_func.py b/pypy/module/_socket/interp_func.py --- a/pypy/module/_socket/interp_func.py +++ b/pypy/module/_socket/interp_func.py @@ -1,8 +1,12 @@ -from pypy.interpreter.gateway import unwrap_spec, WrappedDefault -from pypy.module._socket.interp_socket import converted_error, W_RSocket, addr_as_object, ipaddr_from_object from rpython.rlib import rsocket from rpython.rlib.rsocket import SocketError, INVALID_SOCKET + from pypy.interpreter.error import OperationError +from pypy.interpreter.gateway import unwrap_spec, WrappedDefault +from pypy.module._socket.interp_socket import ( + converted_error, W_Socket, addr_as_object, ipaddr_from_object +) + def gethostname(space): """gethostname() -> string @@ -136,10 +140,10 @@ The remaining arguments are the same as for socket(). """ try: - sock = rsocket.fromfd(fd, family, type, proto, W_RSocket) + sock = rsocket.fromfd(fd, family, type, proto) except SocketError, e: raise converted_error(space, e) - return space.wrap(sock) + return space.wrap(W_Socket(sock)) @unwrap_spec(family=int, type=int, proto=int) def socketpair(space, family=rsocket.socketpair_default_family, @@ -153,10 +157,13 @@ AF_UNIX if defined on the platform; otherwise, the default is AF_INET. 
""" try: - sock1, sock2 = rsocket.socketpair(family, type, proto, W_RSocket) + sock1, sock2 = rsocket.socketpair(family, type, proto) except SocketError, e: raise converted_error(space, e) - return space.newtuple([space.wrap(sock1), space.wrap(sock2)]) + return space.newtuple([ + space.wrap(W_Socket(sock1)), + space.wrap(W_Socket(sock2)) + ]) # The following 4 functions refuse all negative numbers, like CPython 2.6. # They could also check that the argument is not too large, but CPython 2.6 diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -1,14 +1,18 @@ +from rpython.rlib import rsocket +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rsocket import ( + RSocket, AF_INET, SOCK_STREAM, SocketError, SocketErrorWithErrno, + RSocketError +) +from rpython.rtyper.lltypesystem import lltype, rffi + +from pypy.interpreter import gateway from pypy.interpreter.baseobjspace import W_Root -from pypy.interpreter.typedef import TypeDef, make_weakref_descr,\ - interp_attrproperty +from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from rpython.rlib.rarithmetic import intmask -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib import rsocket -from rpython.rlib.rsocket import RSocket, AF_INET, SOCK_STREAM -from rpython.rlib.rsocket import SocketError, SocketErrorWithErrno, RSocketError -from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter import gateway +from pypy.interpreter.typedef import ( + GetSetProperty, TypeDef, make_weakref_descr +) # XXX Hack to seperate rpython and pypy @@ -124,10 +128,18 @@ return addr -class W_RSocket(W_Root, RSocket): - def __del__(self): - self.clear_all_weakrefs() - RSocket.__del__(self) +class W_Socket(W_Root): + def __init__(self, sock): + self.sock = sock + + def 
get_type_w(self, space): + return space.wrap(self.sock.type) + + def get_proto_w(self, space): + return space.wrap(self.sock.proto) + + def get_family_w(self, space): + return space.wrap(self.sock.family) def accept_w(self, space): """accept() -> (socket object, address info) @@ -137,22 +149,22 @@ info is a pair (hostaddr, port). """ try: - fd, addr = self.accept() + fd, addr = self.sock.accept() sock = rsocket.make_socket( - fd, self.family, self.type, self.proto, W_RSocket) - return space.newtuple([space.wrap(sock), + fd, self.sock.family, self.sock.type, self.sock.proto) + return space.newtuple([space.wrap(W_Socket(sock)), addr_as_object(addr, sock.fd, space)]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) # convert an Address into an app-level object def addr_as_object(self, space, address): - return addr_as_object(address, self.fd, space) + return addr_as_object(address, self.sock.fd, space) # convert an app-level object into an Address # based on the current socket's family def addr_from_object(self, space, w_address): - return addr_from_object(self.family, space, w_address) + return addr_from_object(self.sock.family, space, w_address) def bind_w(self, space, w_addr): """bind(address) @@ -162,8 +174,8 @@ sockets the address is a tuple (ifname, proto [,pkttype [,hatype]]) """ try: - self.bind(self.addr_from_object(space, w_addr)) - except SocketError, e: + self.sock.bind(self.addr_from_object(space, w_addr)) + except SocketError as e: raise converted_error(space, e) def close_w(self, space): @@ -172,7 +184,7 @@ Close the socket. It cannot be used after this call. """ try: - self.close() + self.sock.close() except SocketError: # cpython doesn't return any errors on close pass @@ -184,8 +196,8 @@ is a pair (host, port). 
""" try: - self.connect(self.addr_from_object(space, w_addr)) - except SocketError, e: + self.sock.connect(self.addr_from_object(space, w_addr)) + except SocketError as e: raise converted_error(space, e) def connect_ex_w(self, space, w_addr): @@ -196,15 +208,16 @@ """ try: addr = self.addr_from_object(space, w_addr) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) - error = self.connect_ex(addr) + error = self.sock.connect_ex(addr) return space.wrap(error) def dup_w(self, space): try: - return self.dup(W_RSocket) - except SocketError, e: + sock = self.sock.dup() + return W_Socket(sock) + except SocketError as e: raise converted_error(space, e) def fileno_w(self, space): @@ -212,7 +225,7 @@ Return the integer file descriptor of the socket. """ - return space.wrap(intmask(self.fd)) + return space.wrap(intmask(self.sock.fd)) def getpeername_w(self, space): """getpeername() -> address info @@ -221,9 +234,9 @@ info is a pair (hostaddr, port). """ try: - addr = self.getpeername() - return addr_as_object(addr, self.fd, space) - except SocketError, e: + addr = self.sock.getpeername() + return addr_as_object(addr, self.sock.fd, space) + except SocketError as e: raise converted_error(space, e) def getsockname_w(self, space): @@ -233,9 +246,9 @@ info is a pair (hostaddr, port). 
""" try: - addr = self.getsockname() - return addr_as_object(addr, self.fd, space) - except SocketError, e: + addr = self.sock.getsockname() + return addr_as_object(addr, self.sock.fd, space) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(level=int, optname=int) @@ -248,11 +261,11 @@ """ if w_buflen is None: try: - return space.wrap(self.getsockopt_int(level, optname)) - except SocketError, e: + return space.wrap(self.sock.getsockopt_int(level, optname)) + except SocketError as e: raise converted_error(space, e) buflen = space.int_w(w_buflen) - return space.wrap(self.getsockopt(level, optname, buflen)) + return space.wrap(self.sock.getsockopt(level, optname, buflen)) def gettimeout_w(self, space): """gettimeout() -> timeout @@ -260,7 +273,7 @@ Returns the timeout in floating seconds associated with socket operations. A timeout of None indicates that timeouts on socket """ - timeout = self.gettimeout() + timeout = self.sock.gettimeout() if timeout < 0.0: return space.w_None return space.wrap(timeout) @@ -274,8 +287,8 @@ will allow before refusing new connections. """ try: - self.listen(backlog) - except SocketError, e: + self.sock.listen(backlog) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(w_mode = WrappedDefault("r"), @@ -298,8 +311,8 @@ the remote end is closed and all data is read, return the empty string. """ try: - data = self.recv(buffersize, flags) - except SocketError, e: + data = self.sock.recv(buffersize, flags) + except SocketError as e: raise converted_error(space, e) return space.wrap(data) @@ -310,13 +323,13 @@ Like recv(buffersize, flags) but also return the sender's address info. 
""" try: - data, addr = self.recvfrom(buffersize, flags) + data, addr = self.sock.recvfrom(buffersize, flags) if addr: - w_addr = addr_as_object(addr, self.fd, space) + w_addr = addr_as_object(addr, self.sock.fd, space) else: w_addr = space.w_None return space.newtuple([space.wrap(data), w_addr]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) @unwrap_spec(data='bufferstr', flags=int) @@ -328,8 +341,8 @@ sent; this may be less than len(data) if the network is busy. """ try: - count = self.send(data, flags) - except SocketError, e: + count = self.sock.send(data, flags) + except SocketError as e: raise converted_error(space, e) return space.wrap(count) @@ -343,8 +356,9 @@ to tell how much data has been sent. """ try: - self.sendall(data, flags, space.getexecutioncontext().checksignals) - except SocketError, e: + self.sock.sendall( + data, flags, space.getexecutioncontext().checksignals) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(data='bufferstr') @@ -364,8 +378,8 @@ w_addr = w_param3 try: addr = self.addr_from_object(space, w_addr) - count = self.sendto(data, flags, addr) - except SocketError, e: + count = self.sock.sendto(data, flags, addr) + except SocketError as e: raise converted_error(space, e) return space.wrap(count) @@ -377,7 +391,7 @@ setblocking(True) is equivalent to settimeout(None); setblocking(False) is equivalent to settimeout(0.0). 
""" - self.setblocking(flag) + self.sock.setblocking(flag) @unwrap_spec(level=int, optname=int) def setsockopt_w(self, space, level, optname, w_optval): @@ -391,13 +405,13 @@ except: optval = space.str_w(w_optval) try: - self.setsockopt(level, optname, optval) - except SocketError, e: + self.sock.setsockopt(level, optname, optval) + except SocketError as e: raise converted_error(space, e) return try: - self.setsockopt_int(level, optname, optval) - except SocketError, e: + self.sock.setsockopt_int(level, optname, optval) + except SocketError as e: raise converted_error(space, e) def settimeout_w(self, space, w_timeout): @@ -415,7 +429,7 @@ if timeout < 0.0: raise OperationError(space.w_ValueError, space.wrap('Timeout value out of range')) - self.settimeout(timeout) + self.sock.settimeout(timeout) @unwrap_spec(nbytes=int, flags=int) def recv_into_w(self, space, w_buffer, nbytes=0, flags=0): @@ -424,8 +438,8 @@ if nbytes == 0 or nbytes > lgt: nbytes = lgt try: - return space.wrap(self.recvinto(rwbuffer, nbytes, flags)) - except SocketError, e: + return space.wrap(self.sock.recvinto(rwbuffer, nbytes, flags)) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(nbytes=int, flags=int) @@ -435,13 +449,13 @@ if nbytes == 0 or nbytes > lgt: nbytes = lgt try: - readlgt, addr = self.recvfrom_into(rwbuffer, nbytes, flags) + readlgt, addr = self.sock.recvfrom_into(rwbuffer, nbytes, flags) if addr: - w_addr = addr_as_object(addr, self.fd, space) + w_addr = addr_as_object(addr, self.sock.fd, space) else: w_addr = space.w_None return space.newtuple([space.wrap(readlgt), w_addr]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) @unwrap_spec(cmd=int) @@ -473,7 +487,7 @@ option_ptr.c_keepaliveinterval = space.uint_w(w_interval) res = _c.WSAIoctl( - self.fd, cmd, value_ptr, value_size, + self.sock.fd, cmd, value_ptr, value_size, rffi.NULL, 0, recv_ptr, rffi.NULL, rffi.NULL) if res < 0: raise converted_error(space, 
rsocket.last_error()) @@ -494,8 +508,8 @@ (flag == SHUT_RDWR). """ try: - self.shutdown(how) - except SocketError, e: + self.sock.shutdown(how) + except SocketError as e: raise converted_error(space, e) #------------------------------------------------------------ @@ -536,12 +550,13 @@ @unwrap_spec(family=int, type=int, proto=int) def newsocket(space, w_subtype, family=AF_INET, type=SOCK_STREAM, proto=0): - sock = space.allocate_instance(W_RSocket, w_subtype) + self = space.allocate_instance(W_Socket, w_subtype) try: - W_RSocket.__init__(sock, family, type, proto) - except SocketError, e: + sock = RSocket(family, type, proto) + except SocketError as e: raise converted_error(space, e) - return space.wrap(sock) + W_Socket.__init__(self, sock) + return space.wrap(self) descr_socket_new = interp2app(newsocket) # ____________________________________________________________ @@ -597,10 +612,10 @@ socketmethods = {} for methodname in socketmethodnames: - method = getattr(W_RSocket, methodname + '_w') + method = getattr(W_Socket, methodname + '_w') socketmethods[methodname] = interp2app(method) -W_RSocket.typedef = TypeDef("_socket.socket", +W_Socket.typedef = TypeDef("_socket.socket", __doc__ = """\ socket([family[, type[, proto]]]) -> socket object @@ -639,9 +654,9 @@ [*] not available on all platforms!""", __new__ = descr_socket_new, - __weakref__ = make_weakref_descr(W_RSocket), - type = interp_attrproperty('type', W_RSocket), - proto = interp_attrproperty('proto', W_RSocket), - family = interp_attrproperty('family', W_RSocket), + __weakref__ = make_weakref_descr(W_Socket), + type = GetSetProperty(W_Socket.get_type_w), + proto = GetSetProperty(W_Socket.get_proto_w), + family = GetSetProperty(W_Socket.get_family_w), ** socketmethods ) diff --git a/pypy/tool/gcdump.py b/pypy/tool/gcdump.py --- a/pypy/tool/gcdump.py +++ b/pypy/tool/gcdump.py @@ -43,7 +43,7 @@ def print_summary(self): items = self.summary.items() - items.sort(key=lambda(typenum, stat): stat[1]) # sort by 
totalsize + items.sort(key=lambda (typenum, stat): stat[1]) # sort by totalsize totalsize = 0 for typenum, stat in items: totalsize += stat[1] diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -15,17 +15,18 @@ # It's unclear if makefile() and SSL support belong here or only as # app-level code for PyPy. +from rpython.rlib import _rsocket_rffi as _c, jit, rgc from rpython.rlib.objectmodel import instantiate, keepalive_until_here -from rpython.rlib import _rsocket_rffi as _c from rpython.rlib.rarithmetic import intmask, r_uint from rpython.rlib.rthread import dummy_lock from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rffi import sizeof, offsetof -INVALID_SOCKET = _c.INVALID_SOCKET -from rpython.rlib import jit + + # Usage of @jit.dont_look_inside in this file is possibly temporary # and only because some lltypes declared in _rsocket_rffi choke the # JIT's codewriter right now (notably, FixedSizeArray). +INVALID_SOCKET = _c.INVALID_SOCKET def mallocbuf(buffersize): @@ -86,6 +87,7 @@ self.addr_p = addr self.addrlen = addrlen + @rgc.must_be_light_finalizer def __del__(self): if self.addr_p: lltype.free(self.addr_p, flavor='raw', track_allocation=False) @@ -493,8 +495,8 @@ class RSocket(object): """RPython-level socket object. 
""" - _mixin_ = True # for interp_socket.py fd = _c.INVALID_SOCKET + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fd=_c.INVALID_SOCKET): """Create a new socket.""" @@ -509,6 +511,7 @@ self.proto = proto self.timeout = defaults.timeout + @rgc.must_be_light_finalizer def __del__(self): fd = self.fd if fd != _c.INVALID_SOCKET: From noreply at buildbot.pypy.org Sat Jun 28 01:42:03 2014 From: noreply at buildbot.pypy.org (alex_gaynor) Date: Sat, 28 Jun 2014 01:42:03 +0200 (CEST) Subject: [pypy-commit] pypy default: merged upstream Message-ID: <20140627234203.8A2751C305D@cobra.cs.uni-duesseldorf.de> Author: Alex Gaynor Branch: Changeset: r72259:50a337a2e4f2 Date: 2014-06-27 16:41 -0700 http://bitbucket.org/pypy/pypy/changeset/50a337a2e4f2/ Log: merged upstream diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -38,15 +38,14 @@ long rpy_fastgil = 1; long rpy_waiting_threads = -42; /* GIL not initialized */ -static mutex_t mutex_gil_stealer; -static mutex_t mutex_gil; +static mutex1_t mutex_gil_stealer; +static mutex2_t mutex_gil; void RPyGilAllocate(void) { assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); - mutex_init(&mutex_gil_stealer); - mutex_init(&mutex_gil); - mutex_lock(&mutex_gil); + mutex1_init(&mutex_gil_stealer); + mutex2_init_locked(&mutex_gil); rpy_waiting_threads = 0; } @@ -80,14 +79,15 @@ first-in-first-out order, this will nicely give the threads a round-robin chance. */ - mutex_lock(&mutex_gil_stealer); + mutex1_lock(&mutex_gil_stealer); + mutex2_loop_start(&mutex_gil); /* We are now the stealer thread. Steals! */ while (1) { /* Sleep for one interval of time. We may be woken up earlier if 'mutex_gil' is released. */ - if (mutex_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ + if (mutex2_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... 
*/ /* We arrive here if 'mutex_gil' was recently released and we just relocked it. */ @@ -107,7 +107,8 @@ /* Otherwise, loop back. */ } atomic_decrement(&rpy_waiting_threads); - mutex_unlock(&mutex_gil_stealer); + mutex2_loop_stop(&mutex_gil); + mutex1_unlock(&mutex_gil_stealer); RESTORE_ERRNO(); } @@ -140,7 +141,7 @@ /* Explicitly release the 'mutex_gil'. */ - mutex_unlock(&mutex_gil); + mutex2_unlock(&mutex_gil); /* Now nobody has got the GIL, because 'mutex_gil' is released (but rpy_fastgil is still locked). Call RPyGilAcquire(). It will diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -196,33 +196,46 @@ /* GIL code */ /************************************************************/ -typedef HANDLE mutex_t; /* a semaphore, on Windows */ +typedef HANDLE mutex2_t; /* a semaphore, on Windows */ static void gil_fatal(const char *msg) { fprintf(stderr, "Fatal error in the GIL: %s\n", msg); abort(); } -static inline void mutex_init(mutex_t *mutex) { +static inline void mutex2_init(mutex2_t *mutex) { *mutex = CreateSemaphore(NULL, 1, 1, NULL); if (*mutex == NULL) gil_fatal("CreateSemaphore failed"); } -static inline void mutex_lock(mutex_t *mutex) { +static inline void mutex2_lock(mutex2_t *mutex) { WaitForSingleObject(*mutex, INFINITE); } -static inline void mutex_unlock(mutex_t *mutex) { +static inline void mutex2_unlock(mutex2_t *mutex) { ReleaseSemaphore(*mutex, 1, NULL); } -static inline int mutex_lock_timeout(mutex_t *mutex, double delay) +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex2_init(mutex); + mutex2_lock(mutex); +} + +static inline void mutex2_loop_start(mutex2_t *mutex) { } +static inline void mutex2_loop_stop(mutex2_t *mutex) { } + +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.999)); return (result != WAIT_TIMEOUT); 
} +#define mutex1_t mutex2_t +#define mutex1_init mutex2_init +#define mutex1_lock mutex2_lock +#define mutex1_unlock mutex2_unlock + #ifdef _M_IA64 /* On Itanium, use 'acquire' memory ordering semantics */ #define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value) diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -479,7 +479,7 @@ #define ASSERT_STATUS(call) \ if (call != 0) { \ - fprintf(stderr, "Fatal error: " #call "\n"); \ + perror("Fatal error: " #call); \ abort(); \ } @@ -495,27 +495,42 @@ t->tv_nsec = nsec; } -typedef pthread_mutex_t mutex_t; +typedef pthread_mutex_t mutex1_t; -static inline void mutex_init(mutex_t *mutex) { +static inline void mutex1_init(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_init(mutex, pthread_mutexattr_default)); } -static inline void mutex_lock(mutex_t *mutex) { +static inline void mutex1_lock(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_lock(mutex)); } -static inline void mutex_unlock(mutex_t *mutex) { +static inline void mutex1_unlock(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_unlock(mutex)); } -static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { + +/************************************************************/ +#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 +/************************************************************/ +/* NB. the test above should cover two features: clock_gettime() and + pthread_mutex_timedlock(). It's unclear that there is a measurable + benefit in using pthread_mutex_timedlock(), but there is certainly + one in using clock_gettime(). 
*/ + +#define mutex2_t mutex1_t +#define mutex2_init mutex1_init +#define mutex2_lock mutex1_lock +#define mutex2_unlock mutex1_unlock + +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex2_init(mutex); + mutex2_lock(mutex); +} + +static inline void mutex2_loop_start(mutex2_t *mutex) { } +static inline void mutex2_loop_stop(mutex2_t *mutex) { } + +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { struct timespec t; -#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 clock_gettime(CLOCK_REALTIME, &t); -#else - struct timeval tv; - RPY_GETTIMEOFDAY(&tv); - t.tv_sec = tv.tv_sec; - t.tv_nsec = tv.tv_usec * 1000 + 999; -#endif timespec_add(&t, delay); int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); if (error_from_timedlock == ETIMEDOUT) @@ -523,6 +538,58 @@ ASSERT_STATUS(error_from_timedlock); return 1; } + +/************************************************************/ +#else +/************************************************************/ + +typedef struct { + char locked; + pthread_mutex_t mut; + pthread_cond_t cond; +} mutex2_t; + +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex->locked = 1; + ASSERT_STATUS(pthread_mutex_init(&mutex->mut, pthread_mutexattr_default)); + ASSERT_STATUS(pthread_cond_init(&mutex->cond, pthread_condattr_default)); +} +static inline void mutex2_unlock(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(&mutex->mut)); + mutex->locked = 0; + ASSERT_STATUS(pthread_mutex_unlock(&mutex->mut)); + ASSERT_STATUS(pthread_cond_signal(&mutex->cond)); +} +static inline void mutex2_loop_start(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(&mutex->mut)); +} +static inline void mutex2_loop_stop(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_unlock(&mutex->mut)); +} +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { + if (mutex->locked) { + struct timespec t; + struct timeval tv; + RPY_GETTIMEOFDAY(&tv); + t.tv_sec = tv.tv_sec; + t.tv_nsec = tv.tv_usec * 1000 + 
999; + timespec_add(&t, delay); + int error_from_timedwait = pthread_cond_timedwait( + &mutex->cond, &mutex->mut, &t); + if (error_from_timedwait != ETIMEDOUT) { + ASSERT_STATUS(error_from_timedwait); + } + } + int result = !mutex->locked; + mutex->locked = 1; + return result; +} + +/************************************************************/ +#endif /* _POSIX_TIMERS */ +/************************************************************/ + + #define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value) #define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) From noreply at buildbot.pypy.org Sat Jun 28 02:05:35 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 02:05:35 +0200 (CEST) Subject: [pypy-commit] pypy default: Kill the two logics. Keep only two ways to getting the time. Message-ID: <20140628000535.BE8EA1D27E0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72260:37989f72ce3c Date: 2014-06-28 02:04 +0200 http://bitbucket.org/pypy/pypy/changeset/37989f72ce3c/ Log: Kill the two logics. Keep only two ways to getting the time. 
diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -483,8 +483,16 @@ abort(); \ } -static inline void timespec_add(struct timespec *t, double incr) +static inline void timespec_delay(struct timespec *t, double incr) { +#ifdef CLOCK_REALTIME + clock_gettime(CLOCK_REALTIME, &t); +#else + struct timeval tv; + RPY_GETTIMEOFDAY(&tv); + t->tv_sec = tv.tv_sec; + t->tv_nsec = tv.tv_usec * 1000 + 999; +#endif /* assumes that "incr" is not too large, less than 1 second */ long nsec = t->tv_nsec + (long)(incr * 1000000000.0); if (nsec >= 1000000000) { @@ -507,42 +515,6 @@ ASSERT_STATUS(pthread_mutex_unlock(mutex)); } -/************************************************************/ -#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 -/************************************************************/ -/* NB. the test above should cover two features: clock_gettime() and - pthread_mutex_timedlock(). It's unclear that there is a measurable - benefit in using pthread_mutex_timedlock(), but there is certainly - one in using clock_gettime(). 
*/ - -#define mutex2_t mutex1_t -#define mutex2_init mutex1_init -#define mutex2_lock mutex1_lock -#define mutex2_unlock mutex1_unlock - -static inline void mutex2_init_locked(mutex2_t *mutex) { - mutex2_init(mutex); - mutex2_lock(mutex); -} - -static inline void mutex2_loop_start(mutex2_t *mutex) { } -static inline void mutex2_loop_stop(mutex2_t *mutex) { } - -static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { - struct timespec t; - clock_gettime(CLOCK_REALTIME, &t); - timespec_add(&t, delay); - int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); - if (error_from_timedlock == ETIMEDOUT) - return 0; - ASSERT_STATUS(error_from_timedlock); - return 1; -} - -/************************************************************/ -#else -/************************************************************/ - typedef struct { char locked; pthread_mutex_t mut; @@ -569,11 +541,7 @@ static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { if (mutex->locked) { struct timespec t; - struct timeval tv; - RPY_GETTIMEOFDAY(&tv); - t.tv_sec = tv.tv_sec; - t.tv_nsec = tv.tv_usec * 1000 + 999; - timespec_add(&t, delay); + timespec_delay(&t, delay); int error_from_timedwait = pthread_cond_timedwait( &mutex->cond, &mutex->mut, &t); if (error_from_timedwait != ETIMEDOUT) { @@ -585,11 +553,6 @@ return result; } -/************************************************************/ -#endif /* _POSIX_TIMERS */ -/************************************************************/ - - #define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value) #define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) From noreply at buildbot.pypy.org Sat Jun 28 10:58:26 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 10:58:26 +0200 (CEST) Subject: [pypy-commit] pypy default: Oups, fix -- can't use the GC here, at least in some tests Message-ID: 
<20140628085826.D6A651D2861@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72261:0bbd2a9b36a6 Date: 2014-06-28 10:57 +0200 http://bitbucket.org/pypy/pypy/changeset/0bbd2a9b36a6/ Log: Oups, fix -- can't use the GC here, at least in some tests diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -193,11 +193,20 @@ else: # ...well, unless it's a macro, in which case we still have # to hide it from the JIT... - @jit.dont_look_inside - def call_external_function(*args): - return funcptr(*args) + argnames = ', '.join(['a%d' % i for i in range(len(args))]) + source = py.code.Source(""" + def call_external_function(%(argnames)s): + return funcptr(%(argnames)s) + """ % locals()) + miniglobals = {'funcptr': funcptr, + '__name__': __name__, + } + exec source.compile() in miniglobals + call_external_function = miniglobals['call_external_function'] call_external_function = func_with_new_name(call_external_function, 'ccall_' + name) + call_external_function = jit.dont_look_inside( + call_external_function) unrolling_arg_tps = unrolling_iterable(enumerate(args)) def wrapper(*args): From noreply at buildbot.pypy.org Sat Jun 28 10:58:28 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 10:58:28 +0200 (CEST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <20140628085828.749C71D2861@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72262:ce8c750062dc Date: 2014-06-28 10:57 +0200 http://bitbucket.org/pypy/pypy/changeset/ce8c750062dc/ Log: merge heads diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -465,9 +465,13 @@ This is documented (here__ and here__). It needs 4 GB of RAM to run "rpython targetpypystandalone" on top of PyPy, a bit more when running -on CPython. 
If you have less than 4 GB it will just swap forever (or -fail if you don't have enough swap). On 32-bit, divide the numbers by -two. +on top of CPython. If you have less than 4 GB free, it will just swap +forever (or fail if you don't have enough swap). And we mean *free:* +if the machine has 4 GB *in total,* then it will swap. + +On 32-bit, divide the numbers by two. (We didn't try recently, but in +the past it was possible to compile a 32-bit version on a 2 GB Linux +machine with nothing else running: no Gnome/KDE, for example.) .. __: http://pypy.org/download.html#building-from-source .. __: https://pypy.readthedocs.org/en/latest/getting-started-python.html#translating-the-pypy-python-interpreter diff --git a/pypy/module/_socket/__init__.py b/pypy/module/_socket/__init__.py --- a/pypy/module/_socket/__init__.py +++ b/pypy/module/_socket/__init__.py @@ -6,8 +6,8 @@ } interpleveldefs = { - 'SocketType': 'interp_socket.W_RSocket', - 'socket' : 'interp_socket.W_RSocket', + 'SocketType': 'interp_socket.W_Socket', + 'socket' : 'interp_socket.W_Socket', 'error' : 'interp_socket.get_error(space, "error")', 'herror' : 'interp_socket.get_error(space, "herror")', 'gaierror' : 'interp_socket.get_error(space, "gaierror")', diff --git a/pypy/module/_socket/interp_func.py b/pypy/module/_socket/interp_func.py --- a/pypy/module/_socket/interp_func.py +++ b/pypy/module/_socket/interp_func.py @@ -1,8 +1,12 @@ -from pypy.interpreter.gateway import unwrap_spec, WrappedDefault -from pypy.module._socket.interp_socket import converted_error, W_RSocket, addr_as_object, ipaddr_from_object from rpython.rlib import rsocket from rpython.rlib.rsocket import SocketError, INVALID_SOCKET + from pypy.interpreter.error import OperationError +from pypy.interpreter.gateway import unwrap_spec, WrappedDefault +from pypy.module._socket.interp_socket import ( + converted_error, W_Socket, addr_as_object, ipaddr_from_object +) + def gethostname(space): """gethostname() -> string @@ -136,10 +140,10 @@ 
The remaining arguments are the same as for socket(). """ try: - sock = rsocket.fromfd(fd, family, type, proto, W_RSocket) + sock = rsocket.fromfd(fd, family, type, proto) except SocketError, e: raise converted_error(space, e) - return space.wrap(sock) + return space.wrap(W_Socket(sock)) @unwrap_spec(family=int, type=int, proto=int) def socketpair(space, family=rsocket.socketpair_default_family, @@ -153,10 +157,13 @@ AF_UNIX if defined on the platform; otherwise, the default is AF_INET. """ try: - sock1, sock2 = rsocket.socketpair(family, type, proto, W_RSocket) + sock1, sock2 = rsocket.socketpair(family, type, proto) except SocketError, e: raise converted_error(space, e) - return space.newtuple([space.wrap(sock1), space.wrap(sock2)]) + return space.newtuple([ + space.wrap(W_Socket(sock1)), + space.wrap(W_Socket(sock2)) + ]) # The following 4 functions refuse all negative numbers, like CPython 2.6. # They could also check that the argument is not too large, but CPython 2.6 diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -1,14 +1,18 @@ +from rpython.rlib import rsocket +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rsocket import ( + RSocket, AF_INET, SOCK_STREAM, SocketError, SocketErrorWithErrno, + RSocketError +) +from rpython.rtyper.lltypesystem import lltype, rffi + +from pypy.interpreter import gateway from pypy.interpreter.baseobjspace import W_Root -from pypy.interpreter.typedef import TypeDef, make_weakref_descr,\ - interp_attrproperty +from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from rpython.rlib.rarithmetic import intmask -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib import rsocket -from rpython.rlib.rsocket import RSocket, AF_INET, SOCK_STREAM -from rpython.rlib.rsocket import SocketError, 
SocketErrorWithErrno, RSocketError -from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter import gateway +from pypy.interpreter.typedef import ( + GetSetProperty, TypeDef, make_weakref_descr +) # XXX Hack to seperate rpython and pypy @@ -124,10 +128,18 @@ return addr -class W_RSocket(W_Root, RSocket): - def __del__(self): - self.clear_all_weakrefs() - RSocket.__del__(self) +class W_Socket(W_Root): + def __init__(self, sock): + self.sock = sock + + def get_type_w(self, space): + return space.wrap(self.sock.type) + + def get_proto_w(self, space): + return space.wrap(self.sock.proto) + + def get_family_w(self, space): + return space.wrap(self.sock.family) def accept_w(self, space): """accept() -> (socket object, address info) @@ -137,22 +149,22 @@ info is a pair (hostaddr, port). """ try: - fd, addr = self.accept() + fd, addr = self.sock.accept() sock = rsocket.make_socket( - fd, self.family, self.type, self.proto, W_RSocket) - return space.newtuple([space.wrap(sock), + fd, self.sock.family, self.sock.type, self.sock.proto) + return space.newtuple([space.wrap(W_Socket(sock)), addr_as_object(addr, sock.fd, space)]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) # convert an Address into an app-level object def addr_as_object(self, space, address): - return addr_as_object(address, self.fd, space) + return addr_as_object(address, self.sock.fd, space) # convert an app-level object into an Address # based on the current socket's family def addr_from_object(self, space, w_address): - return addr_from_object(self.family, space, w_address) + return addr_from_object(self.sock.family, space, w_address) def bind_w(self, space, w_addr): """bind(address) @@ -162,8 +174,8 @@ sockets the address is a tuple (ifname, proto [,pkttype [,hatype]]) """ try: - self.bind(self.addr_from_object(space, w_addr)) - except SocketError, e: + self.sock.bind(self.addr_from_object(space, w_addr)) + except SocketError as e: raise 
converted_error(space, e) def close_w(self, space): @@ -172,7 +184,7 @@ Close the socket. It cannot be used after this call. """ try: - self.close() + self.sock.close() except SocketError: # cpython doesn't return any errors on close pass @@ -184,8 +196,8 @@ is a pair (host, port). """ try: - self.connect(self.addr_from_object(space, w_addr)) - except SocketError, e: + self.sock.connect(self.addr_from_object(space, w_addr)) + except SocketError as e: raise converted_error(space, e) def connect_ex_w(self, space, w_addr): @@ -196,15 +208,16 @@ """ try: addr = self.addr_from_object(space, w_addr) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) - error = self.connect_ex(addr) + error = self.sock.connect_ex(addr) return space.wrap(error) def dup_w(self, space): try: - return self.dup(W_RSocket) - except SocketError, e: + sock = self.sock.dup() + return W_Socket(sock) + except SocketError as e: raise converted_error(space, e) def fileno_w(self, space): @@ -212,7 +225,7 @@ Return the integer file descriptor of the socket. """ - return space.wrap(intmask(self.fd)) + return space.wrap(intmask(self.sock.fd)) def getpeername_w(self, space): """getpeername() -> address info @@ -221,9 +234,9 @@ info is a pair (hostaddr, port). """ try: - addr = self.getpeername() - return addr_as_object(addr, self.fd, space) - except SocketError, e: + addr = self.sock.getpeername() + return addr_as_object(addr, self.sock.fd, space) + except SocketError as e: raise converted_error(space, e) def getsockname_w(self, space): @@ -233,9 +246,9 @@ info is a pair (hostaddr, port). 
""" try: - addr = self.getsockname() - return addr_as_object(addr, self.fd, space) - except SocketError, e: + addr = self.sock.getsockname() + return addr_as_object(addr, self.sock.fd, space) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(level=int, optname=int) @@ -248,11 +261,11 @@ """ if w_buflen is None: try: - return space.wrap(self.getsockopt_int(level, optname)) - except SocketError, e: + return space.wrap(self.sock.getsockopt_int(level, optname)) + except SocketError as e: raise converted_error(space, e) buflen = space.int_w(w_buflen) - return space.wrap(self.getsockopt(level, optname, buflen)) + return space.wrap(self.sock.getsockopt(level, optname, buflen)) def gettimeout_w(self, space): """gettimeout() -> timeout @@ -260,7 +273,7 @@ Returns the timeout in floating seconds associated with socket operations. A timeout of None indicates that timeouts on socket """ - timeout = self.gettimeout() + timeout = self.sock.gettimeout() if timeout < 0.0: return space.w_None return space.wrap(timeout) @@ -274,8 +287,8 @@ will allow before refusing new connections. """ try: - self.listen(backlog) - except SocketError, e: + self.sock.listen(backlog) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(w_mode = WrappedDefault("r"), @@ -298,8 +311,8 @@ the remote end is closed and all data is read, return the empty string. """ try: - data = self.recv(buffersize, flags) - except SocketError, e: + data = self.sock.recv(buffersize, flags) + except SocketError as e: raise converted_error(space, e) return space.wrap(data) @@ -310,13 +323,13 @@ Like recv(buffersize, flags) but also return the sender's address info. 
""" try: - data, addr = self.recvfrom(buffersize, flags) + data, addr = self.sock.recvfrom(buffersize, flags) if addr: - w_addr = addr_as_object(addr, self.fd, space) + w_addr = addr_as_object(addr, self.sock.fd, space) else: w_addr = space.w_None return space.newtuple([space.wrap(data), w_addr]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) @unwrap_spec(data='bufferstr', flags=int) @@ -328,8 +341,8 @@ sent; this may be less than len(data) if the network is busy. """ try: - count = self.send(data, flags) - except SocketError, e: + count = self.sock.send(data, flags) + except SocketError as e: raise converted_error(space, e) return space.wrap(count) @@ -343,8 +356,9 @@ to tell how much data has been sent. """ try: - self.sendall(data, flags, space.getexecutioncontext().checksignals) - except SocketError, e: + self.sock.sendall( + data, flags, space.getexecutioncontext().checksignals) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(data='bufferstr') @@ -364,8 +378,8 @@ w_addr = w_param3 try: addr = self.addr_from_object(space, w_addr) - count = self.sendto(data, flags, addr) - except SocketError, e: + count = self.sock.sendto(data, flags, addr) + except SocketError as e: raise converted_error(space, e) return space.wrap(count) @@ -377,7 +391,7 @@ setblocking(True) is equivalent to settimeout(None); setblocking(False) is equivalent to settimeout(0.0). 
""" - self.setblocking(flag) + self.sock.setblocking(flag) @unwrap_spec(level=int, optname=int) def setsockopt_w(self, space, level, optname, w_optval): @@ -391,13 +405,13 @@ except: optval = space.str_w(w_optval) try: - self.setsockopt(level, optname, optval) - except SocketError, e: + self.sock.setsockopt(level, optname, optval) + except SocketError as e: raise converted_error(space, e) return try: - self.setsockopt_int(level, optname, optval) - except SocketError, e: + self.sock.setsockopt_int(level, optname, optval) + except SocketError as e: raise converted_error(space, e) def settimeout_w(self, space, w_timeout): @@ -415,7 +429,7 @@ if timeout < 0.0: raise OperationError(space.w_ValueError, space.wrap('Timeout value out of range')) - self.settimeout(timeout) + self.sock.settimeout(timeout) @unwrap_spec(nbytes=int, flags=int) def recv_into_w(self, space, w_buffer, nbytes=0, flags=0): @@ -424,8 +438,8 @@ if nbytes == 0 or nbytes > lgt: nbytes = lgt try: - return space.wrap(self.recvinto(rwbuffer, nbytes, flags)) - except SocketError, e: + return space.wrap(self.sock.recvinto(rwbuffer, nbytes, flags)) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(nbytes=int, flags=int) @@ -435,13 +449,13 @@ if nbytes == 0 or nbytes > lgt: nbytes = lgt try: - readlgt, addr = self.recvfrom_into(rwbuffer, nbytes, flags) + readlgt, addr = self.sock.recvfrom_into(rwbuffer, nbytes, flags) if addr: - w_addr = addr_as_object(addr, self.fd, space) + w_addr = addr_as_object(addr, self.sock.fd, space) else: w_addr = space.w_None return space.newtuple([space.wrap(readlgt), w_addr]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) @unwrap_spec(cmd=int) @@ -473,7 +487,7 @@ option_ptr.c_keepaliveinterval = space.uint_w(w_interval) res = _c.WSAIoctl( - self.fd, cmd, value_ptr, value_size, + self.sock.fd, cmd, value_ptr, value_size, rffi.NULL, 0, recv_ptr, rffi.NULL, rffi.NULL) if res < 0: raise converted_error(space, 
rsocket.last_error()) @@ -494,8 +508,8 @@ (flag == SHUT_RDWR). """ try: - self.shutdown(how) - except SocketError, e: + self.sock.shutdown(how) + except SocketError as e: raise converted_error(space, e) #------------------------------------------------------------ @@ -536,12 +550,13 @@ @unwrap_spec(family=int, type=int, proto=int) def newsocket(space, w_subtype, family=AF_INET, type=SOCK_STREAM, proto=0): - sock = space.allocate_instance(W_RSocket, w_subtype) + self = space.allocate_instance(W_Socket, w_subtype) try: - W_RSocket.__init__(sock, family, type, proto) - except SocketError, e: + sock = RSocket(family, type, proto) + except SocketError as e: raise converted_error(space, e) - return space.wrap(sock) + W_Socket.__init__(self, sock) + return space.wrap(self) descr_socket_new = interp2app(newsocket) # ____________________________________________________________ @@ -597,10 +612,10 @@ socketmethods = {} for methodname in socketmethodnames: - method = getattr(W_RSocket, methodname + '_w') + method = getattr(W_Socket, methodname + '_w') socketmethods[methodname] = interp2app(method) -W_RSocket.typedef = TypeDef("_socket.socket", +W_Socket.typedef = TypeDef("_socket.socket", __doc__ = """\ socket([family[, type[, proto]]]) -> socket object @@ -639,9 +654,9 @@ [*] not available on all platforms!""", __new__ = descr_socket_new, - __weakref__ = make_weakref_descr(W_RSocket), - type = interp_attrproperty('type', W_RSocket), - proto = interp_attrproperty('proto', W_RSocket), - family = interp_attrproperty('family', W_RSocket), + __weakref__ = make_weakref_descr(W_Socket), + type = GetSetProperty(W_Socket.get_type_w), + proto = GetSetProperty(W_Socket.get_proto_w), + family = GetSetProperty(W_Socket.get_family_w), ** socketmethods ) diff --git a/pypy/tool/gcdump.py b/pypy/tool/gcdump.py --- a/pypy/tool/gcdump.py +++ b/pypy/tool/gcdump.py @@ -43,7 +43,7 @@ def print_summary(self): items = self.summary.items() - items.sort(key=lambda(typenum, stat): stat[1]) # sort by 
totalsize + items.sort(key=lambda (typenum, stat): stat[1]) # sort by totalsize totalsize = 0 for typenum, stat in items: totalsize += stat[1] diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -493,10 +493,16 @@ getnameinfo = external('getnameinfo', [sockaddr_ptr, socklen_t, CCHARP, size_t, CCHARP, size_t, rffi.INT], rffi.INT) -htonl = external('htonl', [rffi.UINT], rffi.UINT, releasegil=False) -htons = external('htons', [rffi.USHORT], rffi.USHORT, releasegil=False) -ntohl = external('ntohl', [rffi.UINT], rffi.UINT, releasegil=False) -ntohs = external('ntohs', [rffi.USHORT], rffi.USHORT, releasegil=False) +if sys.platform.startswith("openbsd"): + htonl = external('htonl', [rffi.UINT], rffi.UINT, releasegil=False, macro=True) + htons = external('htons', [rffi.USHORT], rffi.USHORT, releasegil=False, macro=True) + ntohl = external('ntohl', [rffi.UINT], rffi.UINT, releasegil=False, macro=True) + ntohs = external('ntohs', [rffi.USHORT], rffi.USHORT, releasegil=False, macro=True) +else: + htonl = external('htonl', [rffi.UINT], rffi.UINT, releasegil=False) + htons = external('htons', [rffi.USHORT], rffi.USHORT, releasegil=False) + ntohl = external('ntohl', [rffi.UINT], rffi.UINT, releasegil=False) + ntohs = external('ntohs', [rffi.USHORT], rffi.USHORT, releasegil=False) if _POSIX: inet_aton = external('inet_aton', [CCHARP, lltype.Ptr(in_addr)], diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -15,17 +15,18 @@ # It's unclear if makefile() and SSL support belong here or only as # app-level code for PyPy. 
+from rpython.rlib import _rsocket_rffi as _c, jit, rgc from rpython.rlib.objectmodel import instantiate, keepalive_until_here -from rpython.rlib import _rsocket_rffi as _c from rpython.rlib.rarithmetic import intmask, r_uint from rpython.rlib.rthread import dummy_lock from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rffi import sizeof, offsetof -INVALID_SOCKET = _c.INVALID_SOCKET -from rpython.rlib import jit + + # Usage of @jit.dont_look_inside in this file is possibly temporary # and only because some lltypes declared in _rsocket_rffi choke the # JIT's codewriter right now (notably, FixedSizeArray). +INVALID_SOCKET = _c.INVALID_SOCKET def mallocbuf(buffersize): @@ -86,6 +87,7 @@ self.addr_p = addr self.addrlen = addrlen + @rgc.must_be_light_finalizer def __del__(self): if self.addr_p: lltype.free(self.addr_p, flavor='raw', track_allocation=False) @@ -493,8 +495,8 @@ class RSocket(object): """RPython-level socket object. """ - _mixin_ = True # for interp_socket.py fd = _c.INVALID_SOCKET + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fd=_c.INVALID_SOCKET): """Create a new socket.""" @@ -509,6 +511,7 @@ self.proto = proto self.timeout = defaults.timeout + @rgc.must_be_light_finalizer def __del__(self): fd = self.fd if fd != _c.INVALID_SOCKET: diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -38,15 +38,14 @@ long rpy_fastgil = 1; long rpy_waiting_threads = -42; /* GIL not initialized */ -static mutex_t mutex_gil_stealer; -static mutex_t mutex_gil; +static mutex1_t mutex_gil_stealer; +static mutex2_t mutex_gil; void RPyGilAllocate(void) { assert(RPY_FASTGIL_LOCKED(rpy_fastgil)); - mutex_init(&mutex_gil_stealer); - mutex_init(&mutex_gil); - mutex_lock(&mutex_gil); + mutex1_init(&mutex_gil_stealer); + mutex2_init_locked(&mutex_gil); rpy_waiting_threads = 0; } @@ -80,14 +79,15 @@ 
first-in-first-out order, this will nicely give the threads a round-robin chance. */ - mutex_lock(&mutex_gil_stealer); + mutex1_lock(&mutex_gil_stealer); + mutex2_loop_start(&mutex_gil); /* We are now the stealer thread. Steals! */ while (1) { /* Sleep for one interval of time. We may be woken up earlier if 'mutex_gil' is released. */ - if (mutex_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ + if (mutex2_lock_timeout(&mutex_gil, 0.0001)) { /* 0.1 ms... */ /* We arrive here if 'mutex_gil' was recently released and we just relocked it. */ @@ -107,7 +107,8 @@ /* Otherwise, loop back. */ } atomic_decrement(&rpy_waiting_threads); - mutex_unlock(&mutex_gil_stealer); + mutex2_loop_stop(&mutex_gil); + mutex1_unlock(&mutex_gil_stealer); RESTORE_ERRNO(); } @@ -140,7 +141,7 @@ /* Explicitly release the 'mutex_gil'. */ - mutex_unlock(&mutex_gil); + mutex2_unlock(&mutex_gil); /* Now nobody has got the GIL, because 'mutex_gil' is released (but rpy_fastgil is still locked). Call RPyGilAcquire(). 
It will diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c --- a/rpython/translator/c/src/thread_nt.c +++ b/rpython/translator/c/src/thread_nt.c @@ -196,33 +196,46 @@ /* GIL code */ /************************************************************/ -typedef HANDLE mutex_t; /* a semaphore, on Windows */ +typedef HANDLE mutex2_t; /* a semaphore, on Windows */ static void gil_fatal(const char *msg) { fprintf(stderr, "Fatal error in the GIL: %s\n", msg); abort(); } -static inline void mutex_init(mutex_t *mutex) { +static inline void mutex2_init(mutex2_t *mutex) { *mutex = CreateSemaphore(NULL, 1, 1, NULL); if (*mutex == NULL) gil_fatal("CreateSemaphore failed"); } -static inline void mutex_lock(mutex_t *mutex) { +static inline void mutex2_lock(mutex2_t *mutex) { WaitForSingleObject(*mutex, INFINITE); } -static inline void mutex_unlock(mutex_t *mutex) { +static inline void mutex2_unlock(mutex2_t *mutex) { ReleaseSemaphore(*mutex, 1, NULL); } -static inline int mutex_lock_timeout(mutex_t *mutex, double delay) +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex2_init(mutex); + mutex2_lock(mutex); +} + +static inline void mutex2_loop_start(mutex2_t *mutex) { } +static inline void mutex2_loop_stop(mutex2_t *mutex) { } + +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.999)); return (result != WAIT_TIMEOUT); } +#define mutex1_t mutex2_t +#define mutex1_init mutex2_init +#define mutex1_lock mutex2_lock +#define mutex1_unlock mutex2_unlock + #ifdef _M_IA64 /* On Itanium, use 'acquire' memory ordering semantics */ #define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value) diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -479,12 +479,20 @@ #define ASSERT_STATUS(call) \ if 
(call != 0) { \ - fprintf(stderr, "Fatal error: " #call "\n"); \ + perror("Fatal error: " #call); \ abort(); \ } -static inline void timespec_add(struct timespec *t, double incr) +static inline void timespec_delay(struct timespec *t, double incr) { +#ifdef CLOCK_REALTIME + clock_gettime(CLOCK_REALTIME, &t); +#else + struct timeval tv; + RPY_GETTIMEOFDAY(&tv); + t->tv_sec = tv.tv_sec; + t->tv_nsec = tv.tv_usec * 1000 + 999; +#endif /* assumes that "incr" is not too large, less than 1 second */ long nsec = t->tv_nsec + (long)(incr * 1000000000.0); if (nsec >= 1000000000) { @@ -495,27 +503,56 @@ t->tv_nsec = nsec; } -typedef pthread_mutex_t mutex_t; +typedef pthread_mutex_t mutex1_t; -static inline void mutex_init(mutex_t *mutex) { +static inline void mutex1_init(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_init(mutex, pthread_mutexattr_default)); } -static inline void mutex_lock(mutex_t *mutex) { +static inline void mutex1_lock(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_lock(mutex)); } -static inline void mutex_unlock(mutex_t *mutex) { +static inline void mutex1_unlock(mutex1_t *mutex) { ASSERT_STATUS(pthread_mutex_unlock(mutex)); } -static inline int mutex_lock_timeout(mutex_t *mutex, double delay) { - struct timespec t; - clock_gettime(CLOCK_REALTIME, &t); - timespec_add(&t, delay); - int error_from_timedlock = pthread_mutex_timedlock(mutex, &t); - if (error_from_timedlock == ETIMEDOUT) - return 0; - ASSERT_STATUS(error_from_timedlock); - return 1; + +typedef struct { + char locked; + pthread_mutex_t mut; + pthread_cond_t cond; +} mutex2_t; + +static inline void mutex2_init_locked(mutex2_t *mutex) { + mutex->locked = 1; + ASSERT_STATUS(pthread_mutex_init(&mutex->mut, pthread_mutexattr_default)); + ASSERT_STATUS(pthread_cond_init(&mutex->cond, pthread_condattr_default)); } +static inline void mutex2_unlock(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(&mutex->mut)); + mutex->locked = 0; + ASSERT_STATUS(pthread_mutex_unlock(&mutex->mut)); + 
ASSERT_STATUS(pthread_cond_signal(&mutex->cond)); +} +static inline void mutex2_loop_start(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_lock(&mutex->mut)); +} +static inline void mutex2_loop_stop(mutex2_t *mutex) { + ASSERT_STATUS(pthread_mutex_unlock(&mutex->mut)); +} +static inline int mutex2_lock_timeout(mutex2_t *mutex, double delay) { + if (mutex->locked) { + struct timespec t; + timespec_delay(&t, delay); + int error_from_timedwait = pthread_cond_timedwait( + &mutex->cond, &mutex->mut, &t); + if (error_from_timedwait != ETIMEDOUT) { + ASSERT_STATUS(error_from_timedwait); + } + } + int result = !mutex->locked; + mutex->locked = 1; + return result; +} + #define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value) #define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1) diff --git a/rpython/translator/platform/posix.py b/rpython/translator/platform/posix.py --- a/rpython/translator/platform/posix.py +++ b/rpython/translator/platform/posix.py @@ -22,9 +22,11 @@ return ['-l%s' % lib for lib in libraries] def _libdirs(self, library_dirs): + assert '' not in library_dirs return ['-L%s' % ldir for ldir in library_dirs] def _includedirs(self, include_dirs): + assert '' not in include_dirs return ['-I%s' % idir for idir in include_dirs] def _linkfiles(self, link_files): From noreply at buildbot.pypy.org Sat Jun 28 12:25:18 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 12:25:18 +0200 (CEST) Subject: [pypy-commit] pypy default: oups Message-ID: <20140628102518.1C33E1C0026@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72263:f15cd1108f0e Date: 2014-06-28 10:24 +0000 http://bitbucket.org/pypy/pypy/changeset/f15cd1108f0e/ Log: oups diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -486,7 +486,7 @@ static 
inline void timespec_delay(struct timespec *t, double incr) { #ifdef CLOCK_REALTIME - clock_gettime(CLOCK_REALTIME, &t); + clock_gettime(CLOCK_REALTIME, t); #else struct timeval tv; RPY_GETTIMEOFDAY(&tv); From noreply at buildbot.pypy.org Sat Jun 28 17:20:05 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 17:20:05 +0200 (CEST) Subject: [pypy-commit] pypy default: Potential test fix Message-ID: <20140628152005.1B8751D23F1@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72264:baf99c71c6cb Date: 2014-06-28 16:16 +0200 http://bitbucket.org/pypy/pypy/changeset/baf99c71c6cb/ Log: Potential test fix diff --git a/rpython/memory/test/snippet.py b/rpython/memory/test/snippet.py --- a/rpython/memory/test/snippet.py +++ b/rpython/memory/test/snippet.py @@ -47,22 +47,33 @@ class State: pass state = State() + def age_of(c): + return state.age[ord(c) - ord('a')] + def set_age_of(c, newvalue): + # NB. this used to be a dictionary, but setting into a dict + # consumes memory. This has the effect that this test's + # __del__ methods can consume more memory and potentially + # cause another collection. This would result in objects + # being unexpectedly destroyed at the same 'state.time'. 
+ state.age[ord(c) - ord('a')] = newvalue + class A: def __init__(self, key): self.key = key self.refs = [] def __del__(self): - assert state.age[self.key] == -1 - state.age[self.key] = state.time + from rpython.rlib.debug import debug_print + debug_print("DEL:", self.key) + assert age_of(self.key) == -1 + set_age_of(self.key, state.time) state.progress = True def build_example(input): state.time = 0 - state.age = {} + state.age = [-1] * len(letters) vertices = {} for c in letters: vertices[c] = A(c) - state.age[c] = -1 for c, d in input: vertices[c].refs.append(vertices[d]) @@ -72,6 +83,8 @@ input, components, strict = examples[i] build_example(input) while state.time < len(letters): + from rpython.rlib.debug import debug_print + debug_print("STATE.TIME:", state.time) state.progress = False llop.gc__collect(lltype.Void) if not state.progress: @@ -80,16 +93,16 @@ # summarize the finalization order lst = [] for c in letters: - lst.append('%s:%d' % (c, state.age[c])) + lst.append('%s:%d' % (c, age_of(c))) summary = ', '.join(lst) # check that all instances have been finalized - if -1 in state.age.values(): + if -1 in state.age: return error(i, summary, "not all instances finalized") # check that if a -> b and a and b are not in the same # strong component, then a is finalized strictly before b for c, d in strict: - if state.age[c] >= state.age[d]: + if age_of(c) >= age_of(d): return error(i, summary, "%s should be finalized before %s" % (c, d)) @@ -98,7 +111,7 @@ for component in components: seen = {} for c in component: - age = state.age[c] + age = age_of(c) if age in seen: d = seen[age] return error(i, summary, From noreply at buildbot.pypy.org Sat Jun 28 17:20:06 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 17:20:06 +0200 (CEST) Subject: [pypy-commit] pypy default: Change appending into a list into using a chained list. 
Message-ID: <20140628152006.5B4A71D23F1@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72265:f7222d0d5407 Date: 2014-06-28 16:17 +0200 http://bitbucket.org/pypy/pypy/changeset/f7222d0d5407/ Log: Change appending into a list into using a chained list. diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -496,6 +496,13 @@ """ +class UserDelCallback(object): + def __init__(self, w_obj, callback, descrname): + self.w_obj = w_obj + self.callback = callback + self.descrname = descrname + self.next = None + class UserDelAction(AsyncAction): """An action that invokes all pending app-level __del__() method. This is done as an action instead of immediately when the @@ -506,12 +513,18 @@ def __init__(self, space): AsyncAction.__init__(self, space) - self.dying_objects = [] + self.dying_objects = None + self.dying_objects_last = None self.finalizers_lock_count = 0 self.enabled_at_app_level = True def register_callback(self, w_obj, callback, descrname): - self.dying_objects.append((w_obj, callback, descrname)) + cb = UserDelCallback(w_obj, callback, descrname) + if self.dying_objects_last is None: + self.dying_objects = cb + else: + self.dying_objects_last.next = cb + self.dying_objects_last = cb self.fire() def perform(self, executioncontext, frame): @@ -525,13 +538,13 @@ # avoid too deep recursions of the kind of __del__ being called # while in the middle of another __del__ call. 
pending = self.dying_objects - self.dying_objects = [] + self.dying_objects = None + self.dying_objects_last = None space = self.space - for i in range(len(pending)): - w_obj, callback, descrname = pending[i] - pending[i] = (None, None, None) + while pending is not None: try: - callback(w_obj) + pending.callback(pending.w_obj) except OperationError, e: - e.write_unraisable(space, descrname, w_obj) + e.write_unraisable(space, pending.descrname, pending.w_obj) e.clear(space) # break up reference cycles + pending = pending.next From noreply at buildbot.pypy.org Sat Jun 28 17:20:07 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 17:20:07 +0200 (CEST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <20140628152007.9CF781D23F1@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72266:913f4c5ecaaa Date: 2014-06-28 17:19 +0200 http://bitbucket.org/pypy/pypy/changeset/913f4c5ecaaa/ Log: merge heads diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c --- a/rpython/translator/c/src/thread_pthread.c +++ b/rpython/translator/c/src/thread_pthread.c @@ -486,7 +486,7 @@ static inline void timespec_delay(struct timespec *t, double incr) { #ifdef CLOCK_REALTIME - clock_gettime(CLOCK_REALTIME, &t); + clock_gettime(CLOCK_REALTIME, t); #else struct timeval tv; RPY_GETTIMEOFDAY(&tv); From noreply at buildbot.pypy.org Sat Jun 28 17:37:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sat, 28 Jun 2014 17:37:09 +0200 (CEST) Subject: [pypy-commit] pypy default: Comment describing how f7222d0d5407 is enough to fix issue 1805. Message-ID: <20140628153709.152B91D23F1@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72267:848aac545036 Date: 2014-06-28 17:36 +0200 http://bitbucket.org/pypy/pypy/changeset/848aac545036/ Log: Comment describing how f7222d0d5407 is enough to fix issue 1805. 
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -548,3 +548,23 @@ e.write_unraisable(space, pending.descrname, pending.w_obj) e.clear(space) # break up reference cycles pending = pending.next + # + # Note: 'dying_objects' used to be just a regular list instead + # of a chained list. This was the cause of "leaks" if we have a + # program that constantly creates new objects with finalizers. + # Here is why: say 'dying_objects' is a long list, and there + # are n instances in it. Then we spend some time in this + # function, possibly triggering more GCs, but keeping the list + # of length n alive. Then the list is suddenly freed at the + # end, and we return to the user program. At this point the + # GC limit is still very high, because just before, there was + # a list of length n alive. Assume that the program continues + # to allocate a lot of instances with finalizers. The high GC + # limit means that it could allocate a lot of instances before + # reaching it --- possibly more than n. So the whole procedure + # repeats with higher and higher values of n. + # + # This does not occur in the current implementation because + # there is no list of length n: if n is large, then the GC + # will run several times while walking the list, but it will + # see lower and lower memory usage, with no lower bound of n. 
From noreply at buildbot.pypy.org Sat Jun 28 19:05:28 2014 From: noreply at buildbot.pypy.org (alex_gaynor) Date: Sat, 28 Jun 2014 19:05:28 +0200 (CEST) Subject: [pypy-commit] pypy default: Mark this __del__ as must-be-light-finalizer Message-ID: <20140628170528.B55F21D34D9@cobra.cs.uni-duesseldorf.de> Author: Alex Gaynor Branch: Changeset: r72268:63f6ae36ba01 Date: 2014-06-28 10:05 -0700 http://bitbucket.org/pypy/pypy/changeset/63f6ae36ba01/ Log: Mark this __del__ as must-be-light-finalizer diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py --- a/pypy/module/_cffi_backend/ctypefunc.py +++ b/pypy/module/_cffi_backend/ctypefunc.py @@ -4,7 +4,7 @@ import sys -from rpython.rlib import jit, clibffi, jit_libffi +from rpython.rlib import jit, clibffi, jit_libffi, rgc from rpython.rlib.jit_libffi import (CIF_DESCRIPTION, CIF_DESCRIPTION_P, FFI_TYPE, FFI_TYPE_P, FFI_TYPE_PP, SIZE_OF_FFI_ARG) from rpython.rlib.objectmodel import we_are_translated, instantiate @@ -63,6 +63,7 @@ CifDescrBuilder(fvarargs, self.ctitem).rawallocate(ctypefunc) return ctypefunc + @rgc.must_be_light_finalizer def __del__(self): if self.cif_descr: lltype.free(self.cif_descr, flavor='raw') From noreply at buildbot.pypy.org Sat Jun 28 20:30:55 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Sat, 28 Jun 2014 20:30:55 +0200 (CEST) Subject: [pypy-commit] pypy scalar-operations: avoid unnecesary recursion Message-ID: <20140628183055.0525C1C0026@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: scalar-operations Changeset: r72269:85a336b9ee2d Date: 2014-06-27 02:10 +0100 http://bitbucket.org/pypy/pypy/changeset/85a336b9ee2d/ Log: avoid unnecesary recursion diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -33,12 +33,13 @@ if not isinstance(w_object, W_NDimArray): w___array__ = space.lookup(w_object, "__array__") if w___array__ is not None: - if 
space.is_none(w_dtype): + if w_dtype is None: w_dtype = space.w_None w_array = space.get_and_call_function(w___array__, w_object, w_dtype) if isinstance(w_array, W_NDimArray): - # feed w_array back into array() for other properties - return array(space, w_array, w_dtype, False, w_order, subok, ndmin) + # continue with w_array, but do further operations in place + w_object = w_array + copy = False else: raise oefmt(space.w_ValueError, "object __array__ method not producing an array") From noreply at buildbot.pypy.org Sat Jun 28 20:30:56 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Sat, 28 Jun 2014 20:30:56 +0200 (CEST) Subject: [pypy-commit] pypy scalar-operations: extract try_array_method() from array() Message-ID: <20140628183056.3C2B51C0026@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: scalar-operations Changeset: r72270:6fc0175b3286 Date: 2014-06-27 23:14 +0100 http://bitbucket.org/pypy/pypy/changeset/6fc0175b3286/ Log: extract try_array_method() from array() diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -24,6 +24,20 @@ return box +def try_array_method(space, w_object, w_dtype=None): + w___array__ = space.lookup(w_object, "__array__") + if w___array__ is None: + return None + if w_dtype is None: + w_dtype = space.w_None + w_array = space.get_and_call_function(w___array__, w_object, w_dtype) + if isinstance(w_array, W_NDimArray): + return w_array + else: + raise oefmt(space.w_ValueError, + "object __array__ method not producing an array") + + @unwrap_spec(ndmin=int, copy=bool, subok=bool) def array(space, w_object, w_dtype=None, copy=True, w_order=None, subok=False, ndmin=0): @@ -31,18 +45,11 @@ # for anything that isn't already an array, try __array__ method first if not isinstance(w_object, W_NDimArray): - w___array__ = space.lookup(w_object, "__array__") - if w___array__ is not None: - if w_dtype is None: - w_dtype = space.w_None - 
w_array = space.get_and_call_function(w___array__, w_object, w_dtype) - if isinstance(w_array, W_NDimArray): - # continue with w_array, but do further operations in place - w_object = w_array - copy = False - else: - raise oefmt(space.w_ValueError, - "object __array__ method not producing an array") + w_array = try_array_method(space, w_object, w_dtype) + if w_array is not None: + # continue with w_array, but do further operations in place + w_object = w_array + copy = False dtype = descriptor.decode_w_dtype(space, w_dtype) From noreply at buildbot.pypy.org Sat Jun 28 20:30:57 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Sat, 28 Jun 2014 20:30:57 +0200 (CEST) Subject: [pypy-commit] pypy scalar-operations: simplify handling of np.array()'s ndmin parameter Message-ID: <20140628183057.78F891C0026@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: scalar-operations Changeset: r72271:4f27ccc29f40 Date: 2014-06-28 19:30 +0100 http://bitbucket.org/pypy/pypy/changeset/4f27ccc29f40/ Log: simplify handling of np.array()'s ndmin parameter diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -41,6 +41,18 @@ @unwrap_spec(ndmin=int, copy=bool, subok=bool) def array(space, w_object, w_dtype=None, copy=True, w_order=None, subok=False, ndmin=0): + w_res = _array(space, w_object, w_dtype, copy, w_order, subok) + shape = w_res.get_shape() + if len(shape) < ndmin: + shape = [1] * (ndmin - len(shape)) + shape + impl = w_res.implementation.set_shape(space, w_res, shape) + if w_res is w_object: + return W_NDimArray(impl) + else: + w_res.implementation = impl + return w_res + +def _array(space, w_object, w_dtype=None, copy=True, w_order=None, subok=False): from pypy.module.micronumpy import strides # for anything that isn't already an array, try __array__ method first @@ -65,19 +77,10 @@ # arrays with correct dtype if isinstance(w_object, W_NDimArray) and \ 
(space.is_none(w_dtype) or w_object.get_dtype() is dtype): - shape = w_object.get_shape() if copy: - w_ret = w_object.descr_copy(space) + return w_object.descr_copy(space) else: - if ndmin <= len(shape): - return w_object - new_impl = w_object.implementation.set_shape(space, w_object, shape) - w_ret = W_NDimArray(new_impl) - if ndmin > len(shape): - shape = [1] * (ndmin - len(shape)) + shape - w_ret.implementation = w_ret.implementation.set_shape(space, - w_ret, shape) - return w_ret + return w_object # not an array or incorrect dtype shape, elems_w = strides.find_shape_and_elems(space, w_object, dtype) @@ -89,8 +92,6 @@ # promote S0 -> S1, U0 -> U1 dtype = descriptor.variable_dtype(space, dtype.char + '1') - if ndmin > len(shape): - shape = [1] * (ndmin - len(shape)) + shape w_arr = W_NDimArray.from_shape(space, shape, dtype, order=order) if len(elems_w) == 1: w_arr.set_scalar_value(dtype.coerce(space, elems_w[0])) From noreply at buildbot.pypy.org Sun Jun 29 11:06:11 2014 From: noreply at buildbot.pypy.org (arigo) Date: Sun, 29 Jun 2014 11:06:11 +0200 (CEST) Subject: [pypy-commit] cffi default: Add a passing Windows test Message-ID: <20140629090611.56C391C0542@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1531:ab49d344cfe2 Date: 2014-06-29 11:06 +0200 http://bitbucket.org/cffi/cffi/changeset/ab49d344cfe2/ Log: Add a passing Windows test diff --git a/testing/test_function.py b/testing/test_function.py --- a/testing/test_function.py +++ b/testing/test_function.py @@ -402,3 +402,18 @@ if wr() is not None: import gc; gc.collect() assert wr() is None # 'data' does not leak + + def test_windows_stdcall(self): + if sys.platform != 'win32': + py.test.skip("Windows-only test") + if self.Backend is CTypesBackend: + py.test.skip("not with the ctypes backend") + ffi = FFI(backend=self.Backend()) + ffi.cdef(""" + BOOL QueryPerformanceFrequency(LONGLONG *lpFrequency); + """) + m = ffi.dlopen("Kernel32.dll") + p_freq = ffi.new("LONGLONG *") + res = 
m.QueryPerformanceFrequency(p_freq) + assert res != 0 + assert p_freq[0] != 0 From noreply at buildbot.pypy.org Mon Jun 30 03:05:48 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Mon, 30 Jun 2014 03:05:48 +0200 (CEST) Subject: [pypy-commit] pypy scalar-operations: Convert ufunc args to scalars rather than arrays when possible Message-ID: <20140630010548.8A2091C024A@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: scalar-operations Changeset: r72272:fbf38c2a6bf6 Date: 2014-06-29 19:21 +0100 http://bitbucket.org/pypy/pypy/changeset/fbf38c2a6bf6/ Log: Convert ufunc args to scalars rather than arrays when possible diff --git a/pypy/module/micronumpy/base.py b/pypy/module/micronumpy/base.py --- a/pypy/module/micronumpy/base.py +++ b/pypy/module/micronumpy/base.py @@ -90,6 +90,14 @@ w_val = dtype.coerce(space, space.wrap(0)) return convert_to_array(space, w_val) + @staticmethod + def from_scalar(space, w_scalar): + """Convert a scalar into a 0-dim array""" + dtype = w_scalar.get_dtype(space) + w_arr = W_NDimArray.from_shape(space, [], dtype) + w_arr.set_scalar_value(w_scalar) + return w_arr + def convert_to_array(space, w_obj): from pypy.module.micronumpy.ctors import array diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -4,7 +4,8 @@ from rpython.rlib.rstring import strip_spaces from rpython.rtyper.lltypesystem import lltype, rffi from pypy.module.micronumpy import descriptor, loop -from pypy.module.micronumpy.base import W_NDimArray, convert_to_array +from pypy.module.micronumpy.base import ( + W_NDimArray, convert_to_array, W_NumpyObject) from pypy.module.micronumpy.converters import shape_converter @@ -100,6 +101,17 @@ return w_arr +def numpify(space, w_object): + """Convert the object to a W_NumpyObject""" + if isinstance(w_object, W_NumpyObject): + return w_object + w_res = array(space, w_object) + if w_res.is_scalar(): + return 
w_res.get_scalar_value() + else: + return w_res + + def zeros(space, w_shape, w_dtype=None, w_order=None): dtype = space.interp_w(descriptor.W_Dtype, space.call_function(space.gettypefor(descriptor.W_Dtype), w_dtype)) diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -7,6 +7,7 @@ from rpython.tool.sourcetools import func_with_new_name from pypy.module.micronumpy import boxes, descriptor, loop, constants as NPY from pypy.module.micronumpy.base import convert_to_array, W_NDimArray +from pypy.module.micronumpy.ctors import numpify from pypy.module.micronumpy.strides import shape_agreement @@ -17,6 +18,13 @@ def done_if_false(dtype, val): return not dtype.itemtype.bool(val) +def _get_dtype(space, w_npyobj): + if isinstance(w_npyobj, boxes.W_GenericBox): + return w_npyobj.get_dtype(space) + else: + assert isinstance(w_npyobj, W_NDimArray) + return w_npyobj.get_dtype() + class W_Ufunc(W_Root): _immutable_fields_ = [ @@ -385,15 +393,10 @@ else: [w_lhs, w_rhs] = args_w w_out = None - if (isinstance(w_lhs, boxes.W_GenericBox) and - isinstance(w_rhs, boxes.W_GenericBox)): - w_ldtype = w_lhs.get_dtype(space) - w_rdtype = w_rhs.get_dtype(space) - else: - w_lhs = convert_to_array(space, w_lhs) - w_rhs = convert_to_array(space, w_rhs) - w_ldtype = w_lhs.get_dtype() - w_rdtype = w_rhs.get_dtype() + w_lhs = numpify(space, w_lhs) + w_rhs = numpify(space, w_rhs) + w_ldtype = _get_dtype(space, w_lhs) + w_rdtype = _get_dtype(space, w_rhs) if w_ldtype.is_str() and w_rdtype.is_str() and \ self.comparison_func: pass @@ -456,7 +459,11 @@ else: out = arr return out + if isinstance(w_lhs, boxes.W_GenericBox): + w_lhs = W_NDimArray.from_scalar(space, w_lhs) assert isinstance(w_lhs, W_NDimArray) + if isinstance(w_rhs, boxes.W_GenericBox): + w_rhs = W_NDimArray.from_scalar(space, w_rhs) assert isinstance(w_rhs, W_NDimArray) new_shape = shape_agreement(space, w_lhs.get_shape(), w_rhs) 
new_shape = shape_agreement(space, new_shape, out, broadcast_down=False) From noreply at buildbot.pypy.org Mon Jun 30 03:05:49 2014 From: noreply at buildbot.pypy.org (rlamy) Date: Mon, 30 Jun 2014 03:05:49 +0200 (CEST) Subject: [pypy-commit] pypy scalar-operations: Ensure that calling numpify() does not create an array. Message-ID: <20140630010549.B4C671C024A@cobra.cs.uni-duesseldorf.de> Author: Ronan Lamy Branch: scalar-operations Changeset: r72273:4fd576e1ab9d Date: 2014-06-30 02:04 +0100 http://bitbucket.org/pypy/pypy/changeset/4fd576e1ab9d/ Log: Ensure that calling numpify() does not create an array. Fixes issue #1707. diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -103,13 +103,29 @@ def numpify(space, w_object): """Convert the object to a W_NumpyObject""" + # XXX: code duplication with _array() + from pypy.module.micronumpy import strides if isinstance(w_object, W_NumpyObject): return w_object - w_res = array(space, w_object) - if w_res.is_scalar(): - return w_res.get_scalar_value() + # for anything that isn't already an array, try __array__ method first + w_array = try_array_method(space, w_object) + if w_array is not None: + return w_array + + shape, elems_w = strides.find_shape_and_elems(space, w_object, None) + dtype = strides.find_dtype_for_seq(space, elems_w, None) + if dtype is None: + dtype = descriptor.get_dtype_cache(space).w_float64dtype + elif dtype.is_str_or_unicode() and dtype.elsize < 1: + # promote S0 -> S1, U0 -> U1 + dtype = descriptor.variable_dtype(space, dtype.char + '1') + + if len(elems_w) == 1: + return dtype.coerce(space, elems_w[0]) else: - return w_res + w_arr = W_NDimArray.from_shape(space, shape, dtype) + loop.assign(space, w_arr, elems_w) + return w_arr def zeros(space, w_shape, w_dtype=None, w_order=None): From noreply at buildbot.pypy.org Mon Jun 30 03:11:34 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 
30 Jun 2014 03:11:34 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: backout removal of sys.dllhandle Message-ID: <20140630011134.116751C024A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72274:ad184a18b1ef Date: 2014-06-30 03:56 +0300 http://bitbucket.org/pypy/pypy/changeset/ad184a18b1ef/ Log: backout removal of sys.dllhandle diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -65,7 +65,7 @@ signal struct symbol - sys (without sys.dllhandle on windows) + sys termios thread time diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -809,6 +809,8 @@ import ctypes bridge = ctypes.CDLL(str(modulename), mode=ctypes.RTLD_GLOBAL) + space.fromcache(State).install_dll(eci) + # populate static data for name, (typ, expr) in GLOBALS.iteritems(): from pypy.module import cpyext @@ -1002,6 +1004,23 @@ separate_module_sources = [code, struct_source] + if sys.platform == 'win32': + get_pythonapi_source = ''' + #include + HANDLE pypy_get_pythonapi_handle() { + MEMORY_BASIC_INFORMATION mi; + memset(&mi, 0, sizeof(mi)); + + if( !VirtualQueryEx(GetCurrentProcess(), &pypy_get_pythonapi_handle, + &mi, sizeof(mi)) ) + return 0; + + return (HMODULE)mi.AllocationBase; + } + ''' + separate_module_sources.append(get_pythonapi_source) + export_symbols_eci.append('pypy_get_pythonapi_handle') + eci = ExternalCompilationInfo( include_dirs=include_dirs, separate_module_files=[source_dir / "varargwrapper.c", @@ -1046,6 +1065,8 @@ eci = build_eci(False, export_symbols, code) + space.fromcache(State).install_dll(eci) + run_bootstrap_functions(space) setup_va_functions(eci) diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -62,6 +62,14 @@ else: api.setup_library(self.space) + 
def install_dll(self, eci): + """NOT_RPYTHON + Called when the dll has been compiled""" + if sys.platform == 'win32': + self.get_pythonapi_handle = rffi.llexternal( + 'pypy_get_pythonapi_handle', [], DLLHANDLE, + compilation_info=eci) + def startup(self, space): "This function is called when the program really starts" diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -201,6 +201,13 @@ raises(ImportError, cpyext.load_module, "missing.file", "foo") raises(ImportError, cpyext.load_module, self.libc, "invalid.function") + def test_dllhandle(self): + import sys + if sys.platform != "win32" or sys.version_info < (2, 6): + skip("Windows Python >= 2.6 only") + assert sys.dllhandle + assert sys.dllhandle.getaddressindll('cpyexttestErr_NewException') + class AppTestCpythonExtensionBase(LeakCheckingTest): def setup_class(cls): diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py --- a/pypy/module/sys/__init__.py +++ b/pypy/module/sys/__init__.py @@ -108,6 +108,12 @@ # don't get the filesystemencoding at translation time assert self.filesystemencoding is None + else: + if _WIN: + from pypy.module.sys import vm + w_handle = vm.get_dllhandle(space) + space.setitem(self.w_dict, space.wrap("dllhandle"), w_handle) + def getmodule(self, name): space = self.space w_modules = self.get('modules') diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -229,6 +229,26 @@ ]) return space.call_function(w_windows_version_info, raw_version) + at jit.dont_look_inside +def get_dllhandle(space): + if not space.config.objspace.usemodules.cpyext: + return space.wrap(0) + if not space.config.objspace.usemodules._rawffi: + return space.wrap(0) + + return _get_dllhandle(space) + +def _get_dllhandle(space): + # Retrieve cpyext api handle + from pypy.module.cpyext.api import State + handle 
= space.fromcache(State).get_pythonapi_handle() + + # Make a dll object with it + from pypy.module._rawffi.interp_rawffi import W_CDLL + from rpython.rlib.clibffi import RawCDLL + cdll = RawCDLL(handle) + return space.wrap(W_CDLL(space, "python api", cdll)) + def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" if w_default is None: From noreply at buildbot.pypy.org Mon Jun 30 03:11:35 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 30 Jun 2014 03:11:35 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: sys.dllhandle should be an int Message-ID: <20140630011135.419F51C024A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72275:4c96fe379073 Date: 2014-06-30 04:08 +0300 http://bitbucket.org/pypy/pypy/changeset/4c96fe379073/ Log: sys.dllhandle should be an int diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -205,8 +205,7 @@ import sys if sys.platform != "win32" or sys.version_info < (2, 6): skip("Windows Python >= 2.6 only") - assert sys.dllhandle - assert sys.dllhandle.getaddressindll('cpyexttestErr_NewException') + assert isinstance(sys.dllhandle, int) class AppTestCpythonExtensionBase(LeakCheckingTest): diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -243,11 +243,14 @@ from pypy.module.cpyext.api import State handle = space.fromcache(State).get_pythonapi_handle() - # Make a dll object with it - from pypy.module._rawffi.interp_rawffi import W_CDLL - from rpython.rlib.clibffi import RawCDLL - cdll = RawCDLL(handle) - return space.wrap(W_CDLL(space, "python api", cdll)) + # It used to be a CDLL + # from pypy.module._rawffi.interp_rawffi import W_CDLL + # from rpython.rlib.clibffi import RawCDLL + # cdll = RawCDLL(handle) + # return space.wrap(W_CDLL(space, "python api", cdll)) + # 
Provide a cpython-compatible int + from rpython.rtyper.lltypesystem import rffi + return space.wrap(rffi.cast(rffi.INT, handle)) def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" From noreply at buildbot.pypy.org Mon Jun 30 03:11:38 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 30 Jun 2014 03:11:38 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: merge default into branch Message-ID: <20140630011138.494A11C024A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72276:095a123d35c1 Date: 2014-06-30 04:08 +0300 http://bitbucket.org/pypy/pypy/changeset/095a123d35c1/ Log: merge default into branch diff too long, truncating to 2000 out of 3709 lines diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -443,6 +443,10 @@ for enumname, enumval in zip(tp.enumerators, tp.enumvalues): if enumname not in library.__dict__: library.__dict__[enumname] = enumval + for key, val in ffi._parser._int_constants.items(): + if key not in library.__dict__: + library.__dict__[key] = val + copied_enums.append(True) if name in library.__dict__: return diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -24,6 +24,7 @@ _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]") _r_words = re.compile(r"\w+|\S") _parser_cache = None +_r_int_literal = re.compile(r"^0?x?[0-9a-f]+u?l?$", re.IGNORECASE) def _get_parser(): global _parser_cache @@ -99,6 +100,7 @@ self._structnode2type = weakref.WeakKeyDictionary() self._override = False self._packed = False + self._int_constants = {} def _parse(self, csource): csource, macros = _preprocess(csource) @@ -128,9 +130,10 @@ finally: if lock is not None: lock.release() - return ast, macros + # csource will be used to find buggy source text + return ast, macros, csource - def convert_pycparser_error(self, e, csource): + def 
_convert_pycparser_error(self, e, csource): # xxx look for ":NUM:" at the start of str(e) and try to interpret # it as a line number line = None @@ -142,6 +145,12 @@ csourcelines = csource.splitlines() if 1 <= linenum <= len(csourcelines): line = csourcelines[linenum-1] + return line + + def convert_pycparser_error(self, e, csource): + line = self._convert_pycparser_error(e, csource) + + msg = str(e) if line: msg = 'cannot parse "%s"\n%s' % (line.strip(), msg) else: @@ -160,14 +169,9 @@ self._packed = prev_packed def _internal_parse(self, csource): - ast, macros = self._parse(csource) + ast, macros, csource = self._parse(csource) # add the macros - for key, value in macros.items(): - value = value.strip() - if value != '...': - raise api.CDefError('only supports the syntax "#define ' - '%s ..." for now (literally)' % key) - self._declare('macro ' + key, value) + self._process_macros(macros) # find the first "__dotdotdot__" and use that as a separator # between the repeated typedefs and the real csource iterator = iter(ast.ext) @@ -175,27 +179,61 @@ if decl.name == '__dotdotdot__': break # - for decl in iterator: - if isinstance(decl, pycparser.c_ast.Decl): - self._parse_decl(decl) - elif isinstance(decl, pycparser.c_ast.Typedef): - if not decl.name: - raise api.CDefError("typedef does not declare any name", - decl) - if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) - and decl.type.type.names == ['__dotdotdot__']): - realtype = model.unknown_type(decl.name) - elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and - isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and - isinstance(decl.type.type.type, - pycparser.c_ast.IdentifierType) and - decl.type.type.type.names == ['__dotdotdot__']): - realtype = model.unknown_ptr_type(decl.name) + try: + for decl in iterator: + if isinstance(decl, pycparser.c_ast.Decl): + self._parse_decl(decl) + elif isinstance(decl, pycparser.c_ast.Typedef): + if not decl.name: + raise api.CDefError("typedef does not 
declare any name", + decl) + if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) + and decl.type.type.names == ['__dotdotdot__']): + realtype = model.unknown_type(decl.name) + elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and + isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and + isinstance(decl.type.type.type, + pycparser.c_ast.IdentifierType) and + decl.type.type.type.names == ['__dotdotdot__']): + realtype = model.unknown_ptr_type(decl.name) + else: + realtype = self._get_type(decl.type, name=decl.name) + self._declare('typedef ' + decl.name, realtype) else: - realtype = self._get_type(decl.type, name=decl.name) - self._declare('typedef ' + decl.name, realtype) + raise api.CDefError("unrecognized construct", decl) + except api.FFIError as e: + msg = self._convert_pycparser_error(e, csource) + if msg: + e.args = (e.args[0] + "\n *** Err: %s" % msg,) + raise + + def _add_constants(self, key, val): + if key in self._int_constants: + raise api.FFIError( + "multiple declarations of constant: %s" % (key,)) + self._int_constants[key] = val + + def _process_macros(self, macros): + for key, value in macros.items(): + value = value.strip() + match = _r_int_literal.search(value) + if match is not None: + int_str = match.group(0).lower().rstrip("ul") + + # "010" is not valid oct in py3 + if (int_str.startswith("0") and + int_str != "0" and + not int_str.startswith("0x")): + int_str = "0o" + int_str[1:] + + pyvalue = int(int_str, 0) + self._add_constants(key, pyvalue) + elif value == '...': + self._declare('macro ' + key, value) else: - raise api.CDefError("unrecognized construct", decl) + raise api.CDefError('only supports the syntax "#define ' + '%s ..." 
(literally) or "#define ' + '%s 0x1FF" for now' % (key, key)) def _parse_decl(self, decl): node = decl.type @@ -227,7 +265,7 @@ self._declare('variable ' + decl.name, tp) def parse_type(self, cdecl): - ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl) + ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2] assert not macros exprnode = ast.ext[-1].type.args.params[0] if isinstance(exprnode, pycparser.c_ast.ID): @@ -306,7 +344,8 @@ if ident == 'void': return model.void_type if ident == '__dotdotdot__': - raise api.FFIError('bad usage of "..."') + raise api.FFIError(':%d: bad usage of "..."' % + typenode.coord.line) return resolve_common_type(ident) # if isinstance(type, pycparser.c_ast.Struct): @@ -333,7 +372,8 @@ return self._get_struct_union_enum_type('union', typenode, name, nested=True) # - raise api.FFIError("bad or unsupported type declaration") + raise api.FFIError(":%d: bad or unsupported type declaration" % + typenode.coord.line) def _parse_function_type(self, typenode, funcname=None): params = list(getattr(typenode.args, 'params', [])) @@ -499,6 +539,10 @@ if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and exprnode.op == '-'): return -self._parse_constant(exprnode.expr) + # load previously defined int constant + if (isinstance(exprnode, pycparser.c_ast.ID) and + exprnode.name in self._int_constants): + return self._int_constants[exprnode.name] # if partial_length_ok: if (isinstance(exprnode, pycparser.c_ast.ID) and @@ -506,8 +550,8 @@ self._partial_length = True return '...' 
# - raise api.FFIError("unsupported expression: expected a " - "simple numeric constant") + raise api.FFIError(":%d: unsupported expression: expected a " + "simple numeric constant" % exprnode.coord.line) def _build_enum_type(self, explicit_name, decls): if decls is not None: @@ -522,6 +566,7 @@ if enum.value is not None: nextenumvalue = self._parse_constant(enum.value) enumvalues.append(nextenumvalue) + self._add_constants(enum.name, nextenumvalue) nextenumvalue += 1 enumvalues = tuple(enumvalues) tp = model.EnumType(explicit_name, enumerators, enumvalues) @@ -535,3 +580,5 @@ kind = name.split(' ', 1)[0] if kind in ('typedef', 'struct', 'union', 'enum'): self._declare(name, tp) + for k, v in other._int_constants.items(): + self._add_constants(k, v) diff --git a/lib_pypy/cffi/ffiplatform.py b/lib_pypy/cffi/ffiplatform.py --- a/lib_pypy/cffi/ffiplatform.py +++ b/lib_pypy/cffi/ffiplatform.py @@ -38,6 +38,7 @@ import distutils.errors # dist = Distribution({'ext_modules': [ext]}) + dist.parse_config_files() options = dist.get_option_dict('build_ext') options['force'] = ('ffiplatform', True) options['build_lib'] = ('ffiplatform', tmpdir) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -89,43 +89,54 @@ # by generate_cpy_function_method(). prnt('static PyMethodDef _cffi_methods[] = {') self._generate("method") - prnt(' {"_cffi_setup", _cffi_setup, METH_VARARGS},') - prnt(' {NULL, NULL} /* Sentinel */') + prnt(' {"_cffi_setup", _cffi_setup, METH_VARARGS, NULL},') + prnt(' {NULL, NULL, 0, NULL} /* Sentinel */') prnt('};') prnt() # # standard init. 
modname = self.verifier.get_module_name() - if sys.version_info >= (3,): - prnt('static struct PyModuleDef _cffi_module_def = {') - prnt(' PyModuleDef_HEAD_INIT,') - prnt(' "%s",' % modname) - prnt(' NULL,') - prnt(' -1,') - prnt(' _cffi_methods,') - prnt(' NULL, NULL, NULL, NULL') - prnt('};') - prnt() - initname = 'PyInit_%s' % modname - createmod = 'PyModule_Create(&_cffi_module_def)' - errorcase = 'return NULL' - finalreturn = 'return lib' - else: - initname = 'init%s' % modname - createmod = 'Py_InitModule("%s", _cffi_methods)' % modname - errorcase = 'return' - finalreturn = 'return' + constants = self._chained_list_constants[False] + prnt('#if PY_MAJOR_VERSION >= 3') + prnt() + prnt('static struct PyModuleDef _cffi_module_def = {') + prnt(' PyModuleDef_HEAD_INIT,') + prnt(' "%s",' % modname) + prnt(' NULL,') + prnt(' -1,') + prnt(' _cffi_methods,') + prnt(' NULL, NULL, NULL, NULL') + prnt('};') + prnt() prnt('PyMODINIT_FUNC') - prnt('%s(void)' % initname) + prnt('PyInit_%s(void)' % modname) prnt('{') prnt(' PyObject *lib;') - prnt(' lib = %s;' % createmod) - prnt(' if (lib == NULL || %s < 0)' % ( - self._chained_list_constants[False],)) - prnt(' %s;' % errorcase) - prnt(' _cffi_init();') - prnt(' %s;' % finalreturn) + prnt(' lib = PyModule_Create(&_cffi_module_def);') + prnt(' if (lib == NULL)') + prnt(' return NULL;') + prnt(' if (%s < 0 || _cffi_init() < 0) {' % (constants,)) + prnt(' Py_DECREF(lib);') + prnt(' return NULL;') + prnt(' }') + prnt(' return lib;') prnt('}') + prnt() + prnt('#else') + prnt() + prnt('PyMODINIT_FUNC') + prnt('init%s(void)' % modname) + prnt('{') + prnt(' PyObject *lib;') + prnt(' lib = Py_InitModule("%s", _cffi_methods);' % modname) + prnt(' if (lib == NULL)') + prnt(' return;') + prnt(' if (%s < 0 || _cffi_init() < 0)' % (constants,)) + prnt(' return;') + prnt(' return;') + prnt('}') + prnt() + prnt('#endif') def load_library(self): # XXX review all usages of 'self' here! 
@@ -394,7 +405,7 @@ meth = 'METH_O' else: meth = 'METH_VARARGS' - self._prnt(' {"%s", _cffi_f_%s, %s},' % (name, name, meth)) + self._prnt(' {"%s", _cffi_f_%s, %s, NULL},' % (name, name, meth)) _loading_cpy_function = _loaded_noop @@ -481,8 +492,8 @@ if tp.fldnames is None: return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) - self._prnt(' {"%s", %s, METH_NOARGS},' % (layoutfuncname, - layoutfuncname)) + self._prnt(' {"%s", %s, METH_NOARGS, NULL},' % (layoutfuncname, + layoutfuncname)) def _loading_struct_or_union(self, tp, prefix, name, module): if tp.fldnames is None: @@ -589,13 +600,7 @@ 'variable type'),)) assert delayed else: - prnt(' if (LONG_MIN <= (%s) && (%s) <= LONG_MAX)' % (name, name)) - prnt(' o = PyInt_FromLong((long)(%s));' % (name,)) - prnt(' else if ((%s) <= 0)' % (name,)) - prnt(' o = PyLong_FromLongLong((long long)(%s));' % (name,)) - prnt(' else') - prnt(' o = PyLong_FromUnsignedLongLong(' - '(unsigned long long)(%s));' % (name,)) + prnt(' o = _cffi_from_c_int_const(%s);' % name) prnt(' if (o == NULL)') prnt(' return -1;') if size_too: @@ -632,13 +637,18 @@ # ---------- # enums + def _enum_funcname(self, prefix, name): + # "$enum_$1" => "___D_enum____D_1" + name = name.replace('$', '___D_') + return '_cffi_e_%s_%s' % (prefix, name) + def _generate_cpy_enum_decl(self, tp, name, prefix='enum'): if tp.partial: for enumerator in tp.enumerators: self._generate_cpy_const(True, enumerator, delayed=False) return # - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) prnt = self._prnt prnt('static int %s(PyObject *lib)' % funcname) prnt('{') @@ -760,17 +770,30 @@ #include #include -#ifdef MS_WIN32 -#include /* for alloca() */ -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; 
-typedef unsigned char _Bool; +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char _Bool; +# endif +#else +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif #if PY_MAJOR_VERSION < 3 @@ -795,6 +818,15 @@ #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble +#define _cffi_from_c_int_const(x) \ + (((x) > 0) ? \ + ((unsigned long long)(x) <= (unsigned long long)LONG_MAX) ? \ + PyInt_FromLong((long)(x)) : \ + PyLong_FromUnsignedLongLong((unsigned long long)(x)) : \ + ((long long)(x) >= (long long)LONG_MIN) ? \ + PyInt_FromLong((long)(x)) : \ + PyLong_FromLongLong((long long)(x))) + #define _cffi_from_c_int(x, type) \ (((type)-1) > 0 ? /* unsigned */ \ (sizeof(type) < sizeof(long) ? PyInt_FromLong(x) : \ @@ -804,14 +836,14 @@ PyLong_FromLongLong(x))) #define _cffi_to_c_int(o, type) \ - (sizeof(type) == 1 ? (((type)-1) > 0 ? _cffi_to_c_u8(o) \ - : _cffi_to_c_i8(o)) : \ - sizeof(type) == 2 ? (((type)-1) > 0 ? _cffi_to_c_u16(o) \ - : _cffi_to_c_i16(o)) : \ - sizeof(type) == 4 ? (((type)-1) > 0 ? _cffi_to_c_u32(o) \ - : _cffi_to_c_i32(o)) : \ - sizeof(type) == 8 ? (((type)-1) > 0 ? _cffi_to_c_u64(o) \ - : _cffi_to_c_i64(o)) : \ + (sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o) \ + : (type)_cffi_to_c_i8(o)) : \ + sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o) \ + : (type)_cffi_to_c_i16(o)) : \ + sizeof(type) == 4 ? (((type)-1) > 0 ? 
(type)_cffi_to_c_u32(o) \ + : (type)_cffi_to_c_i32(o)) : \ + sizeof(type) == 8 ? (((type)-1) > 0 ? (type)_cffi_to_c_u64(o) \ + : (type)_cffi_to_c_i64(o)) : \ (Py_FatalError("unsupported size for type " #type), 0)) #define _cffi_to_c_i8 \ @@ -885,25 +917,32 @@ return PyBool_FromLong(was_alive); } -static void _cffi_init(void) +static int _cffi_init(void) { - PyObject *module = PyImport_ImportModule("_cffi_backend"); - PyObject *c_api_object; + PyObject *module, *c_api_object = NULL; + module = PyImport_ImportModule("_cffi_backend"); if (module == NULL) - return; + goto failure; c_api_object = PyObject_GetAttrString(module, "_C_API"); if (c_api_object == NULL) - return; + goto failure; if (!PyCapsule_CheckExact(c_api_object)) { - Py_DECREF(c_api_object); PyErr_SetNone(PyExc_ImportError); - return; + goto failure; } memcpy(_cffi_exports, PyCapsule_GetPointer(c_api_object, "cffi"), _CFFI_NUM_EXPORTS * sizeof(void *)); + + Py_DECREF(module); Py_DECREF(c_api_object); + return 0; + + failure: + Py_XDECREF(module); + Py_XDECREF(c_api_object); + return -1; } #define _cffi_type(num) ((CTypeDescrObject *)PyList_GET_ITEM(_cffi_types, num)) diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py --- a/lib_pypy/cffi/vengine_gen.py +++ b/lib_pypy/cffi/vengine_gen.py @@ -249,10 +249,10 @@ prnt(' /* %s */' % str(e)) # cannot verify it, ignore prnt('}') self.export_symbols.append(layoutfuncname) - prnt('ssize_t %s(ssize_t i)' % (layoutfuncname,)) + prnt('intptr_t %s(intptr_t i)' % (layoutfuncname,)) prnt('{') prnt(' struct _cffi_aligncheck { char x; %s y; };' % cname) - prnt(' static ssize_t nums[] = {') + prnt(' static intptr_t nums[] = {') prnt(' sizeof(%s),' % cname) prnt(' offsetof(struct _cffi_aligncheck, y),') for fname, ftype, fbitsize in tp.enumfields(): @@ -276,7 +276,7 @@ return # nothing to do with opaque structs layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name) # - BFunc = self.ffi._typeof_locked("ssize_t(*)(ssize_t)")[0] + BFunc = 
self.ffi._typeof_locked("intptr_t(*)(intptr_t)")[0] function = module.load_function(BFunc, layoutfuncname) layout = [] num = 0 @@ -410,13 +410,18 @@ # ---------- # enums + def _enum_funcname(self, prefix, name): + # "$enum_$1" => "___D_enum____D_1" + name = name.replace('$', '___D_') + return '_cffi_e_%s_%s' % (prefix, name) + def _generate_gen_enum_decl(self, tp, name, prefix='enum'): if tp.partial: for enumerator in tp.enumerators: self._generate_gen_const(True, enumerator) return # - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) self.export_symbols.append(funcname) prnt = self._prnt prnt('int %s(char *out_error)' % funcname) @@ -453,7 +458,7 @@ else: BType = self.ffi._typeof_locked("char[]")[0] BFunc = self.ffi._typeof_locked("int(*)(char*)")[0] - funcname = '_cffi_e_%s_%s' % (prefix, name) + funcname = self._enum_funcname(prefix, name) function = module.load_function(BFunc, funcname) p = self.ffi.new(BType, 256) if function(p) < 0: @@ -547,20 +552,29 @@ #include #include /* XXX for ssize_t on some platforms */ -#ifdef _WIN32 -# include -# define snprintf _snprintf -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -typedef SSIZE_T ssize_t; -typedef unsigned char _Bool; +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ + typedef unsigned char 
_Bool; +# endif #else -# include +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# include +# endif #endif ''' diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -465,9 +465,13 @@ This is documented (here__ and here__). It needs 4 GB of RAM to run "rpython targetpypystandalone" on top of PyPy, a bit more when running -on CPython. If you have less than 4 GB it will just swap forever (or -fail if you don't have enough swap). On 32-bit, divide the numbers by -two. +on top of CPython. If you have less than 4 GB free, it will just swap +forever (or fail if you don't have enough swap). And we mean *free:* +if the machine has 4 GB *in total,* then it will swap. + +On 32-bit, divide the numbers by two. (We didn't try recently, but in +the past it was possible to compile a 32-bit version on a 2 GB Linux +machine with nothing else running: no Gnome/KDE, for example.) .. __: http://pypy.org/download.html#building-from-source .. __: https://pypy.readthedocs.org/en/latest/getting-started-python.html#translating-the-pypy-python-interpreter diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -30,3 +30,16 @@ x86-64, the JIT backend has a special optimization that lets it emit directly a single MOV from a %gs- or %fs-based address. It seems actually to give a good boost in performance. + +.. branch: fast-gil +A faster way to handle the GIL, particularly in JIT code. The GIL is +now a composite of two concepts: a global number (it's just set from +1 to 0 and back around CALL_RELEASE_GIL), and a real mutex. If there +are threads waiting to acquire the GIL, one of them is actively +checking the global number every 0.1 ms to 1 ms. Overall, JIT loops +full of external function calls now run a bit faster (if no thread was +started yet), or a *lot* faster (if threads were started already). + +.. 
branch: jit-get-errno +Optimize the errno handling in the JIT, notably around external +function calls. Linux-only. diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -496,6 +496,13 @@ """ +class UserDelCallback(object): + def __init__(self, w_obj, callback, descrname): + self.w_obj = w_obj + self.callback = callback + self.descrname = descrname + self.next = None + class UserDelAction(AsyncAction): """An action that invokes all pending app-level __del__() method. This is done as an action instead of immediately when the @@ -506,12 +513,18 @@ def __init__(self, space): AsyncAction.__init__(self, space) - self.dying_objects = [] + self.dying_objects = None + self.dying_objects_last = None self.finalizers_lock_count = 0 self.enabled_at_app_level = True def register_callback(self, w_obj, callback, descrname): - self.dying_objects.append((w_obj, callback, descrname)) + cb = UserDelCallback(w_obj, callback, descrname) + if self.dying_objects_last is None: + self.dying_objects = cb + else: + self.dying_objects_last.next = cb + self.dying_objects_last = cb self.fire() def perform(self, executioncontext, frame): @@ -525,13 +538,33 @@ # avoid too deep recursions of the kind of __del__ being called # while in the middle of another __del__ call. 
pending = self.dying_objects - self.dying_objects = [] + self.dying_objects = None + self.dying_objects_last = None space = self.space - for i in range(len(pending)): - w_obj, callback, descrname = pending[i] - pending[i] = (None, None, None) + while pending is not None: try: - callback(w_obj) + pending.callback(pending.w_obj) except OperationError, e: - e.write_unraisable(space, descrname, w_obj) + e.write_unraisable(space, pending.descrname, pending.w_obj) e.clear(space) # break up reference cycles + pending = pending.next + # + # Note: 'dying_objects' used to be just a regular list instead + # of a chained list. This was the cause of "leaks" if we have a + # program that constantly creates new objects with finalizers. + # Here is why: say 'dying_objects' is a long list, and there + # are n instances in it. Then we spend some time in this + # function, possibly triggering more GCs, but keeping the list + # of length n alive. Then the list is suddenly freed at the + # end, and we return to the user program. At this point the + # GC limit is still very high, because just before, there was + # a list of length n alive. Assume that the program continues + # to allocate a lot of instances with finalizers. The high GC + # limit means that it could allocate a lot of instances before + # reaching it --- possibly more than n. So the whole procedure + # repeats with higher and higher values of n. + # + # This does not occur in the current implementation because + # there is no list of length n: if n is large, then the GC + # will run several times while walking the list, but it will + # see lower and lower memory usage, with no lower bound of n. 
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -895,7 +895,7 @@ "use unwrap_spec(...=WrappedDefault(default))" % ( self._code.identifier, name, defaultval)) defs_w.append(None) - else: + elif name != '__args__' and name != 'args_w': defs_w.append(space.wrap(defaultval)) if self._code._unwrap_spec: UNDEFINED = object() diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -17,6 +17,9 @@ def enter_thread(self, space): self._value = space.createexecutioncontext() + def try_enter_thread(self, space): + return False + def signals_enabled(self): return True diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -726,6 +726,22 @@ never_called py.test.raises(AssertionError, space.wrap, gateway.interp2app_temp(g)) + def test_unwrap_spec_default_applevel_bug2(self): + space = self.space + def g(space, w_x, w_y=None, __args__=None): + return w_x + w_g = space.wrap(gateway.interp2app_temp(g)) + w_42 = space.call_function(w_g, space.wrap(42)) + assert space.int_w(w_42) == 42 + py.test.raises(gateway.OperationError, space.call_function, w_g) + # + def g(space, w_x, w_y=None, args_w=None): + return w_x + w_g = space.wrap(gateway.interp2app_temp(g)) + w_42 = space.call_function(w_g, space.wrap(42)) + assert space.int_w(w_42) == 42 + py.test.raises(gateway.OperationError, space.call_function, w_g) + def test_interp2app_doc(self): space = self.space def f(space, w_x): diff --git a/pypy/module/_cffi_backend/ccallback.py b/pypy/module/_cffi_backend/ccallback.py --- a/pypy/module/_cffi_backend/ccallback.py +++ b/pypy/module/_cffi_backend/ccallback.py @@ -183,9 +183,12 @@ misc._raw_memclear(ll_res, SIZE_OF_FFI_ARG) return # + must_leave = False ec = None + space = 
callback.space try: - ec = cerrno.get_errno_container(callback.space) + must_leave = space.threadlocals.try_enter_thread(space) + ec = cerrno.get_errno_container(space) cerrno.save_errno_into(ec, e) extra_line = '' try: @@ -206,5 +209,7 @@ except OSError: pass callback.write_error_return_value(ll_res) + if must_leave: + space.threadlocals.leave_thread(space) if ec is not None: cerrno.restore_errno_from(ec) diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py --- a/pypy/module/_cffi_backend/ctypefunc.py +++ b/pypy/module/_cffi_backend/ctypefunc.py @@ -4,7 +4,7 @@ import sys -from rpython.rlib import jit, clibffi, jit_libffi +from rpython.rlib import jit, clibffi, jit_libffi, rgc from rpython.rlib.jit_libffi import (CIF_DESCRIPTION, CIF_DESCRIPTION_P, FFI_TYPE, FFI_TYPE_P, FFI_TYPE_PP, SIZE_OF_FFI_ARG) from rpython.rlib.objectmodel import we_are_translated, instantiate @@ -63,6 +63,7 @@ CifDescrBuilder(fvarargs, self.ctitem).rawallocate(ctypefunc) return ctypefunc + @rgc.must_be_light_finalizer def __del__(self): if self.cif_descr: lltype.free(self.cif_descr, flavor='raw') @@ -156,8 +157,8 @@ data = rffi.ptradd(buffer, cif_descr.exchange_args[i]) flag = get_mustfree_flag(data) if flag == 1: - raw_string = rffi.cast(rffi.CCHARPP, data)[0] - lltype.free(raw_string, flavor='raw') + raw_cdata = rffi.cast(rffi.CCHARPP, data)[0] + lltype.free(raw_cdata, flavor='raw') lltype.free(buffer, flavor='raw') return w_res diff --git a/pypy/module/_socket/__init__.py b/pypy/module/_socket/__init__.py --- a/pypy/module/_socket/__init__.py +++ b/pypy/module/_socket/__init__.py @@ -6,8 +6,8 @@ } interpleveldefs = { - 'SocketType': 'interp_socket.W_RSocket', - 'socket' : 'interp_socket.W_RSocket', + 'SocketType': 'interp_socket.W_Socket', + 'socket' : 'interp_socket.W_Socket', 'error' : 'interp_socket.get_error(space, "error")', 'herror' : 'interp_socket.get_error(space, "herror")', 'gaierror' : 'interp_socket.get_error(space, "gaierror")', diff 
--git a/pypy/module/_socket/interp_func.py b/pypy/module/_socket/interp_func.py --- a/pypy/module/_socket/interp_func.py +++ b/pypy/module/_socket/interp_func.py @@ -1,8 +1,12 @@ -from pypy.interpreter.gateway import unwrap_spec, WrappedDefault -from pypy.module._socket.interp_socket import converted_error, W_RSocket, addr_as_object, ipaddr_from_object from rpython.rlib import rsocket from rpython.rlib.rsocket import SocketError, INVALID_SOCKET + from pypy.interpreter.error import OperationError +from pypy.interpreter.gateway import unwrap_spec, WrappedDefault +from pypy.module._socket.interp_socket import ( + converted_error, W_Socket, addr_as_object, ipaddr_from_object +) + def gethostname(space): """gethostname() -> string @@ -136,10 +140,10 @@ The remaining arguments are the same as for socket(). """ try: - sock = rsocket.fromfd(fd, family, type, proto, W_RSocket) + sock = rsocket.fromfd(fd, family, type, proto) except SocketError, e: raise converted_error(space, e) - return space.wrap(sock) + return space.wrap(W_Socket(sock)) @unwrap_spec(family=int, type=int, proto=int) def socketpair(space, family=rsocket.socketpair_default_family, @@ -153,10 +157,13 @@ AF_UNIX if defined on the platform; otherwise, the default is AF_INET. """ try: - sock1, sock2 = rsocket.socketpair(family, type, proto, W_RSocket) + sock1, sock2 = rsocket.socketpair(family, type, proto) except SocketError, e: raise converted_error(space, e) - return space.newtuple([space.wrap(sock1), space.wrap(sock2)]) + return space.newtuple([ + space.wrap(W_Socket(sock1)), + space.wrap(W_Socket(sock2)) + ]) # The following 4 functions refuse all negative numbers, like CPython 2.6. 
# They could also check that the argument is not too large, but CPython 2.6 diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -1,14 +1,18 @@ +from rpython.rlib import rsocket +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rsocket import ( + RSocket, AF_INET, SOCK_STREAM, SocketError, SocketErrorWithErrno, + RSocketError +) +from rpython.rtyper.lltypesystem import lltype, rffi + +from pypy.interpreter import gateway from pypy.interpreter.baseobjspace import W_Root -from pypy.interpreter.typedef import TypeDef, make_weakref_descr,\ - interp_attrproperty +from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from rpython.rlib.rarithmetic import intmask -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib import rsocket -from rpython.rlib.rsocket import RSocket, AF_INET, SOCK_STREAM -from rpython.rlib.rsocket import SocketError, SocketErrorWithErrno, RSocketError -from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter import gateway +from pypy.interpreter.typedef import ( + GetSetProperty, TypeDef, make_weakref_descr +) # XXX Hack to seperate rpython and pypy @@ -124,10 +128,18 @@ return addr -class W_RSocket(W_Root, RSocket): - def __del__(self): - self.clear_all_weakrefs() - RSocket.__del__(self) +class W_Socket(W_Root): + def __init__(self, sock): + self.sock = sock + + def get_type_w(self, space): + return space.wrap(self.sock.type) + + def get_proto_w(self, space): + return space.wrap(self.sock.proto) + + def get_family_w(self, space): + return space.wrap(self.sock.family) def accept_w(self, space): """accept() -> (socket object, address info) @@ -137,22 +149,22 @@ info is a pair (hostaddr, port). 
""" try: - fd, addr = self.accept() + fd, addr = self.sock.accept() sock = rsocket.make_socket( - fd, self.family, self.type, self.proto, W_RSocket) - return space.newtuple([space.wrap(sock), + fd, self.sock.family, self.sock.type, self.sock.proto) + return space.newtuple([space.wrap(W_Socket(sock)), addr_as_object(addr, sock.fd, space)]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) # convert an Address into an app-level object def addr_as_object(self, space, address): - return addr_as_object(address, self.fd, space) + return addr_as_object(address, self.sock.fd, space) # convert an app-level object into an Address # based on the current socket's family def addr_from_object(self, space, w_address): - return addr_from_object(self.family, space, w_address) + return addr_from_object(self.sock.family, space, w_address) def bind_w(self, space, w_addr): """bind(address) @@ -162,8 +174,8 @@ sockets the address is a tuple (ifname, proto [,pkttype [,hatype]]) """ try: - self.bind(self.addr_from_object(space, w_addr)) - except SocketError, e: + self.sock.bind(self.addr_from_object(space, w_addr)) + except SocketError as e: raise converted_error(space, e) def close_w(self, space): @@ -172,7 +184,7 @@ Close the socket. It cannot be used after this call. """ try: - self.close() + self.sock.close() except SocketError: # cpython doesn't return any errors on close pass @@ -184,8 +196,8 @@ is a pair (host, port). 
""" try: - self.connect(self.addr_from_object(space, w_addr)) - except SocketError, e: + self.sock.connect(self.addr_from_object(space, w_addr)) + except SocketError as e: raise converted_error(space, e) def connect_ex_w(self, space, w_addr): @@ -196,15 +208,16 @@ """ try: addr = self.addr_from_object(space, w_addr) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) - error = self.connect_ex(addr) + error = self.sock.connect_ex(addr) return space.wrap(error) def dup_w(self, space): try: - return self.dup(W_RSocket) - except SocketError, e: + sock = self.sock.dup() + return W_Socket(sock) + except SocketError as e: raise converted_error(space, e) def fileno_w(self, space): @@ -212,7 +225,7 @@ Return the integer file descriptor of the socket. """ - return space.wrap(intmask(self.fd)) + return space.wrap(intmask(self.sock.fd)) def getpeername_w(self, space): """getpeername() -> address info @@ -221,9 +234,9 @@ info is a pair (hostaddr, port). """ try: - addr = self.getpeername() - return addr_as_object(addr, self.fd, space) - except SocketError, e: + addr = self.sock.getpeername() + return addr_as_object(addr, self.sock.fd, space) + except SocketError as e: raise converted_error(space, e) def getsockname_w(self, space): @@ -233,9 +246,9 @@ info is a pair (hostaddr, port). 
""" try: - addr = self.getsockname() - return addr_as_object(addr, self.fd, space) - except SocketError, e: + addr = self.sock.getsockname() + return addr_as_object(addr, self.sock.fd, space) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(level=int, optname=int) @@ -248,11 +261,11 @@ """ if w_buflen is None: try: - return space.wrap(self.getsockopt_int(level, optname)) - except SocketError, e: + return space.wrap(self.sock.getsockopt_int(level, optname)) + except SocketError as e: raise converted_error(space, e) buflen = space.int_w(w_buflen) - return space.wrap(self.getsockopt(level, optname, buflen)) + return space.wrap(self.sock.getsockopt(level, optname, buflen)) def gettimeout_w(self, space): """gettimeout() -> timeout @@ -260,7 +273,7 @@ Returns the timeout in floating seconds associated with socket operations. A timeout of None indicates that timeouts on socket """ - timeout = self.gettimeout() + timeout = self.sock.gettimeout() if timeout < 0.0: return space.w_None return space.wrap(timeout) @@ -274,8 +287,8 @@ will allow before refusing new connections. """ try: - self.listen(backlog) - except SocketError, e: + self.sock.listen(backlog) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(w_mode = WrappedDefault("r"), @@ -298,8 +311,8 @@ the remote end is closed and all data is read, return the empty string. """ try: - data = self.recv(buffersize, flags) - except SocketError, e: + data = self.sock.recv(buffersize, flags) + except SocketError as e: raise converted_error(space, e) return space.wrap(data) @@ -310,13 +323,13 @@ Like recv(buffersize, flags) but also return the sender's address info. 
""" try: - data, addr = self.recvfrom(buffersize, flags) + data, addr = self.sock.recvfrom(buffersize, flags) if addr: - w_addr = addr_as_object(addr, self.fd, space) + w_addr = addr_as_object(addr, self.sock.fd, space) else: w_addr = space.w_None return space.newtuple([space.wrap(data), w_addr]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) @unwrap_spec(data='bufferstr', flags=int) @@ -328,8 +341,8 @@ sent; this may be less than len(data) if the network is busy. """ try: - count = self.send(data, flags) - except SocketError, e: + count = self.sock.send(data, flags) + except SocketError as e: raise converted_error(space, e) return space.wrap(count) @@ -343,8 +356,9 @@ to tell how much data has been sent. """ try: - self.sendall(data, flags, space.getexecutioncontext().checksignals) - except SocketError, e: + self.sock.sendall( + data, flags, space.getexecutioncontext().checksignals) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(data='bufferstr') @@ -364,8 +378,8 @@ w_addr = w_param3 try: addr = self.addr_from_object(space, w_addr) - count = self.sendto(data, flags, addr) - except SocketError, e: + count = self.sock.sendto(data, flags, addr) + except SocketError as e: raise converted_error(space, e) return space.wrap(count) @@ -377,7 +391,7 @@ setblocking(True) is equivalent to settimeout(None); setblocking(False) is equivalent to settimeout(0.0). 
""" - self.setblocking(flag) + self.sock.setblocking(flag) @unwrap_spec(level=int, optname=int) def setsockopt_w(self, space, level, optname, w_optval): @@ -391,13 +405,13 @@ except: optval = space.str_w(w_optval) try: - self.setsockopt(level, optname, optval) - except SocketError, e: + self.sock.setsockopt(level, optname, optval) + except SocketError as e: raise converted_error(space, e) return try: - self.setsockopt_int(level, optname, optval) - except SocketError, e: + self.sock.setsockopt_int(level, optname, optval) + except SocketError as e: raise converted_error(space, e) def settimeout_w(self, space, w_timeout): @@ -415,7 +429,7 @@ if timeout < 0.0: raise OperationError(space.w_ValueError, space.wrap('Timeout value out of range')) - self.settimeout(timeout) + self.sock.settimeout(timeout) @unwrap_spec(nbytes=int, flags=int) def recv_into_w(self, space, w_buffer, nbytes=0, flags=0): @@ -424,8 +438,8 @@ if nbytes == 0 or nbytes > lgt: nbytes = lgt try: - return space.wrap(self.recvinto(rwbuffer, nbytes, flags)) - except SocketError, e: + return space.wrap(self.sock.recvinto(rwbuffer, nbytes, flags)) + except SocketError as e: raise converted_error(space, e) @unwrap_spec(nbytes=int, flags=int) @@ -435,13 +449,13 @@ if nbytes == 0 or nbytes > lgt: nbytes = lgt try: - readlgt, addr = self.recvfrom_into(rwbuffer, nbytes, flags) + readlgt, addr = self.sock.recvfrom_into(rwbuffer, nbytes, flags) if addr: - w_addr = addr_as_object(addr, self.fd, space) + w_addr = addr_as_object(addr, self.sock.fd, space) else: w_addr = space.w_None return space.newtuple([space.wrap(readlgt), w_addr]) - except SocketError, e: + except SocketError as e: raise converted_error(space, e) @unwrap_spec(cmd=int) @@ -473,7 +487,7 @@ option_ptr.c_keepaliveinterval = space.uint_w(w_interval) res = _c.WSAIoctl( - self.fd, cmd, value_ptr, value_size, + self.sock.fd, cmd, value_ptr, value_size, rffi.NULL, 0, recv_ptr, rffi.NULL, rffi.NULL) if res < 0: raise converted_error(space, 
rsocket.last_error()) @@ -494,8 +508,8 @@ (flag == SHUT_RDWR). """ try: - self.shutdown(how) - except SocketError, e: + self.sock.shutdown(how) + except SocketError as e: raise converted_error(space, e) #------------------------------------------------------------ @@ -536,12 +550,13 @@ @unwrap_spec(family=int, type=int, proto=int) def newsocket(space, w_subtype, family=AF_INET, type=SOCK_STREAM, proto=0): - sock = space.allocate_instance(W_RSocket, w_subtype) + self = space.allocate_instance(W_Socket, w_subtype) try: - W_RSocket.__init__(sock, family, type, proto) - except SocketError, e: + sock = RSocket(family, type, proto) + except SocketError as e: raise converted_error(space, e) - return space.wrap(sock) + W_Socket.__init__(self, sock) + return space.wrap(self) descr_socket_new = interp2app(newsocket) # ____________________________________________________________ @@ -597,10 +612,10 @@ socketmethods = {} for methodname in socketmethodnames: - method = getattr(W_RSocket, methodname + '_w') + method = getattr(W_Socket, methodname + '_w') socketmethods[methodname] = interp2app(method) -W_RSocket.typedef = TypeDef("_socket.socket", +W_Socket.typedef = TypeDef("_socket.socket", __doc__ = """\ socket([family[, type[, proto]]]) -> socket object @@ -639,9 +654,9 @@ [*] not available on all platforms!""", __new__ = descr_socket_new, - __weakref__ = make_weakref_descr(W_RSocket), - type = interp_attrproperty('type', W_RSocket), - proto = interp_attrproperty('proto', W_RSocket), - family = interp_attrproperty('family', W_RSocket), + __weakref__ = make_weakref_descr(W_Socket), + type = GetSetProperty(W_Socket.get_type_w), + proto = GetSetProperty(W_Socket.get_proto_w), + family = GetSetProperty(W_Socket.get_family_w), ** socketmethods ) diff --git a/pypy/module/_weakref/test/test_weakref.py b/pypy/module/_weakref/test/test_weakref.py --- a/pypy/module/_weakref/test/test_weakref.py +++ b/pypy/module/_weakref/test/test_weakref.py @@ -15,6 +15,10 @@ gc.collect() assert ref() 
is None + def test_missing_arg(self): + import _weakref + raises(TypeError, _weakref.ref) + def test_callback(self): import _weakref, gc class A(object): diff --git a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py @@ -866,25 +866,25 @@ def test_enum(self): ffi = FFI(backend=self.Backend()) - ffi.cdef("enum foo { A, B, CC, D };") - assert ffi.string(ffi.cast("enum foo", 0)) == "A" - assert ffi.string(ffi.cast("enum foo", 2)) == "CC" - assert ffi.string(ffi.cast("enum foo", 3)) == "D" + ffi.cdef("enum foo { A0, B0, CC0, D0 };") + assert ffi.string(ffi.cast("enum foo", 0)) == "A0" + assert ffi.string(ffi.cast("enum foo", 2)) == "CC0" + assert ffi.string(ffi.cast("enum foo", 3)) == "D0" assert ffi.string(ffi.cast("enum foo", 4)) == "4" - ffi.cdef("enum bar { A, B=-2, CC, D, E };") - assert ffi.string(ffi.cast("enum bar", 0)) == "A" - assert ffi.string(ffi.cast("enum bar", -2)) == "B" - assert ffi.string(ffi.cast("enum bar", -1)) == "CC" - assert ffi.string(ffi.cast("enum bar", 1)) == "E" + ffi.cdef("enum bar { A1, B1=-2, CC1, D1, E1 };") + assert ffi.string(ffi.cast("enum bar", 0)) == "A1" + assert ffi.string(ffi.cast("enum bar", -2)) == "B1" + assert ffi.string(ffi.cast("enum bar", -1)) == "CC1" + assert ffi.string(ffi.cast("enum bar", 1)) == "E1" assert ffi.cast("enum bar", -2) != ffi.cast("enum bar", -2) assert ffi.cast("enum foo", 0) != ffi.cast("enum bar", 0) assert ffi.cast("enum bar", 0) != ffi.cast("int", 0) - assert repr(ffi.cast("enum bar", -1)) == "" + assert repr(ffi.cast("enum bar", -1)) == "" assert repr(ffi.cast("enum foo", -1)) == ( # enums are unsigned, if "") # they contain no neg value - ffi.cdef("enum baz { A=0x1000, B=0x2000 };") - assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A" - assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B" + ffi.cdef("enum baz { 
A2=0x1000, B2=0x2000 };") + assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A2" + assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B2" def test_enum_in_struct(self): ffi = FFI(backend=self.Backend()) @@ -1323,6 +1323,16 @@ e = ffi.cast("enum e", 0) assert ffi.string(e) == "AA" # pick the first one arbitrarily + def test_enum_refer_previous_enum_value(self): + ffi = FFI(backend=self.Backend()) + ffi.cdef("enum e { AA, BB=2, CC=4, DD=BB, EE, FF=CC, GG=FF };") + assert ffi.string(ffi.cast("enum e", 2)) == "BB" + assert ffi.string(ffi.cast("enum e", 3)) == "EE" + assert ffi.sizeof("char[DD]") == 2 + assert ffi.sizeof("char[EE]") == 3 + assert ffi.sizeof("char[FF]") == 4 + assert ffi.sizeof("char[GG]") == 4 + def test_nested_anonymous_struct(self): ffi = FFI(backend=self.Backend()) ffi.cdef(""" @@ -1544,6 +1554,7 @@ ffi2.include(ffi1) p = ffi2.cast("enum foo", 1) assert ffi2.string(p) == "FB" + assert ffi2.sizeof("char[FC]") == 2 def test_include_typedef_2(self): backend = self.Backend() @@ -1564,10 +1575,32 @@ assert ffi.alignof("struct is_packed") == 1 s = ffi.new("struct is_packed[2]") s[0].b = 42623381 - s[0].a = 'X' + s[0].a = b'X' s[1].b = -4892220 - s[1].a = 'Y' + s[1].a = b'Y' assert s[0].b == 42623381 - assert s[0].a == 'X' + assert s[0].a == b'X' assert s[1].b == -4892220 - assert s[1].a == 'Y' + assert s[1].a == b'Y' + + def test_define_integer_constant(self): + ffi = FFI(backend=self.Backend()) + ffi.cdef(""" + #define DOT_0 0 + #define DOT 100 + #define DOT_OCT 0100l + #define DOT_HEX 0x100u + #define DOT_HEX2 0X10 + #define DOT_UL 1000UL + enum foo {AA, BB=DOT, CC}; + """) + lib = ffi.dlopen(None) + assert ffi.string(ffi.cast("enum foo", 100)) == "BB" + assert lib.DOT_0 == 0 + assert lib.DOT == 100 + assert lib.DOT_OCT == 0o100 + assert lib.DOT_HEX == 0x100 + assert lib.DOT_HEX2 == 0x10 + assert lib.DOT_UL == 1000 + + diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/test_function.py --- 
a/pypy/module/test_lib_pypy/cffi_tests/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_function.py @@ -36,13 +36,11 @@ return self._value lib_m = 'm' -has_sinf = True if sys.platform == 'win32': #there is a small chance this fails on Mingw via environ $CC import distutils.ccompiler if distutils.ccompiler.get_default_compiler() == 'msvc': lib_m = 'msvcrt' - has_sinf = False class TestFunction(object): Backend = CTypesBackend @@ -57,8 +55,8 @@ assert x == math.sin(1.23) def test_sinf(self): - if not has_sinf: - py.test.skip("sinf not available") + if sys.platform == 'win32': + py.test.skip("no sinf found in the Windows stdlib") ffi = FFI(backend=self.Backend()) ffi.cdef(""" float sinf(float x); diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py @@ -162,9 +162,10 @@ def test_define_not_supported_for_now(): ffi = FFI(backend=FakeBackend()) - e = py.test.raises(CDefError, ffi.cdef, "#define FOO 42") - assert str(e.value) == \ - 'only supports the syntax "#define FOO ..." for now (literally)' + e = py.test.raises(CDefError, ffi.cdef, '#define FOO "blah"') + assert str(e.value) == ( + 'only supports the syntax "#define FOO ..." 
(literally)' + ' or "#define FOO 0x1FF" for now') def test_unnamed_struct(): ffi = FFI(backend=FakeBackend()) diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py @@ -1,5 +1,5 @@ # Generated by pypy/tool/import_cffi.py -import py +import py, re import sys, os, math, weakref from cffi import FFI, VerificationError, VerificationMissing, model from pypy.module.test_lib_pypy.cffi_tests.support import * @@ -30,6 +30,24 @@ def setup_module(): import cffi.verifier cffi.verifier.cleanup_tmpdir() + # + # check that no $ sign is produced in the C file; it used to be the + # case that anonymous enums would produce '$enum_$1', which was + # used as part of a function name. GCC accepts such names, but it's + # apparently non-standard. + _r_comment = re.compile(r"/\*.*?\*/|//.*?$", re.DOTALL | re.MULTILINE) + _r_string = re.compile(r'\".*?\"') + def _write_source_and_check(self, file=None): + base_write_source(self, file) + if file is None: + f = open(self.sourcefilename) + data = f.read() + f.close() + data = _r_comment.sub(' ', data) + data = _r_string.sub('"skipped"', data) + assert '$' not in data + base_write_source = cffi.verifier.Verifier._write_source + cffi.verifier.Verifier._write_source = _write_source_and_check def test_module_type(): @@ -154,6 +172,9 @@ all_primitive_types = model.PrimitiveType.ALL_PRIMITIVE_TYPES +if sys.platform == 'win32': + all_primitive_types = all_primitive_types.copy() + del all_primitive_types['ssize_t'] all_integer_types = sorted(tp for tp in all_primitive_types if all_primitive_types[tp] == 'i') all_float_types = sorted(tp for tp in all_primitive_types @@ -1453,8 +1474,8 @@ assert func() == 42 def test_FILE_stored_in_stdout(): - if sys.platform == 'win32': - py.test.skip("MSVC: cannot assign to stdout") + if not sys.platform.startswith('linux'): + 
py.test.skip("likely, we cannot assign to stdout") ffi = FFI() ffi.cdef("int printf(const char *, ...); FILE *setstdout(FILE *);") lib = ffi.verify(""" @@ -1637,8 +1658,8 @@ ffi = FFI() ffi.cdef(""" int (*python_callback)(int how_many, int *values); - void *const c_callback; /* pass this ptr to C routines */ - int some_c_function(void *cb); + int (*const c_callback)(int,...); /* pass this ptr to C routines */ + int some_c_function(int(*cb)(int,...)); """) lib = ffi.verify(""" #include @@ -1885,3 +1906,60 @@ p = lib.f2(42) x = lib.f1(p) assert x == 42 + +def _run_in_multiple_threads(test1): + test1() + import sys + try: + import thread + except ImportError: + import _thread as thread + errors = [] + def wrapper(lock): + try: + test1() + except: + errors.append(sys.exc_info()) + lock.release() + locks = [] + for i in range(10): + _lock = thread.allocate_lock() + _lock.acquire() + thread.start_new_thread(wrapper, (_lock,)) + locks.append(_lock) + for _lock in locks: + _lock.acquire() + if errors: + raise errors[0][1] + +def test_errno_working_even_with_pypys_jit(): + ffi = FFI() + ffi.cdef("int f(int);") + lib = ffi.verify(""" + #include + int f(int x) { return (errno = errno + x); } + """) + @_run_in_multiple_threads + def test1(): + ffi.errno = 0 + for i in range(10000): + e = lib.f(1) + assert e == i + 1 + assert ffi.errno == e + for i in range(10000): + ffi.errno = i + e = lib.f(42) + assert e == i + 42 + +def test_getlasterror_working_even_with_pypys_jit(): + if sys.platform != 'win32': + py.test.skip("win32-only test") + ffi = FFI() + ffi.cdef("void SetLastError(DWORD);") + lib = ffi.dlopen("Kernel32.dll") + @_run_in_multiple_threads + def test1(): + for i in range(10000): + n = (1 << 29) + i + lib.SetLastError(n) + assert ffi.getwinerror()[0] == n diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_version.py b/pypy/module/test_lib_pypy/cffi_tests/test_version.py --- a/pypy/module/test_lib_pypy/cffi_tests/test_version.py +++ 
b/pypy/module/test_lib_pypy/cffi_tests/test_version.py @@ -11,7 +11,6 @@ '0.7.1': '0.7', # did not change '0.7.2': '0.7', # did not change '0.8.1': '0.8', # did not change (essentially) - '0.8.2': '0.8', # did not change } def test_version(): @@ -26,7 +25,7 @@ content = open(p).read() # v = cffi.__version__ - assert ("version = '%s'\n" % BACKEND_VERSIONS.get(v, v)) in content + assert ("version = '%s'\n" % v[:3]) in content assert ("release = '%s'\n" % v) in content def test_doc_version_file(): diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -7,7 +7,7 @@ # all but one will be blocked. The other threads get a chance to run # from time to time, using the periodic action GILReleaseAction. -from rpython.rlib import rthread +from rpython.rlib import rthread, rgil from pypy.module.thread.error import wrap_thread_error from pypy.interpreter.executioncontext import PeriodicAsyncAction from pypy.module.thread.threadlocals import OSThreadLocals @@ -25,8 +25,7 @@ use_bytecode_counter=True) def _initialize_gil(self, space): - if not rthread.gil_allocate(): - raise wrap_thread_error(space, "can't allocate GIL") + rgil.gil_allocate() def setup_threads(self, space): """Enable threads in the object space, if they haven't already been.""" @@ -71,15 +70,13 @@ def before_external_call(): # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! - e = get_errno() - rthread.gil_release() - set_errno(e) + rgil.gil_release() before_external_call._gctransformer_hint_cannot_collect_ = True before_external_call._dont_reach_me_in_del_ = True def after_external_call(): e = get_errno() - rthread.gil_acquire() + rgil.gil_acquire() rthread.gc_thread_run() after_thread_switch() set_errno(e) @@ -97,7 +94,7 @@ # explicitly release the gil, in a way that tries to give more # priority to other threads (as opposed to continuing to run in # the same thread). 
- if rthread.gil_yield_thread(): + if rgil.gil_yield_thread(): rthread.gc_thread_run() after_thread_switch() do_yield_thread._gctransformer_hint_close_stack_ = True diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py --- a/pypy/module/thread/threadlocals.py +++ b/pypy/module/thread/threadlocals.py @@ -27,6 +27,12 @@ "Notification that the current thread is about to start running." self._set_ec(space.createexecutioncontext()) + def try_enter_thread(self, space): + if rthread.get_ident() in self._valuedict: + return False + self.enter_thread(space) + return True + def _set_ec(self, ec): ident = rthread.get_ident() if self._mainthreadident == 0 or self._mainthreadident == ident: diff --git a/pypy/tool/gcdump.py b/pypy/tool/gcdump.py --- a/pypy/tool/gcdump.py +++ b/pypy/tool/gcdump.py @@ -43,7 +43,7 @@ def print_summary(self): items = self.summary.items() - items.sort(key=lambda(typenum, stat): stat[1]) # sort by totalsize + items.sort(key=lambda (typenum, stat): stat[1]) # sort by totalsize totalsize = 0 for typenum, stat in items: totalsize += stat[1] diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -303,28 +303,39 @@ @staticmethod @rgc.no_collect - def _release_gil_asmgcc(css): - # similar to trackgcroot.py:pypy_asm_stackwalk, first part - from rpython.memory.gctransform import asmgcroot - new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css) - next = asmgcroot.gcrootanchor.next - new.next = next - new.prev = asmgcroot.gcrootanchor - asmgcroot.gcrootanchor.next = new - next.prev = new - # and now release the GIL - before = rffi.aroundstate.before - if before: - before() + def _reacquire_gil_asmgcc(css, old_rpy_fastgil): + # Before doing an external call, 'rpy_fastgil' is initialized to + # be equal to css. 
This function is called if we find out after + # the call that it is no longer equal to css. See description + # in translator/c/src/thread_pthread.c. - @staticmethod - @rgc.no_collect - def _reacquire_gil_asmgcc(css): - # first reacquire the GIL - after = rffi.aroundstate.after - if after: - after() - # similar to trackgcroot.py:pypy_asm_stackwalk, second part + if old_rpy_fastgil == 0: + # this case occurs if some other thread stole the GIL but + # released it again. What occurred here is that we changed + # 'rpy_fastgil' from 0 to 1, thus successfully reaquiring the + # GIL. + pass + + elif old_rpy_fastgil == 1: + # 'rpy_fastgil' was (and still is) locked by someone else. + # We need to wait for the regular mutex. + after = rffi.aroundstate.after + if after: + after() + else: + # stole the GIL from a different thread that is also + # currently in an external call from the jit. Attach + # the 'old_rpy_fastgil' into the chained list. + from rpython.memory.gctransform import asmgcroot + oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil) + next = asmgcroot.gcrootanchor.next + oth.next = next + oth.prev = asmgcroot.gcrootanchor + asmgcroot.gcrootanchor.next = oth + next.prev = oth + + # similar to trackgcroot.py:pypy_asm_stackwalk, second part: + # detach the 'css' from the chained list from rpython.memory.gctransform import asmgcroot old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css) prev = old.prev @@ -334,42 +345,28 @@ @staticmethod @rgc.no_collect - def _release_gil_shadowstack(): - before = rffi.aroundstate.before - if before: - before() - - @staticmethod - @rgc.no_collect def _reacquire_gil_shadowstack(): + # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode, + # 'rpy_fastgil' contains only zero or non-zero, and this is only + # called when the old value stored in 'rpy_fastgil' was non-zero + # (i.e. 
still locked, must wait with the regular mutex) after = rffi.aroundstate.after if after: after() - @staticmethod - def _no_op(): - pass - - _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void)) - _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP], - lltype.Void)) + _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void)) + _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed], + lltype.Void)) def _build_release_gil(self, gcrootmap): - if gcrootmap is None: - releasegil_func = llhelper(self._NOARG_FUNC, self._no_op) - reacqgil_func = llhelper(self._NOARG_FUNC, self._no_op) - elif gcrootmap.is_shadow_stack: - releasegil_func = llhelper(self._NOARG_FUNC, - self._release_gil_shadowstack) - reacqgil_func = llhelper(self._NOARG_FUNC, + if gcrootmap is None or gcrootmap.is_shadow_stack: + reacqgil_func = llhelper(self._REACQGIL0_FUNC, self._reacquire_gil_shadowstack) + self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func) else: - releasegil_func = llhelper(self._CLOSESTACK_FUNC, - self._release_gil_asmgcc) - reacqgil_func = llhelper(self._CLOSESTACK_FUNC, + reacqgil_func = llhelper(self._REACQGIL2_FUNC, self._reacquire_gil_asmgcc) - self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func) - self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func) + self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func) def _is_asmgcc(self): gcrootmap = self.cpu.gc_ll_descr.gcrootmap diff --git a/rpython/jit/backend/llsupport/callbuilder.py b/rpython/jit/backend/llsupport/callbuilder.py --- a/rpython/jit/backend/llsupport/callbuilder.py +++ b/rpython/jit/backend/llsupport/callbuilder.py @@ -1,4 +1,7 @@ from rpython.rlib.clibffi import FFI_DEFAULT_ABI +from rpython.rlib import rgil +from rpython.rtyper.lltypesystem import lltype, rffi + class AbstractCallBuilder(object): @@ -42,20 +45,21 @@ def emit_call_release_gil(self): """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr and reacqgil_addr.""" + fastgil = 
rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil()) self.select_call_release_gil_mode() self.prepare_arguments() self.push_gcmap_for_call_release_gil() - self.call_releasegil_addr_and_move_real_arguments() + self.call_releasegil_addr_and_move_real_arguments(fastgil) self.emit_raw_call() self.restore_stack_pointer() - self.move_real_result_and_call_reacqgil_addr() + self.move_real_result_and_call_reacqgil_addr(fastgil) self.pop_gcmap() self.load_result() - def call_releasegil_addr_and_move_real_arguments(self): + def call_releasegil_addr_and_move_real_arguments(self, fastgil): raise NotImplementedError - def move_real_result_and_call_reacqgil_addr(self): + def move_real_result_and_call_reacqgil_addr(self, fastgil): raise NotImplementedError def select_call_release_gil_mode(self): diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py --- a/rpython/jit/backend/llsupport/test/test_gc_integration.py +++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py @@ -2,6 +2,7 @@ """ Tests for register allocation for common constructs """ +import py import re from rpython.jit.metainterp.history import TargetToken, BasicFinalDescr,\ JitCellToken, BasicFailDescr, AbstractDescr @@ -780,6 +781,9 @@ assert rffi.cast(JITFRAMEPTR, cpu.gc_ll_descr.write_barrier_on_frame_called) == frame def test_call_release_gil(self): + py.test.skip("xxx fix this test: the code is now assuming that " + "'before' is just rgil.release_gil(), and 'after' is " + "only needed if 'rpy_fastgil' was not changed.") # note that we can't test floats here because when untranslated # people actually wreck xmm registers cpu = self.cpu diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -3,13 +3,16 @@ from rpython.rlib.jit import 
JitDriver, unroll_parameters, set_param from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote -from rpython.rlib import jit_hooks +from rpython.rlib import jit_hooks, rposix from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rthread import ThreadLocalReference from rpython.jit.backend.detect_cpu import getcpuclass from rpython.jit.backend.test.support import CCompiledMixin from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.config.config import ConfigError +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.rtyper.lltypesystem import lltype, rffi + class TranslationTest(CCompiledMixin): CPUClass = getcpuclass() @@ -24,6 +27,8 @@ # - full optimizer # - floats neg and abs # - threadlocalref_get + # - get_errno, set_errno + # - llexternal with macro=True class Frame(object): _virtualizable_ = ['i'] @@ -35,9 +40,15 @@ pass t = ThreadLocalReference(Foo) - @dont_look_inside - def myabs(x): - return abs(x) + eci = ExternalCompilationInfo(post_include_bits=[''' +#define pypy_my_fabs(x) fabs(x) +''']) + myabs1 = rffi.llexternal('pypy_my_fabs', [lltype.Float], + lltype.Float, macro=True, releasegil=False, + compilation_info=eci) + myabs2 = rffi.llexternal('pypy_my_fabs', [lltype.Float], + lltype.Float, macro=True, releasegil=True, + compilation_info=eci) jitdriver = JitDriver(greens = [], reds = ['total', 'frame', 'j'], @@ -60,13 +71,14 @@ frame.i -= 1 j *= -0.712 if j + (-j): raise ValueError - k = myabs(j) + k = myabs1(myabs2(j)) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError if t.get().nine != 9: raise ValueError + rposix.set_errno(total) + if rposix.get_errno() != total: raise ValueError return chr(total % 253) # - from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rlib.libffi import types, CDLL, ArgChain from rpython.rlib.test.test_clibffi import get_libm_name libm_name = get_libm_name(sys.platform) diff --git 
a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1934,42 +1934,6 @@ self._genop_call(op, arglocs, result_loc, is_call_release_gil=True) self._emit_guard_not_forced(guard_token) - def call_reacquire_gil(self, gcrootmap, save_loc): - # save the previous result (eax/xmm0) into the stack temporarily. - # XXX like with call_release_gil(), we assume that we don't need - # to save xmm0 in this case. - if isinstance(save_loc, RegLoc) and not save_loc.is_xmm: - self.mc.MOV_sr(WORD, save_loc.value) - # call the reopenstack() function (also reacquiring the GIL) - if gcrootmap.is_shadow_stack: - args = [] - css = 0 - else: - from rpython.memory.gctransform import asmgcroot - css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS) - if IS_X86_32: - reg = eax - elif IS_X86_64: - reg = edi - self.mc.LEA_rs(reg.value, css) - args = [reg] - self._emit_call(imm(self.reacqgil_addr), args, can_collect=False) - # - # Now that we required the GIL, we can reload a possibly modified ebp - if not gcrootmap.is_shadow_stack: - # special-case: reload ebp from the css - from rpython.memory.gctransform import asmgcroot - index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP) - self.mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp] - #else: - # for shadowstack, done for us by _reload_frame_if_necessary() - self._reload_frame_if_necessary(self.mc) - self.set_extra_stack_depth(self.mc, 0) - # - # restore the result from the stack - if isinstance(save_loc, RegLoc) and not save_loc.is_xmm: - self.mc.MOV_rs(save_loc.value, WORD) - def imm(self, v): return imm(v) @@ -2361,12 +2325,38 @@ ed = effectinfo.extradescrs[0] assert isinstance(ed, ThreadLocalRefDescr) addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) + # 'addr1' is the address is the current thread, but we assume that + # it is a thread-local at a constant offset from %fs/%gs. 
addr0 = stmtlocal.threadlocal_base() addr = addr1 - addr0 assert rx86.fits_in_32bits(addr) mc = self.mc - mc.writechar(stmtlocal.SEGMENT_TL) # prefix - mc.MOV_rj(resloc.value, addr) + mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs + mc.MOV_rj(resloc.value, addr) # memory read + + def get_set_errno(self, op, loc, issue_a_write): + # this function is only called on Linux + from rpython.jit.backend.x86 import stmtlocal + addr = stmtlocal.get_errno_tl() + assert rx86.fits_in_32bits(addr) + mc = self.mc + mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs + # !!important: the *next* instruction must be the one using 'addr'!! + if issue_a_write: + if isinstance(loc, RegLoc): + mc.MOV32_jr(addr, loc.value) # memory write from reg + else: + assert isinstance(loc, ImmedLoc) + newvalue = loc.value + newvalue = rffi.cast(rffi.INT, newvalue) + newvalue = rffi.cast(lltype.Signed, newvalue) + mc.MOV32_ji(addr, newvalue) # memory write immediate + else: + assert isinstance(loc, RegLoc) + if IS_X86_32: + mc.MOV_rj(loc.value, addr) # memory read + elif IS_X86_64: + mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py --- a/rpython/jit/backend/x86/callbuilder.py +++ b/rpython/jit/backend/x86/callbuilder.py @@ -25,9 +25,6 @@ # arguments, we need to decrease esp temporarily stack_max = PASS_ON_MY_FRAME From noreply at buildbot.pypy.org Mon Jun 30 03:11:39 2014 From: noreply at buildbot.pypy.org (mattip) Date: Mon, 30 Jun 2014 03:11:39 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: document branch Message-ID: <20140630011139.78E571C024A@cobra.cs.uni-duesseldorf.de> Author: mattip Branch: disable_pythonapi Changeset: r72277:21dd51d4e286 Date: 2014-06-30 04:10 +0300 http://bitbucket.org/pypy/pypy/changeset/21dd51d4e286/ Log: document branch diff --git a/pypy/doc/whatsnew-head.rst 
b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -43,3 +43,7 @@ .. branch: jit-get-errno Optimize the errno handling in the JIT, notably around external function calls. Linux-only. + +.. branch: disable_pythonapi +Remove non-functioning ctypes.pyhonapi and ctypes.PyDLL, document this +incompatability with cpython. Recast sys.dllhandle to an int. From noreply at buildbot.pypy.org Mon Jun 30 09:10:16 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 09:10:16 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: Small fixes Message-ID: <20140630071016.BBEF11C024A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: disable_pythonapi Changeset: r72278:da5ba751361e Date: 2014-06-30 09:09 +0200 http://bitbucket.org/pypy/pypy/changeset/da5ba751361e/ Log: Small fixes diff --git a/pypy/doc/ctypes-implementation.rst b/pypy/doc/ctypes-implementation.rst --- a/pypy/doc/ctypes-implementation.rst +++ b/pypy/doc/ctypes-implementation.rst @@ -72,14 +72,11 @@ Here is a list of the limitations and missing features of the current implementation: -* ``ctypes.pythonapi`` lets you access the CPython C API - emulation layer. It does not work on PyPy. - - Note that even if it worked, our implementation would not do anything - sensible about the GIL and the functions will be named with an extra - "Py", for example ``PyPyInt_FromLong()``. Basically, don't use this. - Assuming the PyObject pointers you get have any particular fields in - any particular order is just going to crash. +* ``ctypes.pythonapi`` is missing. In previous versions, it was present + and redirected to the `cpyext` C API emulation layer, but our + implementation did not do anything sensible about the GIL and the + functions were named with an extra "Py", for example + ``PyPyInt_FromLong()``. It was removed for being unhelpful. 
* We copy Python strings instead of having pointers to raw buffers diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -46,4 +46,4 @@ .. branch: disable_pythonapi Remove non-functioning ctypes.pyhonapi and ctypes.PyDLL, document this -incompatability with cpython. Recast sys.dllhandle to an int. +incompatibility with cpython. Recast sys.dllhandle to an int. diff --git a/pypy/module/sys/test/test_sysmodule.py b/pypy/module/sys/test/test_sysmodule.py --- a/pypy/module/sys/test/test_sysmodule.py +++ b/pypy/module/sys/test/test_sysmodule.py @@ -420,13 +420,9 @@ if hasattr(sys, "winver"): assert sys.winver == sys.version[:3] - def test_no_dllhandle(self): + def test_dllhandle(self): import sys - if '__pypy__' in sys.builtin_module_names: - assert not hasattr(sys, 'dllhandle') - elif sys.platform == 'win32': - # only on cpython win32 - assert hasattr(sys, 'dllhandle') + assert hasattr(sys, 'dllhandle') == (sys.platform == 'win32') def test_dlopenflags(self): import sys diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -233,8 +233,6 @@ def get_dllhandle(space): if not space.config.objspace.usemodules.cpyext: return space.wrap(0) - if not space.config.objspace.usemodules._rawffi: - return space.wrap(0) return _get_dllhandle(space) @@ -249,8 +247,8 @@ # cdll = RawCDLL(handle) # return space.wrap(W_CDLL(space, "python api", cdll)) # Provide a cpython-compatible int - from rpython.rtyper.lltypesystem import rffi - return space.wrap(rffi.cast(rffi.INT, handle)) + from rpython.rtyper.lltypesystem import lltype + return space.wrap(rffi.cast(lltype.Signed, handle)) def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" From noreply at buildbot.pypy.org Mon Jun 30 09:15:48 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 09:15:48 +0200 (CEST) Subject: [pypy-commit] pypy 
disable_pythonapi: Fix Message-ID: <20140630071548.0D0961C1120@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: disable_pythonapi Changeset: r72279:966dc13241c2 Date: 2014-06-30 09:15 +0200 http://bitbucket.org/pypy/pypy/changeset/966dc13241c2/ Log: Fix diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -247,7 +247,7 @@ # cdll = RawCDLL(handle) # return space.wrap(W_CDLL(space, "python api", cdll)) # Provide a cpython-compatible int - from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem import lltype, rffi return space.wrap(rffi.cast(lltype.Signed, handle)) def getsizeof(space, w_object, w_default=None): From noreply at buildbot.pypy.org Mon Jun 30 09:18:01 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 09:18:01 +0200 (CEST) Subject: [pypy-commit] pypy disable_pythonapi: Ready to merge Message-ID: <20140630071801.EEB8B1C1120@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: disable_pythonapi Changeset: r72280:e5647789f355 Date: 2014-06-30 09:15 +0200 http://bitbucket.org/pypy/pypy/changeset/e5647789f355/ Log: Ready to merge From noreply at buildbot.pypy.org Mon Jun 30 09:18:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 09:18:03 +0200 (CEST) Subject: [pypy-commit] pypy default: Merge disable_pythonapi by matti, which disables the buggy Message-ID: <20140630071803.892F51C1120@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72281:f65f69804dfc Date: 2014-06-30 09:16 +0200 http://bitbucket.org/pypy/pypy/changeset/f65f69804dfc/ Log: Merge disable_pythonapi by matti, which disables the buggy "ctypes.pythnonapi" and updates the docs. 
diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -389,12 +389,13 @@ func.__name__ = name_or_ordinal return func -class PyDLL(CDLL): - """This class represents the Python library itself. It allows to - access Python API functions. The GIL is not released, and - Python exceptions are handled correctly. - """ - _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI +# Not in PyPy +#class PyDLL(CDLL): +# """This class represents the Python library itself. It allows to +# access Python API functions. The GIL is not released, and +# Python exceptions are handled correctly. +# """ +# _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI if _os.name in ("nt", "ce"): @@ -447,15 +448,8 @@ return self._dlltype(name) cdll = LibraryLoader(CDLL) -pydll = LibraryLoader(PyDLL) - -if _os.name in ("nt", "ce"): - pythonapi = PyDLL("python dll", None, _sys.dllhandle) -elif _sys.platform == "cygwin": - pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2]) -else: - pythonapi = PyDLL(None) - +# not on PyPy +#pydll = LibraryLoader(PyDLL) if _os.name in ("nt", "ce"): windll = LibraryLoader(WinDLL) diff --git a/lib-python/2.7/ctypes/test/test_values.py b/lib-python/2.7/ctypes/test/test_values.py --- a/lib-python/2.7/ctypes/test/test_values.py +++ b/lib-python/2.7/ctypes/test/test_values.py @@ -4,6 +4,7 @@ import unittest from ctypes import * +from ctypes.test import xfail import _ctypes_test @@ -23,7 +24,8 @@ class Win_ValuesTestCase(unittest.TestCase): """This test only works when python itself is a dll/shared library""" - + + @xfail def test_optimizeflag(self): # This test accesses the Py_OptimizeFlag intger, which is # exported by the Python dll. @@ -40,6 +42,7 @@ else: self.assertEqual(opt, 2) + @xfail def test_frozentable(self): # Python exports a PyImport_FrozenModules symbol. This is a # pointer to an array of struct _frozen entries. 
The end of the @@ -75,6 +78,7 @@ from ctypes import _pointer_type_cache del _pointer_type_cache[struct_frozen] + @xfail def test_undefined(self): self.assertRaises(ValueError, c_int.in_dll, pydll, "Undefined_Symbol") diff --git a/pypy/doc/ctypes-implementation.rst b/pypy/doc/ctypes-implementation.rst --- a/pypy/doc/ctypes-implementation.rst +++ b/pypy/doc/ctypes-implementation.rst @@ -72,13 +72,11 @@ Here is a list of the limitations and missing features of the current implementation: -* ``ctypes.pythonapi`` lets you access the CPython C API emulation layer - of PyPy, at your own risks and without doing anything sensible about - the GIL. Since PyPy 2.3, these functions are also named with an extra - "Py", for example ``PyPyInt_FromLong()``. Basically, don't use this, - but it might more or less work in simple cases if you do. (Obviously, - assuming the PyObject pointers you get have any particular fields in - any particular order is just going to crash.) +* ``ctypes.pythonapi`` is missing. In previous versions, it was present + and redirected to the `cpyext` C API emulation layer, but our + implementation did not do anything sensible about the GIL and the + functions were named with an extra "Py", for example + ``PyPyInt_FromLong()``. It was removed for being unhelpful. * We copy Python strings instead of having pointers to raw buffers diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -43,3 +43,7 @@ .. branch: jit-get-errno Optimize the errno handling in the JIT, notably around external function calls. Linux-only. + +.. branch: disable_pythonapi +Remove non-functioning ctypes.pyhonapi and ctypes.PyDLL, document this +incompatibility with cpython. Recast sys.dllhandle to an int. 
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -205,12 +205,7 @@ import sys if sys.platform != "win32" or sys.version_info < (2, 6): skip("Windows Python >= 2.6 only") - assert sys.dllhandle - assert sys.dllhandle.getaddressindll('PyPyErr_NewException') - import ctypes # slow - PyUnicode_GetDefaultEncoding = ctypes.pythonapi.PyPyUnicode_GetDefaultEncoding - PyUnicode_GetDefaultEncoding.restype = ctypes.c_char_p - assert PyUnicode_GetDefaultEncoding() == 'ascii' + assert isinstance(sys.dllhandle, int) class AppTestCpythonExtensionBase(LeakCheckingTest): diff --git a/pypy/module/sys/test/test_sysmodule.py b/pypy/module/sys/test/test_sysmodule.py --- a/pypy/module/sys/test/test_sysmodule.py +++ b/pypy/module/sys/test/test_sysmodule.py @@ -391,7 +391,8 @@ import sys if hasattr(sys, "getwindowsversion"): v = sys.getwindowsversion() - assert isinstance(v, tuple) + if '__pypy__' in sys.builtin_module_names: + assert isinstance(v, tuple) assert len(v) == 5 assert isinstance(v[0], int) assert isinstance(v[1], int) @@ -419,6 +420,10 @@ if hasattr(sys, "winver"): assert sys.winver == sys.version[:3] + def test_dllhandle(self): + import sys + assert hasattr(sys, 'dllhandle') == (sys.platform == 'win32') + def test_dlopenflags(self): import sys if hasattr(sys, "setdlopenflags"): @@ -486,7 +491,8 @@ assert isinstance(sys.version, basestring) assert isinstance(sys.warnoptions, list) vi = sys.version_info - assert isinstance(vi, tuple) + if '__pypy__' in sys.builtin_module_names: + assert isinstance(vi, tuple) assert len(vi) == 5 assert isinstance(vi[0], int) assert isinstance(vi[1], int) @@ -512,6 +518,8 @@ def test_pypy_attributes(self): import sys + if '__pypy__' not in sys.builtin_module_names: + skip("only on PyPy") assert isinstance(sys.pypy_objspaceclass, str) vi = sys.pypy_version_info assert isinstance(vi, tuple) @@ -528,10 +536,14 @@ 
def test_subversion(self): import sys + if '__pypy__' not in sys.builtin_module_names: + skip("only on PyPy") assert sys.subversion == ('PyPy', '', '') def test__mercurial(self): import sys, re + if '__pypy__' not in sys.builtin_module_names: + skip("only on PyPy") project, hgtag, hgid = sys._mercurial assert project == 'PyPy' # the tag or branch may be anything, including the empty string diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -233,8 +233,6 @@ def get_dllhandle(space): if not space.config.objspace.usemodules.cpyext: return space.wrap(0) - if not space.config.objspace.usemodules._rawffi: - return space.wrap(0) return _get_dllhandle(space) @@ -243,11 +241,14 @@ from pypy.module.cpyext.api import State handle = space.fromcache(State).get_pythonapi_handle() - # Make a dll object with it - from pypy.module._rawffi.interp_rawffi import W_CDLL - from rpython.rlib.clibffi import RawCDLL - cdll = RawCDLL(handle) - return space.wrap(W_CDLL(space, "python api", cdll)) + # It used to be a CDLL + # from pypy.module._rawffi.interp_rawffi import W_CDLL + # from rpython.rlib.clibffi import RawCDLL + # cdll = RawCDLL(handle) + # return space.wrap(W_CDLL(space, "python api", cdll)) + # Provide a cpython-compatible int + from rpython.rtyper.lltypesystem import lltype, rffi + return space.wrap(rffi.cast(lltype.Signed, handle)) def getsizeof(space, w_object, w_default=None): """Not implemented on PyPy.""" From noreply at buildbot.pypy.org Mon Jun 30 12:00:23 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 12:00:23 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Remove unused argument. Message-ID: <20140630100023.DACBC1D2D13@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: card-marking Changeset: r1259:882222ce9035 Date: 2014-06-30 12:00 +0200 http://bitbucket.org/pypy/stmgc/changeset/882222ce9035/ Log: Remove unused argument. 
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c --- a/c7/stm/gcpage.c +++ b/c7/stm/gcpage.c @@ -111,7 +111,7 @@ return addr; } -object_t *_stm_allocate_old(ssize_t size_rounded_up, long use_cards) +object_t *_stm_allocate_old(ssize_t size_rounded_up) { /* only for tests xxx but stm_setup_prebuilt() uses this now too */ char *p = allocate_outside_nursery_large(size_rounded_up); diff --git a/c7/stm/prebuilt.c b/c7/stm/prebuilt.c --- a/c7/stm/prebuilt.c +++ b/c7/stm/prebuilt.c @@ -29,7 +29,7 @@ /* We need to make a copy of this object. The extra "long" is for the prebuilt hash. */ size_t size = stmcb_size_rounded_up(obj); - object_t *nobj = _stm_allocate_old(size + sizeof(long), 0); + object_t *nobj = _stm_allocate_old(size + sizeof(long)); /* Copy the object */ char *realnobj = REAL_ADDRESS(stm_object_pages, nobj); diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -115,7 +115,7 @@ void _stm_collectable_safe_point(void); /* for tests, but also used in duhton: */ -object_t *_stm_allocate_old(ssize_t size_rounded_up, long use_cards); +object_t *_stm_allocate_old(ssize_t size_rounded_up); char *_stm_real_address(object_t *o); #ifdef STM_TESTS #include diff --git a/c7/test/support.py b/c7/test/support.py --- a/c7/test/support.py +++ b/c7/test/support.py @@ -40,7 +40,7 @@ /*void stm_write(object_t *obj); use _checked_stm_write() instead */ object_t *stm_allocate(ssize_t size_rounded_up); object_t *stm_allocate_weakref(ssize_t size_rounded_up); -object_t *_stm_allocate_old(ssize_t size_rounded_up, long use_cards); +object_t *_stm_allocate_old(ssize_t size_rounded_up); /*void stm_write_card(); use _checked_stm_write_card() instead */ @@ -386,13 +386,13 @@ return lib.stm_can_move(o) def stm_allocate_old(size): - o = lib._stm_allocate_old(size, False) + o = lib._stm_allocate_old(size) tid = 42 + size lib._set_type_id(o, tid) return o def stm_allocate_old_refs(n): - o = lib._stm_allocate_old(HDR + n * WORD, True) + o = lib._stm_allocate_old(HDR + n * 
WORD) tid = 421420 + n lib._set_type_id(o, tid) return o From noreply at buildbot.pypy.org Mon Jun 30 12:07:34 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 12:07:34 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Remove stmcb_should_use_cards(). Message-ID: <20140630100734.B21F01C33F0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: card-marking Changeset: r1260:74180517533a Date: 2014-06-30 12:07 +0200 http://bitbucket.org/pypy/stmgc/changeset/74180517533a/ Log: Remove stmcb_should_use_cards(). diff --git a/c7/demo/demo2.c b/c7/demo/demo2.c --- a/c7/demo/demo2.c +++ b/c7/demo/demo2.c @@ -43,10 +43,6 @@ n = (struct node_s*)obj; visit((object_t **)&n->next); } -long stmcb_should_use_cards(struct object_s *obj) -{ - return 0; -} void stmcb_get_card_base_itemsize( struct object_s *obj, uintptr_t *base_offset, ssize_t *item_size) { diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -232,10 +232,7 @@ REAL_ADDRESS(STM_SEGMENT->segment_base, obj); size_t size = stmcb_size_rounded_up(realobj); - if (size < _STM_MIN_CARD_OBJ_SIZE) - return false; - - return !!stmcb_should_use_cards(realobj); + return (size >= _STM_MIN_CARD_OBJ_SIZE); } void _stm_write_slowpath_card(object_t *obj, uintptr_t index) diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -245,17 +245,13 @@ */ extern ssize_t stmcb_size_rounded_up(struct object_s *); extern void stmcb_trace(struct object_s *, void (object_t **)); -/* called to determine if we should use cards for this object. 
- (makes most sense for big arrays with references) */ -extern long stmcb_should_use_cards(struct object_s *); /* a special trace-callback that is only called for the marked ranges of indices (using stm_write_card(o, index)) */ extern void stmcb_trace_cards(struct object_s *, void (object_t **), uintptr_t start, uintptr_t stop); -/* this function will be called on objects that support cards - (stmcb_should_use_cards() returned True). It returns the - base_offset (in bytes) inside the object from where the - indices start, and item_size (in bytes) for the size of +/* this function will be called on objects that support cards. + It returns the base_offset (in bytes) inside the object from + where the indices start, and item_size (in bytes) for the size of one item */ extern void stmcb_get_card_base_itemsize( struct object_s *, uintptr_t *base_offset, ssize_t *item_size); diff --git a/c7/test/support.py b/c7/test/support.py --- a/c7/test/support.py +++ b/c7/test/support.py @@ -328,14 +328,6 @@ *item_size = sizeof(object_t *); } -long stmcb_should_use_cards(struct object_s *obj) -{ - struct myobj_s *myobj = (struct myobj_s*)obj; - if (myobj->type_id < 421420) - return 0; /*no refs*/ - return 1; -} - void stm_push_marker(stm_thread_local_t *tl, uintptr_t onum, object_t *ob) { STM_PUSH_MARKER(*tl, onum, ob); From noreply at buildbot.pypy.org Mon Jun 30 12:23:50 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 12:23:50 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Call stmcb_get_card_base_itemsize() lazily only if needed Message-ID: <20140630102350.E34D81D2D06@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: card-marking Changeset: r1261:9c17fefa1cc0 Date: 2014-06-30 12:14 +0200 http://bitbucket.org/pypy/stmgc/changeset/9c17fefa1cc0/ Log: Call stmcb_get_card_base_itemsize() lazily only if needed diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -587,25 +587,29 @@ uintptr_t base_offset; ssize_t 
item_size; bool all_cards_were_cleared = true; - stmcb_get_card_base_itemsize(realobj, &base_offset, &item_size); uintptr_t start_card_index = -1; while (card_index <= last_card_index) { uintptr_t card_lock_idx = first_card_index + card_index; uint8_t card_value = write_locks[card_lock_idx]; - OPT_ASSERT(card_value != CARD_MARKED); /* always only MARKED_OLD or CLEAR */ - if (card_value == CARD_MARKED_OLD) { - all_cards_were_cleared = false; write_locks[card_lock_idx] = CARD_CLEAR; if (start_card_index == -1) { /* first marked card */ start_card_index = card_index; /* start = (uintptr_t)obj + stmcb_index_to_byte_offset( */ /* realobj, get_card_index_to_index(card_index)); */ + if (all_cards_were_cleared) { + all_cards_were_cleared = false; + stmcb_get_card_base_itemsize(realobj, &base_offset, + &item_size); + } } } + else { + OPT_ASSERT(card_value == CARD_CLEAR); + } if (start_card_index != -1 /* something to copy */ && (card_value != CARD_MARKED_OLD /* found non-marked card */ From noreply at buildbot.pypy.org Mon Jun 30 12:23:52 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 12:23:52 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Fix test using cards on a non-GC array Message-ID: <20140630102352.0CC2B1D2D06@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: card-marking Changeset: r1262:488189516065 Date: 2014-06-30 12:23 +0200 http://bitbucket.org/pypy/stmgc/changeset/488189516065/ Log: Fix test using cards on a non-GC array diff --git a/c7/test/support.py b/c7/test/support.py --- a/c7/test/support.py +++ b/c7/test/support.py @@ -321,11 +321,13 @@ { struct myobj_s *myobj = (struct myobj_s*)obj; if (myobj->type_id < 421420) { - abort(); // works, but we want to test otherwise - /* basic case: index=byteoffset */ + *base_offset = SIZEOF_MYOBJ; + *item_size = 1; } - *base_offset = sizeof(struct myobj_s); - *item_size = sizeof(object_t *); + else { + *base_offset = sizeof(struct myobj_s); + *item_size = sizeof(object_t *); + } } 
void stm_push_marker(stm_thread_local_t *tl, uintptr_t onum, object_t *ob) @@ -427,7 +429,7 @@ def stm_set_char(obj, c, offset=HDR, use_cards=False): assert HDR <= offset < stm_get_obj_size(obj) if use_cards: - stm_write_card(obj, offset) + stm_write_card(obj, offset - HDR) else: stm_write(obj) stm_get_real_address(obj)[offset] = c From noreply at buildbot.pypy.org Mon Jun 30 12:56:39 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 12:56:39 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Pass only one pointer argument (actually an array) to Message-ID: <20140630105639.A0D9A1D3530@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: card-marking Changeset: r1263:bde7c7634878 Date: 2014-06-30 12:56 +0200 http://bitbucket.org/pypy/stmgc/changeset/bde7c7634878/ Log: Pass only one pointer argument (actually an array) to stmcb_get_card_base_itemsize(). diff --git a/c7/demo/demo2.c b/c7/demo/demo2.c --- a/c7/demo/demo2.c +++ b/c7/demo/demo2.c @@ -43,8 +43,8 @@ n = (struct node_s*)obj; visit((object_t **)&n->next); } -void stmcb_get_card_base_itemsize( - struct object_s *obj, uintptr_t *base_offset, ssize_t *item_size) +void stmcb_get_card_base_itemsize(struct object_s *obj, + uintptr_t offset_itemsize[2]) { abort(); } diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -584,8 +584,7 @@ /* Combine multiple marked cards and do a memcpy for them. 
We don't try yet to use page_copy() or otherwise take into account privatization of pages (except _has_private_page_in_range) */ - uintptr_t base_offset; - ssize_t item_size; + uintptr_t offset_itemsize[2]; bool all_cards_were_cleared = true; uintptr_t start_card_index = -1; @@ -602,8 +601,7 @@ /* realobj, get_card_index_to_index(card_index)); */ if (all_cards_were_cleared) { all_cards_were_cleared = false; - stmcb_get_card_base_itemsize(realobj, &base_offset, - &item_size); + stmcb_get_card_base_itemsize(realobj, offset_itemsize); } } } @@ -626,11 +624,11 @@ next_card_index++; } - start_card_offset = base_offset + - get_card_index_to_index(start_card_index) * item_size; + start_card_offset = offset_itemsize[0] + + get_card_index_to_index(start_card_index) * offset_itemsize[1]; - next_card_offset = base_offset + - get_card_index_to_index(next_card_index) * item_size; + next_card_offset = offset_itemsize[0] + + get_card_index_to_index(next_card_index) * offset_itemsize[1]; if (next_card_offset > obj_size) next_card_offset = obj_size; diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -253,8 +253,8 @@ It returns the base_offset (in bytes) inside the object from where the indices start, and item_size (in bytes) for the size of one item */ -extern void stmcb_get_card_base_itemsize( - struct object_s *, uintptr_t *base_offset, ssize_t *item_size); +extern void stmcb_get_card_base_itemsize(struct object_s *, + uintptr_t offset_itemsize[2]); extern void stmcb_commit_soon(void); diff --git a/c7/test/support.py b/c7/test/support.py --- a/c7/test/support.py +++ b/c7/test/support.py @@ -316,17 +316,17 @@ } } -void stmcb_get_card_base_itemsize( - struct object_s *obj, uintptr_t *base_offset, ssize_t *item_size) +void stmcb_get_card_base_itemsize(struct object_s *obj, + uintptr_t offset_itemsize[2]) { struct myobj_s *myobj = (struct myobj_s*)obj; if (myobj->type_id < 421420) { - *base_offset = SIZEOF_MYOBJ; - *item_size = 1; + offset_itemsize[0] = 
SIZEOF_MYOBJ; + offset_itemsize[1] = 1; } else { - *base_offset = sizeof(struct myobj_s); - *item_size = sizeof(object_t *); + offset_itemsize[0] = sizeof(struct myobj_s); + offset_itemsize[1] = sizeof(object_t *); } } From noreply at buildbot.pypy.org Mon Jun 30 13:04:53 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 13:04:53 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: import stmgc/bde7c7634878 (branch card-marking) Message-ID: <20140630110453.2E65F1D3531@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72282:6bd3b22ee3f6 Date: 2014-06-30 12:57 +0200 http://bitbucket.org/pypy/pypy/changeset/6bd3b22ee3f6/ Log: import stmgc/bde7c7634878 (branch card-marking) diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision --- a/rpython/translator/stm/src_stm/revision +++ b/rpython/translator/stm/src_stm/revision @@ -1,1 +1,1 @@ -70c403598485 +bde7c7634878 diff --git a/rpython/translator/stm/src_stm/stm/contention.c b/rpython/translator/stm/src_stm/stm/contention.c --- a/rpython/translator/stm/src_stm/stm/contention.c +++ b/rpython/translator/stm/src_stm/stm/contention.c @@ -195,7 +195,7 @@ /* tell the other to commit ASAP, since it causes aborts */ signal_other_to_commit_soon(contmgr.other_pseg); - dprintf(("abort in contention\n")); + dprintf(("abort in contention: kind %d\n", kind)); STM_SEGMENT->nursery_end = abort_category; marker_contention(kind, false, other_segment_num, obj); abort_with_mutex(); diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c --- a/rpython/translator/stm/src_stm/stm/core.c +++ b/rpython/translator/stm/src_stm/stm/core.c @@ -41,26 +41,66 @@ #endif } -void _stm_write_slowpath(object_t *obj) +__attribute__((always_inline)) +static void write_slowpath_overflow_obj(object_t *obj, bool mark_card) +{ + /* An overflow object is an object from the same transaction, but + outside the nursery. 
More precisely, it is no longer young, + i.e. it comes from before the most recent minor collection. + */ + assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL); + + assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); + if (!mark_card) { + /* The basic case, with no card marking. We append the object + into 'objects_pointing_to_nursery', and remove the flag so + that the write_slowpath will not be called again until the + next minor collection. */ + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* if we clear this flag, we also need to clear the cards */ + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_CLEAR, false); + } + obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET); + LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); + } + else { + /* Card marking. Don't remove GCFLAG_WRITE_BARRIER because we + need to come back to _stm_write_slowpath_card() for every + card to mark. Add GCFLAG_CARDS_SET. */ + obj->stm_flags |= GCFLAG_CARDS_SET; + assert(STM_PSEGMENT->old_objects_with_cards); + LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj); + } +} + +__attribute__((always_inline)) +static void write_slowpath_common(object_t *obj, bool mark_card) { assert(_seems_to_be_running_transaction()); assert(!_is_young(obj)); assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); - /* is this an object from the same transaction, outside the nursery? */ - if ((obj->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) == - STM_PSEGMENT->overflow_number) { + uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj); - dprintf_test(("write_slowpath %p -> ovf obj_to_nurs\n", obj)); - obj->stm_flags &= ~GCFLAG_WRITE_BARRIER; - assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL); - LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); + if (IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) { + assert(write_locks[base_lock_idx] == 0); + write_slowpath_overflow_obj(obj, mark_card); return; } + /* Else, it's an old object and we need to privatise it. 
+ Do a read-barrier now. Note that this must occur before the + safepoints that may be issued in write_write_contention_management(). + */ + stm_read(obj); - /* do a read-barrier now. Note that this must occur before the - safepoints that may be issued in write_write_contention_management(). */ - stm_read(obj); + /* Take the segment's own lock number */ + uint8_t lock_num = STM_PSEGMENT->write_lock_num; + + /* If CARDS_SET, we entered here at least once already, so we + already own the write_lock */ + assert(IMPLY(obj->stm_flags & GCFLAG_CARDS_SET, + write_locks[base_lock_idx] == lock_num)); /* XXX XXX XXX make the logic of write-locking objects optional! */ @@ -69,16 +109,14 @@ 'modified_old_objects' (but, because it had GCFLAG_WRITE_BARRIER, not in 'objects_pointing_to_nursery'). We'll detect this case by finding that we already own the write-lock. */ - uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START; - uint8_t lock_num = STM_PSEGMENT->write_lock_num; - assert(lock_idx < sizeof(write_locks)); + retry: - if (write_locks[lock_idx] == 0) { + if (write_locks[base_lock_idx] == 0) { /* A lock to prevent reading garbage from lookup_other_thread_recorded_marker() */ acquire_marker_lock(STM_SEGMENT->segment_base); - if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[lock_idx], + if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[base_lock_idx], 0, lock_num))) { release_marker_lock(STM_SEGMENT->segment_base); goto retry; @@ -120,16 +158,15 @@ realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); obj_size = stmcb_size_rounded_up((struct object_s *)realobj); - /* that's the page *following* the last page with the object */ - end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL; + /* get the last page containing data from the object */ + end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL; - for (i = first_page; i < end_page; i++) { + for (i = first_page; i <= end_page; i++) { page_privatize(i); } } } - else if (write_locks[lock_idx] == 
lock_num) { - OPT_ASSERT(STM_PSEGMENT->objects_pointing_to_nursery != NULL); + else if (write_locks[base_lock_idx] == lock_num) { #ifdef STM_TESTS bool found = false; LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t *, @@ -140,17 +177,10 @@ else { /* call the contention manager, and then retry (unless we were aborted). */ - write_write_contention_management(lock_idx, obj); + write_write_contention_management(base_lock_idx, obj); goto retry; } - /* A common case for write_locks[] that was either 0 or lock_num: - we need to add the object to 'objects_pointing_to_nursery' - if there is such a list. */ - if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) { - dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj)); - LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); - } /* check that we really have a private page */ assert(is_private_page(STM_SEGMENT->segment_num, @@ -159,16 +189,104 @@ /* check that so far all copies of the object have the flag */ check_flag_write_barrier(obj); - /* remove GCFLAG_WRITE_BARRIER, but only if we succeeded in - getting the write-lock */ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); - obj->stm_flags &= ~GCFLAG_WRITE_BARRIER; + if (!mark_card) { + /* A common case for write_locks[] that was either 0 or lock_num: + we need to add the object to the appropriate list if there is one. + */ + if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) { + dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj)); + LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); + } + + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* if we clear this flag, we have to tell sync_old_objs that + everything needs to be synced */ + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_MARKED_OLD, true); /* mark all */ + } + + /* remove GCFLAG_WRITE_BARRIER if we succeeded in getting the base + write-lock (not for card marking). 
*/ + obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET); + } + else { + /* don't remove WRITE_BARRIER, but add CARDS_SET */ + obj->stm_flags |= GCFLAG_CARDS_SET; + assert(STM_PSEGMENT->old_objects_with_cards); + LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj); + } /* for sanity, check again that all other segment copies of this object still have the flag (so privatization worked) */ check_flag_write_barrier(obj); } +void _stm_write_slowpath(object_t *obj) +{ + write_slowpath_common(obj, /*mark_card=*/false); +} + +static bool obj_should_use_cards(object_t *obj) +{ + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + return (size >= _STM_MIN_CARD_OBJ_SIZE); +} + +void _stm_write_slowpath_card(object_t *obj, uintptr_t index) +{ + /* If CARDS_SET is not set so far, issue a normal write barrier. + If the object is large enough, ask it to set up the object for + card marking instead. + */ + if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { + bool mark_card = obj_should_use_cards(obj); + write_slowpath_common(obj, mark_card); + if (!mark_card) + return; + } + + dprintf_test(("write_slowpath_card %p -> index:%lu\n", + obj, index)); + + /* We reach this point if we have to mark the card. + */ + assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); + assert(obj->stm_flags & GCFLAG_CARDS_SET); + assert(!(obj->stm_flags & GCFLAG_SMALL_UNIFORM)); /* not supported/tested */ + +#ifndef NDEBUG + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + /* we need at least one lock in addition to the STM-reserved object + write-lock */ + assert(size >= 32); + /* the 'index' must be in range(length-of-obj), but we don't have + a direct way to know the length. We know that it is smaller + than the size in bytes. */ + assert(index < size); +#endif + + /* Write into the card's lock. 
This is used by the next minor + collection to know what parts of the big object may have changed. + We already own the object here or it is an overflow obj. */ + uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_lock_idx = base_lock_idx + get_index_to_card_index(index); + write_locks[card_lock_idx] = CARD_MARKED; + + /* More debug checks */ + dprintf(("mark %p index %lu, card:%lu with %d\n", + obj, index, get_index_to_card_index(index), CARD_MARKED)); + assert(IMPLY(IS_OVERFLOW_OBJ(STM_PSEGMENT, obj), + write_locks[base_lock_idx] == 0)); + assert(IMPLY(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj), + write_locks[base_lock_idx] == STM_PSEGMENT->write_lock_num)); +} + static void reset_transaction_read_version(void) { /* force-reset all read markers to 0 */ @@ -285,6 +403,8 @@ ({ if (was_read_remote(remote_base, item, remote_version)) { /* A write-read conflict! */ + dprintf(("write-read conflict on %p, our seg: %d, other: %ld\n", + item, STM_SEGMENT->segment_num, i)); if (write_read_contention_management(i, item)) { /* If we reach this point, we didn't abort, but we had to wait for the other thread to commit. 
If we @@ -356,7 +476,214 @@ } } -static void synchronize_object_now(object_t *obj) +static void _page_wise_synchronize_object_now(object_t *obj) +{ + uintptr_t start = (uintptr_t)obj; + uintptr_t first_page = start / 4096UL; + + char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj); + assert(obj_size >= 16); + uintptr_t end = start + obj_size; + uintptr_t last_page = (end - 1) / 4096UL; + long i, myself = STM_SEGMENT->segment_num; + + for (; first_page <= last_page; first_page++) { + + uintptr_t copy_size; + if (first_page == last_page) { + /* this is the final fragment */ + copy_size = end - start; + } + else { + /* this is a non-final fragment, going up to the + page's end */ + copy_size = 4096 - (start & 4095); + } + /* double-check that the result fits in one page */ + assert(copy_size > 0); + assert(copy_size + (start & 4095) <= 4096); + + /* First copy the object into the shared page, if needed */ + char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start); + char *dst = REAL_ADDRESS(stm_object_pages, start); + if (is_private_page(myself, first_page)) { + if (copy_size == 4096) + pagecopy(dst, src); + else + memcpy(dst, src, copy_size); + } + else { + assert(memcmp(dst, src, copy_size) == 0); /* same page */ + } + + for (i = 1; i <= NB_SEGMENTS; i++) { + if (i == myself) + continue; + + /* src = REAL_ADDRESS(stm_object_pages, start); */ + dst = REAL_ADDRESS(get_segment_base(i), start); + if (is_private_page(i, first_page)) { + /* The page is a private page. We need to diffuse this + fragment of object from the shared page to this private + page. 
*/ + if (copy_size == 4096) + pagecopy(dst, src); + else + memcpy(dst, src, copy_size); + } + else { + assert(!memcmp(dst, src, copy_size)); /* same page */ + } + } + + start = (start + 4096) & ~4095; + } +} + +static inline bool _has_private_page_in_range( + long seg_num, uintptr_t start, uintptr_t size) +{ + uintptr_t first_page = start / 4096UL; + uintptr_t last_page = (start + size) / 4096UL; + for (; first_page <= last_page; first_page++) + if (is_private_page(seg_num, first_page)) + return true; + return false; +} + +static void _card_wise_synchronize_object_now(object_t *obj) +{ + assert(obj_should_use_cards(obj)); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + assert(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)); + + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t obj_size = stmcb_size_rounded_up(realobj); + assert(obj_size >= 32); + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(obj_size - 1); /* max valid index */ + long i, myself = STM_SEGMENT->segment_num; + + /* simple heuristic to check if probably the whole object is + marked anyway so we should do page-wise synchronize */ + if (write_locks[first_card_index + 1] == CARD_MARKED_OLD + && write_locks[first_card_index + last_card_index] == CARD_MARKED_OLD + && write_locks[first_card_index + (last_card_index >> 1) + 1] == CARD_MARKED_OLD) { + + dprintf(("card_wise_sync assumes %p,size:%lu is fully marked\n", obj, obj_size)); + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_CLEAR, false); + _page_wise_synchronize_object_now(obj); + return; + } + + dprintf(("card_wise_sync syncs %p,size:%lu card-wise\n", obj, obj_size)); + + /* Combine multiple marked cards and do a memcpy for them. 
We don't + try yet to use page_copy() or otherwise take into account privatization + of pages (except _has_private_page_in_range) */ + uintptr_t offset_itemsize[2]; + bool all_cards_were_cleared = true; + + uintptr_t start_card_index = -1; + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + uint8_t card_value = write_locks[card_lock_idx]; + + if (card_value == CARD_MARKED_OLD) { + write_locks[card_lock_idx] = CARD_CLEAR; + + if (start_card_index == -1) { /* first marked card */ + start_card_index = card_index; + /* start = (uintptr_t)obj + stmcb_index_to_byte_offset( */ + /* realobj, get_card_index_to_index(card_index)); */ + if (all_cards_were_cleared) { + all_cards_were_cleared = false; + stmcb_get_card_base_itemsize(realobj, offset_itemsize); + } + } + } + else { + OPT_ASSERT(card_value == CARD_CLEAR); + } + + if (start_card_index != -1 /* something to copy */ + && (card_value != CARD_MARKED_OLD /* found non-marked card */ + || card_index == last_card_index)) { /* this is the last card */ + /* do the copying: */ + uintptr_t start, copy_size; + uintptr_t next_card_offset; + uintptr_t start_card_offset; + uintptr_t next_card_index = card_index; + + if (card_value == CARD_MARKED_OLD) { + /* card_index is the last card of the object, but we need + to go one further to get the right offset */ + next_card_index++; + } + + start_card_offset = offset_itemsize[0] + + get_card_index_to_index(start_card_index) * offset_itemsize[1]; + + next_card_offset = offset_itemsize[0] + + get_card_index_to_index(next_card_index) * offset_itemsize[1]; + + if (next_card_offset > obj_size) + next_card_offset = obj_size; + + start = (uintptr_t)obj + start_card_offset; + copy_size = next_card_offset - start_card_offset; + OPT_ASSERT(copy_size > 0); + + /* dprintf(("copy %lu bytes\n", copy_size)); */ + + /* since we have marked cards, at least one page here must be private */ + assert(_has_private_page_in_range(myself, start, copy_size)); + 
+ /* copy to shared segment: */ + char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start); + char *dst = REAL_ADDRESS(stm_object_pages, start); + memcpy(dst, src, copy_size); + + /* copy to other segments */ + for (i = 1; i <= NB_SEGMENTS; i++) { + if (i == myself) + continue; + if (!_has_private_page_in_range(i, start, copy_size)) + continue; + /* src = REAL_ADDRESS(stm_object_pages, start); */ + dst = REAL_ADDRESS(get_segment_base(i), start); + memcpy(dst, src, copy_size); + } + + start_card_index = -1; + } + + card_index++; + } + + if (all_cards_were_cleared) { + /* well, seems like we never called stm_write_card() on it, so actually + we need to fall back to synchronize the whole object */ + _page_wise_synchronize_object_now(obj); + return; + } + +#ifndef NDEBUG + char *src = REAL_ADDRESS(stm_object_pages, (uintptr_t)obj); + char *dst; + for (i = 1; i <= NB_SEGMENTS; i++) { + dst = REAL_ADDRESS(get_segment_base(i), (uintptr_t)obj); + assert(memcmp(dst, src, obj_size) == 0); + } +#endif +} + + +static void synchronize_object_now(object_t *obj, bool ignore_cards) { /* Copy around the version of 'obj' that lives in our own segment. 
It is first copied into the shared pages, and then into other @@ -368,72 +695,16 @@ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); assert(STM_PSEGMENT->privatization_lock == 1); - uintptr_t start = (uintptr_t)obj; - uintptr_t first_page = start / 4096UL; + if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) { + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + abort();//XXX WRITE THE FAST CASE + } else if (ignore_cards || !obj_should_use_cards(obj)) { + _page_wise_synchronize_object_now(obj); + } else { + _card_wise_synchronize_object_now(obj); + } - if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) { - abort();//XXX WRITE THE FAST CASE - } - else { - char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); - ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj); - assert(obj_size >= 16); - uintptr_t end = start + obj_size; - uintptr_t last_page = (end - 1) / 4096UL; - long i, myself = STM_SEGMENT->segment_num; - - for (; first_page <= last_page; first_page++) { - - uintptr_t copy_size; - if (first_page == last_page) { - /* this is the final fragment */ - copy_size = end - start; - } - else { - /* this is a non-final fragment, going up to the - page's end */ - copy_size = 4096 - (start & 4095); - } - /* double-check that the result fits in one page */ - assert(copy_size > 0); - assert(copy_size + (start & 4095) <= 4096); - - /* First copy the object into the shared page, if needed */ - char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start); - char *dst = REAL_ADDRESS(stm_object_pages, start); - if (is_private_page(myself, first_page)) { - if (copy_size == 4096) - pagecopy(dst, src); - else - memcpy(dst, src, copy_size); - } - else { - assert(memcmp(dst, src, copy_size) == 0); /* same page */ - } - - for (i = 1; i <= NB_SEGMENTS; i++) { - if (i == myself) - continue; - - src = REAL_ADDRESS(stm_object_pages, start); - dst = REAL_ADDRESS(get_segment_base(i), start); - if (is_private_page(i, first_page)) { - /* The page is a private page. 
We need to diffuse this - fragment of object from the shared page to this private - page. */ - if (copy_size == 4096) - pagecopy(dst, src); - else - memcpy(dst, src, copy_size); - } - else { - assert(!memcmp(dst, src, copy_size)); /* same page */ - } - } - - start = (start + 4096) & ~4095; - } - } + _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), obj); } static void push_overflow_objects_from_privatized_pages(void) @@ -443,7 +714,7 @@ acquire_privatization_lock(); LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *, - synchronize_object_now(item)); + synchronize_object_now(item, true /*ignore_cards*/)); release_privatization_lock(); } @@ -467,7 +738,7 @@ /* copy the object to the shared page, and to the other private pages as needed */ - synchronize_object_now(item); + synchronize_object_now(item, false); /* don't ignore_cards */ })); release_privatization_lock(); @@ -484,7 +755,9 @@ STM_PSEGMENT->marker_inev[1] = 0; /* reset these lists to NULL for the next transaction */ + _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num)); LIST_FREE(STM_PSEGMENT->objects_pointing_to_nursery); + list_clear(STM_PSEGMENT->old_objects_with_cards); LIST_FREE(STM_PSEGMENT->large_overflow_objects); timing_end_transaction(attribute_to); @@ -535,6 +808,7 @@ /* synchronize modified old objects to other threads */ push_modified_to_other_segments(); + _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num)); /* update 'overflow_number' if needed */ if (STM_PSEGMENT->overflow_number_has_been_used) { @@ -594,6 +868,9 @@ ssize_t size = stmcb_size_rounded_up((struct object_s *)src); memcpy(dst, src, size); + if (obj_should_use_cards(item)) + _reset_object_cards(pseg, item, CARD_CLEAR, false); + /* objects in 'modified_old_objects' usually have the WRITE_BARRIER flag, unless they have been modified recently. 
Ignore the old flag; after copying from the @@ -622,6 +899,10 @@ static void abort_data_structures_from_segment_num(int segment_num) { +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT /* This function clears the content of the given segment undergoing an abort. It is called from abort_with_mutex(), but also sometimes from other threads that figure out that this segment should abort. @@ -651,6 +932,7 @@ /* reset all the modified objects (incl. re-adding GCFLAG_WRITE_BARRIER) */ reset_modified_from_other_segments(segment_num); + _verify_cards_cleared_in_all_lists(pseg); /* reset the tl->shadowstack and thread_local_obj to their original value before the transaction start */ @@ -663,8 +945,11 @@ /* reset these lists to NULL too on abort */ LIST_FREE(pseg->objects_pointing_to_nursery); + list_clear(pseg->old_objects_with_cards); LIST_FREE(pseg->large_overflow_objects); list_clear(pseg->young_weakrefs); +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") } static void abort_with_mutex(void) diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h --- a/rpython/translator/stm/src_stm/stm/core.h +++ b/rpython/translator/stm/src_stm/stm/core.h @@ -36,6 +36,8 @@ #define WRITELOCK_START ((END_NURSERY_PAGE * 4096UL) >> 4) #define WRITELOCK_END READMARKER_END +#define CARD_SIZE _STM_CARD_SIZE + enum /* stm_flags */ { /* This flag is set on non-nursery objects. It forces stm_write() to call _stm_write_slowpath(). @@ -55,6 +57,12 @@ after the object. */ GCFLAG_HAS_SHADOW = 0x04, + /* Set on objects that are large enough (_STM_MIN_CARD_OBJ_SIZE) + to have multiple cards (at least _STM_MIN_CARD_COUNT), and that + have at least one card marked. This flag implies + GCFLAG_WRITE_BARRIER. */ + GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET, + /* All remaining bits of the 32-bit 'stm_flags' field are taken by the "overflow number". 
This is a number that identifies the "overflow objects" from the current transaction among all old @@ -62,7 +70,7 @@ current transaction that have been flushed out of the nursery, which occurs if the same transaction allocates too many objects. */ - GCFLAG_OVERFLOW_NUMBER_bit0 = 0x8 /* must be last */ + GCFLAG_OVERFLOW_NUMBER_bit0 = 0x10 /* must be last */ }; @@ -97,6 +105,10 @@ understood as meaning implicitly "this is the same as 'modified_old_objects'". */ struct list_s *objects_pointing_to_nursery; + /* Like objects_pointing_to_nursery it holds the old objects that + we did a stm_write_card() on. Objects can be in both lists. + It is NULL iff objects_pointing_to_nursery is NULL. */ + struct list_s *old_objects_with_cards; /* List of all large, overflowed objects. Only non-NULL after the current transaction spanned a minor collection. */ @@ -213,9 +225,34 @@ static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START]; +enum /* card values for write_locks */ { + CARD_CLEAR = 0, /* card not used at all */ + CARD_MARKED = 100, /* card marked for tracing in the next gc */ + CARD_MARKED_OLD = 101, /* card was marked before, but cleared + in a GC */ +}; + #define REAL_ADDRESS(segment_base, src) ((segment_base) + (uintptr_t)(src)) +#define IS_OVERFLOW_OBJ(pseg, obj) (((obj)->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) \ + == (pseg)->overflow_number) + +static inline uintptr_t get_index_to_card_index(uintptr_t index) { + return (index / CARD_SIZE) + 1; +} + +static inline uintptr_t get_card_index_to_index(uintptr_t card_index) { + return (card_index - 1) * CARD_SIZE; +} + + +static inline uintptr_t get_write_lock_idx(uintptr_t obj) { + uintptr_t res = (obj >> 4) - WRITELOCK_START; + assert(res < sizeof(write_locks)); + return res; +} + static inline char *get_segment_base(long segment_num) { return stm_object_pages + segment_num * (NB_PAGES * 4096UL); } @@ -258,7 +295,7 @@ } static void copy_object_to_shared(object_t *obj, int source_segment_num); -static void 
synchronize_object_now(object_t *obj); +static void synchronize_object_now(object_t *obj, bool ignore_cards); static inline void acquire_privatization_lock(void) { diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c --- a/rpython/translator/stm/src_stm/stm/gcpage.c +++ b/rpython/translator/stm/src_stm/stm/gcpage.c @@ -167,7 +167,7 @@ static inline uintptr_t mark_loc(object_t *obj) { - uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START; + uintptr_t lock_idx = get_write_lock_idx((uintptr_t)obj); assert(lock_idx < sizeof(write_locks)); return lock_idx; } @@ -441,6 +441,11 @@ static void clean_up_segment_lists(void) { +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + long i; for (i = 1; i <= NB_SEGMENTS; i++) { struct stm_priv_segment_info_s *pseg = get_priv_segment(i); @@ -451,21 +456,54 @@ written to but don't actually point to the nursery. Clear it up and set GCFLAG_WRITE_BARRIER again on the objects. 
This is the case for transactions where - MINOR_NOTHING_TO_DO() == false + MINOR_NOTHING_TO_DO() == true but they still did write-barriers on objects */ lst = pseg->objects_pointing_to_nursery; if (lst != NULL) { - LIST_FOREACH_R(lst, uintptr_t /*item*/, + LIST_FOREACH_R(lst, object_t* /*item*/, ({ struct object_s *realobj = (struct object_s *) - REAL_ADDRESS(pseg->pub.segment_base, item); + REAL_ADDRESS(pseg->pub.segment_base, (uintptr_t)item); + assert(!(realobj->stm_flags & GCFLAG_WRITE_BARRIER)); + OPT_ASSERT(!(realobj->stm_flags & GCFLAG_CARDS_SET)); + realobj->stm_flags |= GCFLAG_WRITE_BARRIER; + + if (realobj->stm_flags & GCFLAG_CARDS_SET) { + /* we called a normal WB on this object, so all cards + need to be marked OLD */ + if (!IS_OVERFLOW_OBJ(pseg, realobj)) { + _reset_object_cards(pseg, item, CARD_MARKED_OLD, true); /* mark all */ + } else { + /* simply clear overflow */ + _reset_object_cards(pseg, item, CARD_CLEAR, false); + } + } })); list_clear(lst); + } else { + /* if here MINOR_NOTHING_TO_DO() was true before, it's like + we "didn't do a collection" at all. So nothing to do on + modified_old_objs. */ } + lst = pseg->old_objects_with_cards; + LIST_FOREACH_R(lst, object_t* /*item*/, + ({ + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(pseg->pub.segment_base, item); + OPT_ASSERT(realobj->stm_flags & GCFLAG_CARDS_SET); + OPT_ASSERT(realobj->stm_flags & GCFLAG_WRITE_BARRIER); + + /* clear cards if overflow, or mark marked cards as old otherwise */ + uint8_t mark_value = IS_OVERFLOW_OBJ(pseg, realobj) ? 
+ CARD_CLEAR : CARD_MARKED_OLD; + _reset_object_cards(pseg, item, mark_value, false); + })); + list_clear(lst); + /* Remove from 'large_overflow_objects' all objects that die */ lst = pseg->large_overflow_objects; if (lst != NULL) { @@ -478,6 +516,8 @@ } } } +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") } static inline bool largemalloc_keep_object_at(char *data) @@ -506,6 +546,20 @@ _stm_largemalloc_sweep(); } +static void assert_cleared_locks(size_t n) +{ +#ifndef NDEBUG + size_t i; + uint8_t *s = write_locks; +# ifndef STM_TESTS + if (n > 5000) n = 5000; +# endif + for (i = 0; i < n; i++) + assert(s[i] == CARD_CLEAR || s[i] == CARD_MARKED + || s[i] == CARD_MARKED_OLD); +#endif +} + static void clean_write_locks(void) { /* the write_locks array, containing the visit marker during @@ -515,7 +569,7 @@ object_t *loc2 = (object_t *)(uninitialized_page_stop - stm_object_pages); uintptr_t lock2_idx = mark_loc(loc2 - 1) + 1; - assert_memset_zero(write_locks, lock2_idx); + assert_cleared_locks(lock2_idx); memset(write_locks + lock2_idx, 0, sizeof(write_locks) - lock2_idx); } diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c --- a/rpython/translator/stm/src_stm/stm/misc.c +++ b/rpython/translator/stm/src_stm/stm/misc.c @@ -41,6 +41,12 @@ return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0; } + +bool _stm_was_written_card(object_t *obj) +{ + return obj->stm_flags & _STM_GCFLAG_CARDS_SET; +} + #ifdef STM_TESTS uintptr_t _stm_get_private_page(uintptr_t pagenum) { @@ -62,6 +68,13 @@ return list_count(STM_PSEGMENT->objects_pointing_to_nursery); } +long _stm_count_old_objects_with_cards(void) +{ + if (STM_PSEGMENT->old_objects_with_cards == NULL) + return -1; + return list_count(STM_PSEGMENT->old_objects_with_cards); +} + object_t *_stm_enum_modified_old_objects(long index) { return (object_t *)list_item( @@ -74,6 +87,12 @@ STM_PSEGMENT->objects_pointing_to_nursery, index); } +object_t 
*_stm_enum_old_objects_with_cards(long index) +{ + return (object_t *)list_item( + STM_PSEGMENT->old_objects_with_cards, index); +} + uint64_t _stm_total_allocated(void) { return increment_total_allocated(0); diff --git a/rpython/translator/stm/src_stm/stm/nursery.c b/rpython/translator/stm/src_stm/stm/nursery.c --- a/rpython/translator/stm/src_stm/stm/nursery.c +++ b/rpython/translator/stm/src_stm/stm/nursery.c @@ -66,6 +66,8 @@ object_t *obj = *pobj; object_t *nobj; uintptr_t nobj_sync_now; + char *realobj; + size_t size; if (obj == NULL) return; @@ -76,8 +78,6 @@ to GCWORD_MOVED. In that case, the forwarding location, i.e. where the object moved to, is stored in the second word in 'obj'. */ object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)obj; - char *realobj; - size_t size; if (obj->stm_flags & GCFLAG_HAS_SHADOW) { /* ^^ the single check above detects both already-moved objects @@ -150,6 +150,7 @@ /* Must trace the object later */ LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, nobj_sync_now); + _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), nobj); } static void collect_roots_in_nursery(void) @@ -184,30 +185,217 @@ minor_trace_if_young(&tl->thread_local_obj); } -static inline void _collect_now(object_t *obj) +static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj) +{ +#ifndef NDEBUG + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + if (size < _STM_MIN_CARD_OBJ_SIZE) + return; /* too small for cards */ + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */ + + OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS_MAX + || write_locks[first_card_index] == 255); /* see gcpage.c */ + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + 
card_index; + assert(write_locks[card_lock_idx] == CARD_CLEAR); + card_index++; + } + + assert(!(realobj->stm_flags & GCFLAG_CARDS_SET)); +#endif +} + +static void _verify_cards_cleared_in_all_lists(struct stm_priv_segment_info_s *pseg) +{ +#ifndef NDEBUG + LIST_FOREACH_R( + pseg->modified_old_objects, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); + + if (pseg->large_overflow_objects) { + LIST_FOREACH_R( + pseg->large_overflow_objects, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); + } + if (pseg->objects_pointing_to_nursery) { + LIST_FOREACH_R( + pseg->objects_pointing_to_nursery, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); + } + LIST_FOREACH_R( + pseg->old_objects_with_cards, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); +#endif +} + +static void _reset_object_cards(struct stm_priv_segment_info_s *pseg, + object_t *obj, uint8_t mark_value, + bool mark_all) +{ +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + OPT_ASSERT(size >= _STM_MIN_CARD_OBJ_SIZE); + assert(IMPLY(mark_value == CARD_CLEAR, !mark_all)); /* not necessary */ + assert(IMPLY(mark_all, mark_value == CARD_MARKED_OLD)); /* set *all* to OLD */ + assert(IMPLY(IS_OVERFLOW_OBJ(pseg, realobj), + mark_value == CARD_CLEAR)); /* overflows are always CLEARed */ + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */ + + OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS + || write_locks[first_card_index] == 255); /* see gcpage.c */ + + dprintf(("mark cards of %p, size %lu with %d, all: %d\n", + obj, size, mark_value, mark_all)); + dprintf(("obj has %lu cards\n", last_card_index)); + while (card_index <= 
last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + + if (mark_all || write_locks[card_lock_idx] != CARD_CLEAR) { + /* dprintf(("mark card %lu,wl:%lu of %p with %d\n", */ + /* card_index, card_lock_idx, obj, mark_value)); */ + write_locks[card_lock_idx] = mark_value; + } + card_index++; + } + + realobj->stm_flags &= ~GCFLAG_CARDS_SET; + +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") +} + + +static void _trace_card_object(object_t *obj) +{ + assert(!_is_in_nursery(obj)); + assert(obj->stm_flags & GCFLAG_CARDS_SET); + assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); + + dprintf(("_trace_card_object(%p)\n", obj)); + bool obj_is_overflow = IS_OVERFLOW_OBJ(STM_PSEGMENT, obj); + uint8_t mark_value = obj_is_overflow ? CARD_CLEAR : CARD_MARKED_OLD; + + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */ + + OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS_MAX + || write_locks[first_card_index] == 255); /* see gcpage.c */ + + /* XXX: merge ranges */ + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + if (write_locks[card_lock_idx] == CARD_MARKED) { + /* clear or set to old: */ + write_locks[card_lock_idx] = mark_value; + + uintptr_t start = get_card_index_to_index(card_index); + uintptr_t stop = get_card_index_to_index(card_index + 1); + + dprintf(("trace_cards on %p with start:%lu stop:%lu\n", + obj, start, stop)); + stmcb_trace_cards(realobj, &minor_trace_if_young, + start, stop); + + } + + /* all cards should be cleared on overflow objs */ + assert(IMPLY(obj_is_overflow, + write_locks[card_lock_idx] == CARD_CLEAR)); + + card_index++; + } + obj->stm_flags &= ~GCFLAG_CARDS_SET; +} + + + +static inline 
void _collect_now(object_t *obj, bool was_definitely_young) { assert(!_is_young(obj)); - /* We must not have GCFLAG_WRITE_BARRIER so far. Add it now. */ - assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER)); - obj->stm_flags |= GCFLAG_WRITE_BARRIER; + dprintf(("_collect_now: %p\n", obj)); - /* Trace the 'obj' to replace pointers to nursery with pointers - outside the nursery, possibly forcing nursery objects out and - adding them to 'objects_pointing_to_nursery' as well. */ - char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); - stmcb_trace((struct object_s *)realobj, &minor_trace_if_young); + if (!(obj->stm_flags & GCFLAG_WRITE_BARRIER)) { + /* Trace the 'obj' to replace pointers to nursery with pointers + outside the nursery, possibly forcing nursery objects out and + adding them to 'objects_pointing_to_nursery' as well. */ + char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + stmcb_trace((struct object_s *)realobj, &minor_trace_if_young); + + obj->stm_flags |= GCFLAG_WRITE_BARRIER; + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* all objects that had WB cleared need to be fully synchronised + on commit, so we have to mark all their cards */ + struct stm_priv_segment_info_s *pseg = get_priv_segment( + STM_SEGMENT->segment_num); + + /* stm_wb-slowpath should never have triggered for young objs */ + assert(!was_definitely_young); + + if (!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) { + _reset_object_cards(pseg, obj, CARD_MARKED_OLD, true); /* mark all */ + } else { + /* simply clear overflow */ + _reset_object_cards(pseg, obj, CARD_CLEAR, false); + } + } + } + /* else traced in collect_cardrefs_to_nursery if necessary */ +} + + +static void collect_cardrefs_to_nursery(void) +{ + dprintf(("collect_cardrefs_to_nursery\n")); + struct list_s *lst = STM_PSEGMENT->old_objects_with_cards; + + while (!list_is_empty(lst)) { + object_t *obj = (object_t*)list_pop_item(lst); + + assert(!_is_young(obj)); + + if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { + /* handled in 
_collect_now() */ + continue; + } + + /* traces cards, clears marked cards or marks them old if + necessary */ + _trace_card_object(obj); + + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + } } static void collect_oldrefs_to_nursery(void) { + dprintf(("collect_oldrefs_to_nursery\n")); struct list_s *lst = STM_PSEGMENT->objects_pointing_to_nursery; while (!list_is_empty(lst)) { uintptr_t obj_sync_now = list_pop_item(lst); object_t *obj = (object_t *)(obj_sync_now & ~FLAG_SYNC_LARGE); - _collect_now(obj); + bool was_definitely_young = (obj_sync_now & FLAG_SYNC_LARGE); + _collect_now(obj, was_definitely_young); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); if (obj_sync_now & FLAG_SYNC_LARGE) { /* this was a large object. We must either synchronize the @@ -215,13 +403,15 @@ WRITE_BARRIER flag and traced into it to fix its content); or add the object to 'large_overflow_objects'. */ + struct stm_priv_segment_info_s *pseg = get_priv_segment(STM_SEGMENT->segment_num); if (STM_PSEGMENT->minor_collect_will_commit_now) { acquire_privatization_lock(); - synchronize_object_now(obj); + synchronize_object_now(obj, true); /* ignore cards! 
*/ release_privatization_lock(); } else { LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj); } + _cards_cleared_in_object(pseg, obj); } /* the list could have moved while appending */ @@ -231,12 +421,15 @@ static void collect_modified_old_objects(void) { - LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t * /*item*/, - _collect_now(item)); + dprintf(("collect_modified_old_objects\n")); + LIST_FOREACH_R( + STM_PSEGMENT->modified_old_objects, object_t * /*item*/, + _collect_now(item, false)); } static void collect_roots_from_markers(uintptr_t num_old) { + dprintf(("collect_roots_from_markers\n")); /* visit the marker objects */ struct list_s *mlst = STM_PSEGMENT->modified_old_objects_markers; STM_PSEGMENT->modified_old_objects_markers_num_old = list_count(mlst); @@ -255,6 +448,11 @@ static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg) { +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + dprintf(("throw_away_nursery\n")); /* reset the nursery by zeroing it */ size_t nursery_used; char *realnursery; @@ -280,10 +478,11 @@ wlog_t *item; TREE_LOOP_FORWARD(*pseg->young_outside_nursery, item) { - assert(!_is_in_nursery((object_t *)item->addr)); + object_t *obj = (object_t*)item->addr; + /* mark slot as unread */ ((struct stm_read_marker_s *) - (pseg->pub.segment_base + (item->addr >> 4)))->rm = 0; + (pseg->pub.segment_base + (((uintptr_t)obj) >> 4)))->rm = 0; _stm_large_free(stm_object_pages + item->addr); } TREE_LOOP_END; @@ -292,7 +491,29 @@ } tree_clear(pseg->nursery_objects_shadows); + + + /* modified_old_objects' cards get cleared in push_modified_to_other_segments + or reset_modified_from_other_segments. 
Objs in old_objs_with_cards but not + in modified_old_objs are overflow objects and handled here: */ + if (pseg->large_overflow_objects != NULL) { + /* some overflow objects may have cards when aborting, clear them too */ + LIST_FOREACH_R(pseg->large_overflow_objects, object_t * /*item*/, + { + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(pseg->pub.segment_base, item); + + if (realobj->stm_flags & GCFLAG_CARDS_SET) { + /* CARDS_SET is enough since other HAS_CARDS objs + are already cleared */ + _reset_object_cards(pseg, item, CARD_CLEAR, false); + } + }); + } + return nursery_used; +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") } #define MINOR_NOTHING_TO_DO(pseg) \ @@ -354,7 +575,9 @@ collect_roots_in_nursery(); + collect_cardrefs_to_nursery(); collect_oldrefs_to_nursery(); + assert(list_is_empty(STM_PSEGMENT->old_objects_with_cards)); /* now all surviving nursery objects have been moved out */ stm_move_young_weakrefs(); @@ -428,6 +651,7 @@ char *result = allocate_outside_nursery_large(size_rounded_up); object_t *o = (object_t *)(result - stm_object_pages); + tree_insert(STM_PSEGMENT->young_outside_nursery, (uintptr_t)o, 0); memset(REAL_ADDRESS(STM_SEGMENT->segment_base, o), 0, size_rounded_up); @@ -529,6 +753,7 @@ memcpy(realnobj, realobj, size); obj->stm_flags |= GCFLAG_HAS_SHADOW; + tree_insert(STM_PSEGMENT->nursery_objects_shadows, (uintptr_t)obj, (uintptr_t)nobj); return nobj; diff --git a/rpython/translator/stm/src_stm/stm/nursery.h b/rpython/translator/stm/src_stm/stm/nursery.h --- a/rpython/translator/stm/src_stm/stm/nursery.h +++ b/rpython/translator/stm/src_stm/stm/nursery.h @@ -7,6 +7,10 @@ static uint32_t highest_overflow_number; +static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj); +static void _reset_object_cards(struct stm_priv_segment_info_s *pseg, + object_t *obj, uint8_t mark_value, + bool mark_all); static void minor_collection(bool commit); static void 
check_nursery_at_transaction_start(void); static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg); diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c --- a/rpython/translator/stm/src_stm/stm/setup.c +++ b/rpython/translator/stm/src_stm/stm/setup.c @@ -84,6 +84,7 @@ { /* Check that some values are acceptable */ assert(NB_SEGMENTS <= NB_SEGMENTS_MAX); + assert(CARD_SIZE >= 32 && CARD_SIZE % 16 == 0); assert(4096 <= ((uintptr_t)STM_SEGMENT)); assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT); assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192); @@ -118,6 +119,7 @@ pr->pub.segment_num = i; pr->pub.segment_base = segment_base; pr->objects_pointing_to_nursery = NULL; + pr->old_objects_with_cards = list_create(); pr->large_overflow_objects = NULL; pr->modified_old_objects = list_create(); pr->modified_old_objects_markers = list_create(); @@ -157,6 +159,7 @@ for (i = 1; i <= NB_SEGMENTS; i++) { struct stm_priv_segment_info_s *pr = get_priv_segment(i); assert(pr->objects_pointing_to_nursery == NULL); + list_free(pr->old_objects_with_cards); assert(pr->large_overflow_objects == NULL); list_free(pr->modified_old_objects); list_free(pr->modified_old_objects_markers); diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h --- a/rpython/translator/stm/src_stm/stmgc.h +++ b/rpython/translator/stm/src_stm/stmgc.h @@ -108,6 +108,7 @@ /* this should use llvm's coldcc calling convention, but it's not exposed to C code so far */ void _stm_write_slowpath(object_t *); +void _stm_write_slowpath_card(object_t *, uintptr_t); object_t *_stm_allocate_slowpath(ssize_t); object_t *_stm_allocate_external(ssize_t); void _stm_become_inevitable(const char*); @@ -121,6 +122,7 @@ #include bool _stm_was_read(object_t *obj); bool _stm_was_written(object_t *obj); +bool _stm_was_written_card(object_t *obj); uintptr_t _stm_get_private_page(uintptr_t pagenum); bool 
_stm_in_transaction(stm_thread_local_t *tl); char *_stm_get_segment_base(long index); @@ -138,12 +140,18 @@ void _stm_set_nursery_free_count(uint64_t free_count); long _stm_count_modified_old_objects(void); long _stm_count_objects_pointing_to_nursery(void); +long _stm_count_old_objects_with_cards(void); object_t *_stm_enum_modified_old_objects(long index); object_t *_stm_enum_objects_pointing_to_nursery(long index); +object_t *_stm_enum_old_objects_with_cards(long index); uint64_t _stm_total_allocated(void); #endif #define _STM_GCFLAG_WRITE_BARRIER 0x01 +#define _STM_GCFLAG_CARDS_SET 0x08 +#define _STM_CARD_SIZE 32 /* must be >= 32 */ +#define _STM_MIN_CARD_COUNT 17 +#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT) #define _STM_NSE_SIGNAL_MAX _STM_TIME_N #define _STM_FAST_ALLOC (66*1024) @@ -214,6 +222,20 @@ _stm_write_slowpath(obj); } +/* The following is a GC-optimized barrier that works on the granularity + of CARD_SIZE. It can be used on any array object, but it is only + useful with those that were internally marked with GCFLAG_HAS_CARDS. + It has the same purpose as stm_write() for TM. + 'index' is the array-item-based position within the object, which + is measured in units returned by stmcb_get_card_base_itemsize(). +*/ +__attribute__((always_inline)) +static inline void stm_write_card(object_t *obj, uintptr_t index) +{ + if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)) + _stm_write_slowpath_card(obj, index); +} + /* Must be provided by the user of this library. The "size rounded up" must be a multiple of 8 and at least 16. 
"Tracing" an object means enumerating all GC references in it, @@ -224,6 +246,16 @@ */ extern ssize_t stmcb_size_rounded_up(struct object_s *); extern void stmcb_trace(struct object_s *, void (object_t **)); +/* a special trace-callback that is only called for the marked + ranges of indices (using stm_write_card(o, index)) */ +extern void stmcb_trace_cards(struct object_s *, void (object_t **), + uintptr_t start, uintptr_t stop); +/* this function will be called on objects that support cards. + It returns the base_offset (in bytes) inside the object from + where the indices start, and item_size (in bytes) for the size of + one item */ +extern void stmcb_get_card_base_itemsize(struct object_s *, + uintptr_t offset_itemsize[2]); extern void stmcb_commit_soon(void); @@ -249,6 +281,7 @@ return (object_t *)p; } + /* Allocate a weakref object. Weakref objects have a reference to an object at the byte-offset stmcb_size_rounded_up(obj) - sizeof(void*) From noreply at buildbot.pypy.org Mon Jun 30 13:04:54 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 13:04:54 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: in-progress Message-ID: <20140630110454.8C5541D3531@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72283:ee9beb2782fe Date: 2014-06-30 13:04 +0200 http://bitbucket.org/pypy/pypy/changeset/ee9beb2782fe/ Log: in-progress diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py --- a/rpython/memory/gc/stmgc.py +++ b/rpython/memory/gc/stmgc.py @@ -43,6 +43,14 @@ def get_type_id(self, obj): return llop.stm_addr_get_tid(llgroup.HALFWORD, obj) + def get_card_base_itemsize(self, obj, offset_itemsize): + typeid = self.get_type_id(obj) + assert self.is_varsize(typeid) + ofs = self.fixed_size(typeid) + isz = self.varsize_item_sizes(typeid) + offset_itemsize[0] = rffi.cast(lltype.Unsigned, ofs) + offset_itemsize[1] = rffi.cast(lltype.Unsigned, isz) + def setup(self): # Hack: MovingGCBase.setup() sets up stuff 
related to id(), which # we implement differently anyway. So directly call GCBase.setup(). diff --git a/rpython/memory/gctransform/stmframework.py b/rpython/memory/gctransform/stmframework.py --- a/rpython/memory/gctransform/stmframework.py +++ b/rpython/memory/gctransform/stmframework.py @@ -32,6 +32,16 @@ getfn(pypy_stmcb_trace, [llannotation.SomeAddress(), llannotation.SomePtr(GCClass.VISIT_FPTR)], annmodel.s_None)) + # + def pypy_stmcb_get_card_base_itemsize(obj, offset_itemsize): + gc.get_card_base_itemsize(obj, offset_itemsize) + pypy_stmcb_get_card_base_itemsize.c_name = ( + "pypy_stmcb_get_card_base_itemsize") + self.autoregister_ptrs.append( + getfn(pypy_stmcb_get_card_base_itemsize, + [llannotation.SomeAddress(), + llannotation.SomePtr(rffi.CArrayPtr(lltype.Unsigned))], + annmodel.s_None)) def build_root_walker(self): return StmRootWalker(self) @@ -87,6 +97,11 @@ if var_needsgc(hop.spaceop.args[-1]): raise Exception("in stm_ignored block: write of a gc " "pointer") + elif self._set_into_gc_array_part(hop.spaceop) is not None: + self.write_barrier_from_array_calls += 1 + v_index = self._set_into_gc_array_part(hop.spaceop) + assert v_index.concretetype == lltype.Signed + hop.genop("stm_write", [v_struct, v_index]) else: self.write_barrier_calls += 1 hop.genop("stm_write", [v_struct]) diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py --- a/rpython/translator/stm/funcgen.py +++ b/rpython/translator/stm/funcgen.py @@ -62,7 +62,11 @@ assert isinstance(op.args[0].concretetype, lltype.Ptr) assert op.args[0].concretetype.TO._gckind == 'gc' arg0 = funcgen.expr(op.args[0]) - return 'stm_write((object_t *)%s);' % (arg0,) + if len(op.args) == 1: + return 'stm_write((object_t *)%s);' % (arg0,) + else: + arg1 = funcgen.expr(op.args[1]) + return 'stm_write_card((object_t *)%s, %s);' % (arg0, arg1) def stm_can_move(funcgen, op): arg0 = funcgen.expr(op.args[0]) diff --git a/rpython/translator/stm/src_stm/stmgcintf.c 
b/rpython/translator/stm/src_stm/stmgcintf.c --- a/rpython/translator/stm/src_stm/stmgcintf.c +++ b/rpython/translator/stm/src_stm/stmgcintf.c @@ -10,6 +10,7 @@ __thread uintptr_t pypy_stm_nursery_low_fill_mark_saved; extern Signed pypy_stmcb_size_rounded_up(void*); +extern void pypy_stmcb_get_card_base_itemsize(void*, uintptr_t[]); extern void pypy_stmcb_trace(void*, void(*)(void*)); inline ssize_t stmcb_size_rounded_up(struct object_s *obj) { @@ -19,6 +20,11 @@ return result; } +inline void stmcb_get_card_base_itemsize(struct object_s *obj, + uintptr_t offset_itemsize[2]) { + pypy_stmcb_get_card_base_itemsize(obj, offset_itemsize); +} + inline void stmcb_trace(struct object_s *obj, void visit(object_t **)) { pypy_stmcb_trace(obj, (void(*)(void*))visit); } diff --git a/rpython/translator/stm/test/test_ztranslated.py b/rpython/translator/stm/test/test_ztranslated.py --- a/rpython/translator/stm/test/test_ztranslated.py +++ b/rpython/translator/stm/test/test_ztranslated.py @@ -451,6 +451,20 @@ data = cbuilder.cmdexec('') assert 'did not crash 84\n' in data + def test_stm_write_card(self): + lst = [0] * 100 + def main(argv): + lst[42] = 43 + print 'did not crash', lst[42] + return 0 + + t, cbuilder = self.compile(main) + first_op = t.graphs[0].startblock.operations[0] + assert first_op.opnames == opnames[0] == 'stm_write' + assert first_op.args[1].value == 42 + data = cbuilder.cmdexec('') + assert 'did not crash 43\n' in data + def test_float_inf_nan_in_struct(self): mylist = [float("inf"), float("-inf"), float("nan")] def main(argv): From noreply at buildbot.pypy.org Mon Jun 30 13:11:39 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 13:11:39 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: Integrate stm_write_card() inside PyPy Message-ID: <20140630111139.85C751C024A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72284:293e18aefb23 Date: 2014-06-30 13:10 +0200 
http://bitbucket.org/pypy/pypy/changeset/293e18aefb23/ Log: Integrate stm_write_card() inside PyPy diff --git a/rpython/memory/gctransform/stmframework.py b/rpython/memory/gctransform/stmframework.py --- a/rpython/memory/gctransform/stmframework.py +++ b/rpython/memory/gctransform/stmframework.py @@ -33,6 +33,17 @@ llannotation.SomePtr(GCClass.VISIT_FPTR)], annmodel.s_None)) # + def pypy_stmcb_trace_cards(obj, visit_fn, start, stop): + gc.trace_partial(obj, start, stop, invokecallback, visit_fn) + pypy_stmcb_trace_cards.c_name = "pypy_stmcb_trace_cards" + self.autoregister_ptrs.append( + getfn(pypy_stmcb_trace_cards, + [llannotation.SomeAddress(), + llannotation.SomePtr(GCClass.VISIT_FPTR), + annmodel.s_Int, + annmodel.s_Int], + annmodel.s_None)) + # def pypy_stmcb_get_card_base_itemsize(obj, offset_itemsize): gc.get_card_base_itemsize(obj, offset_itemsize) pypy_stmcb_get_card_base_itemsize.c_name = ( diff --git a/rpython/translator/stm/src_stm/stmgcintf.c b/rpython/translator/stm/src_stm/stmgcintf.c --- a/rpython/translator/stm/src_stm/stmgcintf.c +++ b/rpython/translator/stm/src_stm/stmgcintf.c @@ -12,6 +12,7 @@ extern Signed pypy_stmcb_size_rounded_up(void*); extern void pypy_stmcb_get_card_base_itemsize(void*, uintptr_t[]); extern void pypy_stmcb_trace(void*, void(*)(void*)); +extern void pypy_stmcb_trace_cards(void*, void(*)(void*), uintptr_t, uintptr_t); inline ssize_t stmcb_size_rounded_up(struct object_s *obj) { ssize_t result = pypy_stmcb_size_rounded_up(obj); @@ -29,6 +30,11 @@ pypy_stmcb_trace(obj, (void(*)(void*))visit); } +inline void stmcb_trace_cards(struct object_s *obj, void visit(object_t **), + uintptr_t start, uintptr_t stop) { + pypy_stmcb_trace_cards(obj, (void(*)(void*))visit, start, stop); +} + inline void stmcb_commit_soon() { if (pypy_stm_nursery_low_fill_mark == (uintptr_t)-1) { diff --git a/rpython/translator/stm/test/test_ztranslated.py b/rpython/translator/stm/test/test_ztranslated.py --- 
a/rpython/translator/stm/test/test_ztranslated.py +++ b/rpython/translator/stm/test/test_ztranslated.py @@ -452,7 +452,8 @@ assert 'did not crash 84\n' in data def test_stm_write_card(self): - lst = [0] * 100 + LST = lltype.GcArray(lltype.Signed) + lst = lltype.malloc(LST, 100, immortal=True) def main(argv): lst[42] = 43 print 'did not crash', lst[42] @@ -460,7 +461,7 @@ t, cbuilder = self.compile(main) first_op = t.graphs[0].startblock.operations[0] - assert first_op.opnames == opnames[0] == 'stm_write' + assert first_op.opname == 'stm_write' assert first_op.args[1].value == 42 data = cbuilder.cmdexec('') assert 'did not crash 43\n' in data From noreply at buildbot.pypy.org Mon Jun 30 13:20:03 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 13:20:03 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: Test and fix Message-ID: <20140630112003.B34A11C024A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72285:dfd595aa55f9 Date: 2014-06-30 13:18 +0200 http://bitbucket.org/pypy/pypy/changeset/dfd595aa55f9/ Log: Test and fix diff --git a/rpython/memory/gctransform/stmframework.py b/rpython/memory/gctransform/stmframework.py --- a/rpython/memory/gctransform/stmframework.py +++ b/rpython/memory/gctransform/stmframework.py @@ -34,6 +34,8 @@ annmodel.s_None)) # def pypy_stmcb_trace_cards(obj, visit_fn, start, stop): + if not gc.has_gcptr_in_varsize(gc.get_type_id(obj)): + return # there are cards, but they don't need tracing gc.trace_partial(obj, start, stop, invokecallback, visit_fn) pypy_stmcb_trace_cards.c_name = "pypy_stmcb_trace_cards" self.autoregister_ptrs.append( diff --git a/rpython/translator/stm/test/test_ztranslated.py b/rpython/translator/stm/test/test_ztranslated.py --- a/rpython/translator/stm/test/test_ztranslated.py +++ b/rpython/translator/stm/test/test_ztranslated.py @@ -456,6 +456,8 @@ lst = lltype.malloc(LST, 100, immortal=True) def main(argv): lst[42] = 43 + llop.stm_commit_if_not_atomic(lltype.Void) + 
llop.stm_start_inevitable_if_not_atomic(lltype.Void) print 'did not crash', lst[42] return 0 From noreply at buildbot.pypy.org Mon Jun 30 14:03:58 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 14:03:58 +0200 (CEST) Subject: [pypy-commit] pypy default: Turn off this warning Message-ID: <20140630120358.BA8351D2E50@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72286:25d2e2050596 Date: 2014-06-30 14:03 +0200 http://bitbucket.org/pypy/pypy/changeset/25d2e2050596/ Log: Turn off this warning diff --git a/rpython/translator/backendopt/merge_if_blocks.py b/rpython/translator/backendopt/merge_if_blocks.py --- a/rpython/translator/backendopt/merge_if_blocks.py +++ b/rpython/translator/backendopt/merge_if_blocks.py @@ -37,8 +37,10 @@ default.args = [get_new_arg(arg) for arg in default.args] for block, case in chain: if case.value in values: - log.WARNING("unreachable code with value %r in graph %s" % ( - case.value, graph)) + # - ignore silently: it occurs in platform-dependent + # chains of tests, for example + #log.WARNING("unreachable code with value %r in graph %s" % ( + # case.value, graph)) continue values[case.value] = True link = block.exits[1] From noreply at buildbot.pypy.org Mon Jun 30 15:11:57 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 15:11:57 +0200 (CEST) Subject: [pypy-commit] cffi default: Attempt to copy ctypes' name-mangling capability "_name@num". Message-ID: <20140630131157.293871C33F0@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r1532:f2712a617bfa Date: 2014-06-30 15:10 +0200 http://bitbucket.org/cffi/cffi/changeset/f2712a617bfa/ Log: Attempt to copy ctypes' name-mangling capability "_name at num". 
diff --git a/c/misc_win32.h b/c/misc_win32.h --- a/c/misc_win32.h +++ b/c/misc_win32.h @@ -192,7 +192,27 @@ static void *dlsym(void *handle, const char *symbol) { - return GetProcAddress((HMODULE)handle, symbol); + void *address = GetProcAddress((HMODULE)handle, symbol); +#ifndef MS_WIN64 + if (!address) { + /* If 'symbol' is not found, then try '_symbol at N' for N in + (0, 4, 8, 12, ..., 124). Unlike ctypes, we try to do that + for any symbol, although in theory it should only be done + for __stdcall functions. + */ + int i; + char *mangled_name = alloca(1 + strlen(symbol) + 1 + 3 + 1); + if (!mangled_name) + return NULL; + for (i = 0; i < 32; i++) { + sprintf(mangled_name, "_%s@%d", symbol, i * 4); + address = GetProcAddress((HMODULE)handle, mangled_name); + if (address) + break; + } + } +#endif + return address; } static void dlclose(void *handle) From noreply at buildbot.pypy.org Mon Jun 30 15:32:08 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 15:32:08 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: Test and fix Message-ID: <20140630133208.CABCD1D2E50@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72287:6e6305a37a7b Date: 2014-06-30 15:29 +0200 http://bitbucket.org/pypy/pypy/changeset/6e6305a37a7b/ Log: Test and fix diff --git a/rpython/memory/gctransform/stmframework.py b/rpython/memory/gctransform/stmframework.py --- a/rpython/memory/gctransform/stmframework.py +++ b/rpython/memory/gctransform/stmframework.py @@ -34,8 +34,11 @@ annmodel.s_None)) # def pypy_stmcb_trace_cards(obj, visit_fn, start, stop): - if not gc.has_gcptr_in_varsize(gc.get_type_id(obj)): + typeid = gc.get_type_id(obj) + if not gc.has_gcptr_in_varsize(typeid): return # there are cards, but they don't need tracing + length = (obj + gc.varsize_offset_to_length(typeid)).signed[0] + stop = min(stop, length) gc.trace_partial(obj, start, stop, invokecallback, visit_fn) pypy_stmcb_trace_cards.c_name = "pypy_stmcb_trace_cards" 
self.autoregister_ptrs.append( diff --git a/rpython/translator/stm/test/test_ztranslated.py b/rpython/translator/stm/test/test_ztranslated.py --- a/rpython/translator/stm/test/test_ztranslated.py +++ b/rpython/translator/stm/test/test_ztranslated.py @@ -453,12 +453,15 @@ def test_stm_write_card(self): LST = lltype.GcArray(lltype.Signed) - lst = lltype.malloc(LST, 100, immortal=True) + lst = lltype.malloc(LST, 1000, immortal=True) + LST2 = lltype.GcArray(lltype.Ptr(LST)) + lst2 = lltype.malloc(LST2, 1000, immortal=True) def main(argv): lst[42] = 43 + lst2[999] = lst llop.stm_commit_if_not_atomic(lltype.Void) llop.stm_start_inevitable_if_not_atomic(lltype.Void) - print 'did not crash', lst[42] + print 'did not crash', lst2[999][42] return 0 t, cbuilder = self.compile(main) From noreply at buildbot.pypy.org Mon Jun 30 16:11:08 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 30 Jun 2014 16:11:08 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: add test with two pinned objects and a non-pinned in between Message-ID: <20140630141108.A49791C33F0@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72288:719bf1e31183 Date: 2014-06-25 18:41 +0200 http://bitbucket.org/pypy/pypy/changeset/719bf1e31183/ Log: add test with two pinned objects and a non-pinned in between diff --git a/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py b/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py @@ -40,6 +40,11 @@ pinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, pinned_obj_ptr) assert rgc.pin(pinned_obj_gcref) # + notpinned_obj_type = lltype.GcStruct('NOT_PINNED_STRUCT', ('my_int', lltype.Signed)) + notpinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr, notpinned_obj_type, 'my_int') + notpinned_obj_ptr = 
lltype.malloc(notpinned_obj_type) + notpinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, notpinned_obj_ptr) + # ref_array_descr = self.cpu.arraydescrof(PinnedObjectTracker._ref_array_type) # vtable_descr = self.gc_ll_descr.fielddescr_vtable @@ -125,3 +130,18 @@ p1 = getarrayitem_gc(ConstPtr(ref_array_gcref), 0, descr=ref_array_descr) i0 = getfield_gc(p1, descr=pinned_obj_my_int_descr) """) + + def test_simple_getfield_twice(self): + self.check_rewrite(""" + [] + i0 = getfield_gc(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr) + i1 = getfield_gc(ConstPtr(notpinned_obj_gcref), descr=notpinned_obj_my_int_descr) + i2 = getfield_gc(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr) + """, """ + [] + p1 = getarrayitem_gc(ConstPtr(ref_array_gcref), 0, descr=ref_array_descr) + i0 = getfield_gc(p1, descr=pinned_obj_my_int_descr) + i1 = getfield_gc(ConstPtr(notpinned_obj_gcref), descr=notpinned_obj_my_int_descr) + p2 = getarrayitem_gc(ConstPtr(ref_array_gcref), 1, descr=ref_array_descr) + i2 = getfield_gc(p2, descr=pinned_obj_my_int_descr) + """) From noreply at buildbot.pypy.org Mon Jun 30 16:11:09 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 30 Jun 2014 16:11:09 +0200 (CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: rename test file Message-ID: <20140630141109.EABA11C33F0@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72289:e5c5f760f8eb Date: 2014-06-25 18:46 +0200 http://bitbucket.org/pypy/pypy/changeset/e5c5f760f8eb/ Log: rename test file diff --git a/rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py rename from rpython/jit/backend/llsupport/test/test_object_pinning_rewrite.py rename to rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py From noreply at buildbot.pypy.org Mon Jun 30 16:11:11 2014 From: noreply at buildbot.pypy.org (groggi) Date: Mon, 30 Jun 2014 16:11:11 +0200 
(CEST) Subject: [pypy-commit] pypy gc-incminimark-pinning: add test for missing feature. fails for now. Message-ID: <20140630141111.1FD881C33F0@cobra.cs.uni-duesseldorf.de> Author: Gregor Wegberg Branch: gc-incminimark-pinning Changeset: r72290:dbcb81255e80 Date: 2014-06-30 16:09 +0200 http://bitbucket.org/pypy/pypy/changeset/dbcb81255e80/ Log: add test for missing feature. fails for now. diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -109,6 +109,31 @@ # ^^^ should not be possible, struct is already old and won't # move. + def test_old_points_to_pinned(self): + # Test if we handle the case that an old object can point + # to a pinned object and keeps the pinned object alive by + # that. + # + # create the old object that will point to a pinned object + old_ptr = self.malloc(S) + self.stackroots.append(old_ptr) + self.gc.collect() + assert not self.gc.is_in_nursery(llmemory.cast_ptr_to_adr(self.stackroots[0])) + # + # create the young pinned object and attach it to the old object + pinned_ptr = self.malloc(S) + assert self.gc.pin(llmemory.cast_ptr_to_adr(pinned_ptr)) + self.write(self.stackroots[0], 'next', pinned_ptr) + # + # let's check if everything stays in place before/after a collection + assert self.gc.is_in_nursery(llmemory.cast_ptr_to_adr(pinned_ptr)) + self.gc.collect() + assert self.gc.is_in_nursery(llmemory.cast_ptr_to_adr(pinned_ptr)) + # + self.stackroots[0].next.someInt = 100 + self.gc.collect() + assert self.stackroots[0].next.someInt == 100 + def test_pin_malloc_pin(self): first_ptr = self.malloc(S) first_ptr.someInt = 101 From noreply at buildbot.pypy.org Mon Jun 30 17:02:09 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 17:02:09 +0200 (CEST) Subject: [pypy-commit] stmgc card-marking: Split _stm_write_slowpath_card() in two, as needed for the PyPy JIT 
Message-ID: <20140630150209.46BB61C024A@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: card-marking Changeset: r1264:1ee48e74669d Date: 2014-06-30 17:02 +0200 http://bitbucket.org/pypy/stmgc/changeset/1ee48e74669d/ Log: Split _stm_write_slowpath_card() in two, as needed for the PyPy JIT diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -235,6 +235,15 @@ return (size >= _STM_MIN_CARD_OBJ_SIZE); } +char _stm_write_slowpath_card_extra(object_t *obj) +{ + /* the PyPy JIT calls this function directly if it finds that an + array doesn't have the GCFLAG_CARDS_SET */ + bool mark_card = obj_should_use_cards(obj); + write_slowpath_common(obj, mark_card); + return mark_card; +} + void _stm_write_slowpath_card(object_t *obj, uintptr_t index) { /* If CARDS_SET is not set so far, issue a normal write barrier. @@ -242,8 +251,7 @@ card marking instead. */ if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { - bool mark_card = obj_should_use_cards(obj); - write_slowpath_common(obj, mark_card); + char mark_card = _stm_write_slowpath_card_extra(obj); if (!mark_card) return; } diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -108,6 +108,7 @@ but it's not exposed to C code so far */ void _stm_write_slowpath(object_t *); void _stm_write_slowpath_card(object_t *, uintptr_t); +char _stm_write_slowpath_card_extra(object_t *); object_t *_stm_allocate_slowpath(ssize_t); object_t *_stm_allocate_external(ssize_t); void _stm_become_inevitable(const char*); From noreply at buildbot.pypy.org Mon Jun 30 17:32:09 2014 From: noreply at buildbot.pypy.org (waedt) Date: Mon, 30 Jun 2014 17:32:09 +0200 (CEST) Subject: [pypy-commit] pypy utf8-unicode2: WIP. Most codec and unicodeobject tests pass now Message-ID: <20140630153209.0332B1D2E50@cobra.cs.uni-duesseldorf.de> Author: Tyler Wade Branch: utf8-unicode2 Changeset: r72291:927fb84a5116 Date: 2014-06-30 10:14 -0500 http://bitbucket.org/pypy/pypy/changeset/927fb84a5116/ Log: WIP. 
Most codec and unicodeobject tests pass now diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -13,6 +13,7 @@ from pypy.interpreter.astcompiler.consts import ( CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS, CO_NESTED, CO_GENERATOR, CO_KILL_DOCSTRING, CO_YIELD_INSIDE_TRY) +from pypy.interpreter.utf8 import Utf8Str from pypy.tool.stdlib_opcode import opcodedesc, HAVE_ARGUMENT from rpython.rlib.rarithmetic import intmask from rpython.rlib.objectmodel import compute_hash @@ -150,6 +151,8 @@ for const in code.co_consts: if isinstance(const, types.CodeType): # from stable compiler const = code_hook(space, const, hidden_applevel, code_hook) + if isinstance(const, unicode): + const = Utf8Str.from_unicode(const) newconsts_w[num] = space.wrap(const) num += 1 # stick the underlying CPython magic value, if the code object diff --git a/pypy/interpreter/test/test_utf8.py b/pypy/interpreter/test/test_utf8.py --- a/pypy/interpreter/test/test_utf8.py +++ b/pypy/interpreter/test/test_utf8.py @@ -1,3 +1,7 @@ +# -*- coding: utf-8 -*- + +import py +import sys from pypy.interpreter.utf8 import ( Utf8Str, Utf8Builder, utf8chr, utf8ord) @@ -20,6 +24,39 @@ 0xF0, 0x9F, 0x98, 0xBD, ]] +def test_iterator(): + s = build_utf8str() + iter = s.codepoint_iter() + assert iter.peek_next() == 0x41 + assert list(iter) == [0x41, 0x10F, 0x20AC, 0x1F63D] + + for i in range(1, 5): + iter = s.codepoint_iter() + iter.move(i) + if i != 4: + assert iter.peek_next() == [0x41, 0x10F, 0x20AC, 0x1F63D][i] + assert list(iter) == [0x41, 0x10F, 0x20AC, 0x1F63D][i:] + + for i in range(1, 5): + iter = s.codepoint_iter() + list(iter) # move the iterator to the end + iter.move(-i) + assert list(iter) == [0x41, 0x10F, 0x20AC, 0x1F63D][4-i:] + + iter = s.char_iter() + l = [s.bytes.decode('utf8') for s in list(iter)] + if sys.maxunicode < 65536: + assert l[:3] == [u'A', u'\u010F', u'\u20AC'] + else: + assert l == [u'A', 
u'\u010F', u'\u20AC', u'\U00001F63D'] + +def test_builder_append_slice(): + builder = Utf8Builder() + builder.append_slice(Utf8Str.from_unicode(u"0ê0"), 1, 2) + builder.append_slice("Test", 1, 3) + + assert builder.build() == u"êes" + def test_unicode_literal_comparison(): builder = Utf8Builder() builder.append(0x10F) @@ -55,9 +92,65 @@ assert s[-1] == utf8chr(0x1F63D) assert s[-2] == utf8chr(0x20AC) + with py.test.raises(IndexError): + c = s[4] + def test_getslice(): s = build_utf8str() assert s[0:1] == u'A' assert s[0:2] == u'A\u010F' assert s[1:2] == u'\u010F' + +def test_convert_indices(): + s = build_utf8str() + + assert s.index_of_char(0) == 0 + assert s.index_of_char(1) == 1 + assert s.index_of_char(2) == 3 + assert s.index_of_char(3) == 6 + + for i in range(len(s)): + assert s.char_index_of_byte(s.index_of_char(i)) == i + +def test_join(): + s = Utf8Str(' ') + assert s.join([]) == u'' + + + assert s.join([Utf8Str('one')]) == u'one' + assert s.join([Utf8Str('one'), Utf8Str('two')]) == u'one two' + +def test_find(): + u = u"äëïöü" + s = Utf8Str.from_unicode(u) + + for c in u: + assert s.find(Utf8Str.from_unicode(u)) == u.find(u) + assert s.rfind(Utf8Str.from_unicode(u)) == u.rfind(u) + + assert s.find('') == u.find('') + assert s.rfind('') == u.rfind('') + + assert s.find('1') == u.find('1') + assert s.rfind('1') == u.rfind('1') + + assert Utf8Str.from_unicode(u'abcdefghiabc').rfind(u'') == 12 + +def test_count(): + u = u"12äëïöü223" + s = Utf8Str.from_unicode(u) + + assert s.count("1") == u.count("1") + assert s.count("2") == u.count("2") + assert s.count(Utf8Str.from_unicode(u"ä")) == u.count(u"ä") + +def test_split(): + # U+00A0 is a non-breaking space + u = u"one two three\xA0four" + s = Utf8Str.from_unicode(u) + + assert s.split() == u.split() + assert s.split(' ') == u.split(' ') + assert s.split(maxsplit=1) == u.split(None, 1) + assert s.split('\n') == [s] diff --git a/pypy/interpreter/test/test_utf8_codecs.py 
b/pypy/interpreter/test/test_utf8_codecs.py --- a/pypy/interpreter/test/test_utf8_codecs.py +++ b/pypy/interpreter/test/test_utf8_codecs.py @@ -6,19 +6,6 @@ from pypy.interpreter.utf8 import Utf8Str from pypy.interpreter import utf8_codecs -''' -try: - import signal -except ImportError: - pass -else: - class MyKeyboardInterrupt(BaseException): - pass - def _interrupt(*args): - __tracebackhide__ = True - raise MyKeyboardInterrupt - signal.signal(signal.SIGINT, _interrupt) -''' class UnicodeTests(object): def typeequals(self, x, y): @@ -697,19 +684,13 @@ for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]: self.checkencode(s, "utf-8") - # TODO: Is this test useful? def test_utf8_surrogates(self): # make sure that the string itself is not marshalled u = u"\ud800" for i in range(4): u += u"\udc00" - if utf8_codecs.MAXUNICODE < 65536: - # Check replacing of two surrogates by single char while encoding - self.checkencode(u, "utf-8") - else: - # This is not done in wide unicode builds - py.test.raises(UnicodeEncodeError, self.checkencode, u, "utf-8") + py.test.raises(UnicodeEncodeError, self.checkencode, u, "utf-8") def test_ascii_error(self): self.checkencodeerror( @@ -780,13 +761,13 @@ u = runicode.UNICHR(0xD800) + runicode.UNICHR(0xDC00) if runicode.MAXUNICODE < 65536: # Narrow unicode build, consider utf16 surrogate pairs - assert runicode.unicode_encode_unicode_escape( + assert utf8_codecs.unicode_encode_unicode_escape( u, len(u), True) == r'\U00010000' - assert runicode.unicode_encode_raw_unicode_escape( + assert utf8_codecs.unicode_encode_raw_unicode_escape( u, len(u), True) == r'\U00010000' else: # Wide unicode build, don't merge utf16 surrogate pairs - assert runicode.unicode_encode_unicode_escape( + assert utf8_codecs.unicode_encode_unicode_escape( u, len(u), True) == r'\ud800\udc00' - assert runicode.unicode_encode_raw_unicode_escape( + assert utf8_codecs.unicode_encode_raw_unicode_escape( u, len(u), True) == r'\ud800\udc00' diff --git 
a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py --- a/pypy/interpreter/utf8.py +++ b/pypy/interpreter/utf8.py @@ -1,6 +1,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize from rpython.rlib.runicode import utf8_code_length +from rpython.rlib.unicodedata import unicodedb_5_2_0 as unicodedb from rpython.rlib.rarithmetic import r_uint def utf8chr(value): @@ -9,9 +10,7 @@ b.append(value) return b.build() -def utf8ord(ustr, start=0): - bytes = ustr.bytes - start = ustr.index_of_char(start) +def utf8ord_bytes(bytes, start): codepoint_length = utf8_code_length[ord(bytes[start])] if codepoint_length == 1: @@ -31,6 +30,16 @@ (ord(bytes[start + 2]) & 0x3F) << 6 | (ord(bytes[start + 3]) & 0x3F)) +def utf8ord(ustr, start=0): + start = ustr.index_of_char(start) + return utf8ord_bytes(ustr.bytes, start) + + at specialize.argtype(0) +def ORD(s, pos): + if isinstance(s, Utf8Str): + return utf8ord(s, pos) + else: + return ord(s[pos]) class Utf8Str(object): _immutable_fields_ = ['bytes', '_is_ascii', '_len'] @@ -72,14 +81,27 @@ return byte + def char_index_of_byte(self, byte_): + byte = 0 + pos = 0 + while byte < byte_: + pos += 1 + byte += utf8_code_length[ord(self.bytes[byte])] + + return pos + def __getitem__(self, char_pos): # This if statement is needed for [-1:0] to slice correctly + if char_pos >= self._len: + raise IndexError() if char_pos < 0: char_pos += self._len return self[char_pos:char_pos+1] def __getslice__(self, start, stop): - assert start < stop + assert start <= stop + if start == stop: + return Utf8Str('') # TODO: If start > _len or stop >= _len, then raise exception if self._is_ascii: @@ -102,6 +124,13 @@ return Utf8Str(self.bytes[start_byte:stop_byte], is_ascii, stop - start) + def __add__(self, other): + return Utf8Str(self.bytes + other.bytes, + self._is_ascii and other._is_ascii) + + def __mul__(self, count): + return Utf8Str(self.bytes * count, self._is_ascii) + def __len__(self): return self._len @@ 
-127,32 +156,162 @@ raise TypeError() def __iter__(self): - byte_pos = 0 - while byte_pos < len(self.bytes): - cplen = utf8_code_length[ord(self.bytes[byte_pos])] - yield Utf8Str(self.bytes[byte_pos:byte_pos+cplen]) - byte_pos += cplen + return self.char_iter() - @specialize.argtype(1) - def find(self, other): + def char_iter(self): + return Utf8StrCharIterator(self) + + def codepoint_iter(self): + return Utf8StrCodePointIterator(self) + + @specialize.argtype(1, 2) + def _bound_check(self, start, end): + if start is None: + start = 0 + elif start < 0: + start += len(self) + if start < 0: + start = 0 + else: + start = self.index_of_char(start) + elif start > len(self): + start = -1 + else: + start = self.index_of_char(start) + + if end is None or end >= len(self): + end = len(self.bytes) + elif end < 0: + end += len(self) + if end < 0: + end = 0 + else: + end = self.index_of_char(end) + elif end > len(self): + end = len(self.bytes) + else: + end = self.index_of_char(end) + + return start, end + + @specialize.argtype(2, 3) + def find(self, other, start=None, end=None): + start, end = self._bound_check(start, end) + if start == -1: + return -1 + if isinstance(other, Utf8Str): - return self.bytes.find(other.bytes) - if isinstance(other, unicode): - return unicode(self.bytes, 'utf8').find(other) - if isinstance(other, str): - return self.bytes.find(other) + pos = self.bytes.find(other.bytes, start, end) + elif isinstance(other, unicode): + pos = unicode(self.bytes, 'utf8').find(other, start, end) + elif isinstance(other, str): + pos = self.bytes.find(other, start, end) - def rfind(self, other): + if pos == -1: + return -1 + + return self.char_index_of_byte(pos) + + @specialize.argtype(2, 3) + def rfind(self, other, start=None, end=None): + start, end = self._bound_check(start, end) + if start == -1: + return -1 + if isinstance(other, Utf8Str): - return self.bytes.rfind(other.bytes) - if isinstance(other, unicode): - return unicode(self.bytes, 'utf8').rfind(other) - if 
isinstance(other, str): - return self.bytes.rfind(other) + pos = self.bytes.rfind(other.bytes, start, end) + elif isinstance(other, unicode): + return unicode(self.bytes, 'utf8').rfind(other, start, end) + elif isinstance(other, str): + pos = self.bytes.rfind(other, start, end) + + if pos == -1: + return -1 + + return self.char_index_of_byte(pos) + + @specialize.argtype(2, 3) + def count(self, other, start=None, end=None): + start, end = self._bound_check(start, end) + if start == -1: + return 0 + + if isinstance(other, Utf8Str): + count = self.bytes.count(other.bytes, start, end) + elif isinstance(other, unicode): + return unicode(self.bytes, 'utf8').count(other, start, end) + elif isinstance(other, str): + count = self.bytes.count(other, start, end) + + if count == -1: + return -1 + + return count def endswith(self, other): return self.rfind(other) == len(self) - len(other) + @specialize.argtype(1) + def split(self, other=None, maxsplit=-1): + if other is not None: + if isinstance(other, str): + other_bytes = other + if isinstance(other, Utf8Str): + other_bytes = other.bytes + return [Utf8Str(s) for s in self.bytes.split(other_bytes, maxsplit)] + + res = [] + iter = self.codepoint_iter() + while True: + # the start of the first word + for cd in iter: + if not unicodedb.isspace(cd): + break + else: + break + + iter.prev_count(1) + start_byte = iter.byte_pos + iter.next_count(1) + + if maxsplit == 0: + res.append(Utf8Str(self.bytes[start_byte:len(self.bytes)])) + break + + for cd in iter: + if unicodedb.isspace(cd): + break + else: + # Hit the end of the string + res.append(Utf8Str(self.bytes[start_byte:len(self.bytes)])) + break + + iter.prev_count(1) + res.append(Utf8Str(self.bytes[start_byte:iter.byte_pos])) + iter.next_count(1) + maxsplit -= 1 + + return res + + @specialize.argtype(1) + def rsplit(self, other=None, maxsplit=-1): + if other is not None: + if isinstance(other, str): + other_bytes = other + if isinstance(other, Utf8Str): + other_bytes = 
other.bytes + return [Utf8Str(s) for s in self.bytes.rsplit(other_bytes, maxsplit)] + + # TODO: I need to make a reverse_codepoint_iter first + + def join(self, other): + if len(other) == 0: + return Utf8Str('') + + assert isinstance(other[0], Utf8Str) + return Utf8Str(self.bytes.join([s.bytes for s in other]), + self._is_ascii and all(s._is_ascii for s in other)) + def as_unicode(self): """NOT_RPYTHON""" return self.bytes.decode('utf-8') @@ -162,6 +321,84 @@ """NOT_RPYTHON""" return Utf8Str(u.encode('utf-8')) +class Utf8StrCodePointIterator(object): + def __init__(self, ustr): + self.ustr = ustr + self.pos = 0 + self.byte_pos = 0 + + if len(ustr) != 0: + self.current = utf8ord_bytes(ustr.bytes, 0) + else: + self.current = -1 + + def __iter__(self): + return self + + def next(self): + if self.pos == len(self.ustr): + raise StopIteration() + self.current = utf8ord_bytes(self.ustr.bytes, self.byte_pos) + + self.byte_pos += utf8_code_length[ord(self.ustr.bytes[self.byte_pos])] + self.pos += 1 + + return self.current + + def next_count(self, count=1): + self.pos += count + while count > 1: + self.byte_pos += utf8_code_length[ord(self.ustr.bytes[self.byte_pos])] + count -= 1 + self.current = utf8ord_bytes(self.ustr.bytes, self.byte_pos) + self.byte_pos += utf8_code_length[ord(self.ustr.bytes[self.byte_pos])] + + def prev_count(self, count=1): + self.pos -= count + while count > 0: + self.byte_pos -= 1 + while utf8_code_length[ord(self.ustr.bytes[self.byte_pos])] == 0: + self.byte_pos -= 1 + count -= 1 + + self.current = utf8ord_bytes(self.ustr.bytes, self.byte_pos) + + def move(self, count): + if count > 0: + self.next_count(count) + elif count < 0: + self.prev_count(-count) + + def peek_next(self): + return utf8ord_bytes(self.ustr.bytes, self.byte_pos) + +class Utf8StrCharIterator(object): + def __init__(self, ustr): + self.ustr = ustr + self.byte_pos = 0 + self.current = self._get_current() + + def __iter__(self): + return self + + def _get_current(self): + if 
self.byte_pos == len(self.ustr.bytes): + return None + length = utf8_code_length[ord(self.ustr.bytes[self.byte_pos])] + return Utf8Str(''.join([self.ustr.bytes[i] + for i in range(self.byte_pos, self.byte_pos + length)]), + length == 1) + + def next(self): + #import pdb; pdb.set_trace() + ret = self.current + if ret is None: + raise StopIteration() + + self.byte_pos += utf8_code_length[ord(self.ustr.bytes[self.byte_pos])] + self.current = self._get_current() + return ret + class Utf8Builder(object): @specialize.argtype(1) def __init__(self, init_size=None): @@ -204,10 +441,19 @@ assert ord(c) < 128 self._builder.append(c) - def append_slice(self, s, start, end, is_ascii=False): - self._builder.append_slice(s, start, end) - if not is_ascii: - self._is_ascii = False + @specialize.argtype(1) + def append_slice(self, s, start, end): + if isinstance(s, str): + self._builder.append_slice(s, start, end) + elif isinstance(s, Utf8Str): + self._builder.append_slice(s.bytes, s.index_of_char(start), + s.index_of_char(end)) + else: + raise TypeError("Invalid type '%s' for Utf8Str.append_slice" % + type(s)) + + def append_multiple_char(self, c, count): + self._builder.append_multiple_char(c, count) def build(self): return Utf8Str(self._builder.build(), self._is_ascii) diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py --- a/pypy/interpreter/utf8_codecs.py +++ b/pypy/interpreter/utf8_codecs.py @@ -6,7 +6,7 @@ from rpython.rlib.unicodedata import unicodedb from rpython.rlib.runicode import utf8_code_length -from pypy.interpreter.utf8 import Utf8Str, Utf8Builder, utf8chr, utf8ord +from pypy.interpreter.utf8 import Utf8Str, Utf8Builder, utf8chr, utf8ord, ORD BYTEORDER = sys.byteorder @@ -33,7 +33,7 @@ # Non-escape characters are interpreted as Unicode ordinals if ch != '\\': - builder.append(ch) + builder.append(ord(ch)) pos += 1 continue @@ -383,6 +383,8 @@ @specialize.arg_or_var(3) def unicode_encode_ucs1_helper(p, size, errors, errorhandler=None, 
limit=256): + if len(p) == 0: + return '' if errorhandler is None: errorhandler = default_unicode_error_encode if limit == 256: @@ -415,8 +417,9 @@ result.append(rs) continue for ch in ru: - if ord(ch) < limit: - result.append(chr(ord(ch))) + cd = ORD(ch, 0) + if cd < limit: + result.append(chr(cd)) else: errorhandler("strict", encoding, reason, p, collstart, collend) @@ -436,15 +439,60 @@ # ____________________________________________________________ # utf-8 {{{ -# Converting bytes (utf8) to unicode? -# I guess we just make sure we're looking at valid utf-8 and then make the -# object? def unicode_encode_utf_8(s, size, errors, errorhandler=None, allow_surrogates=False): - if size < len(s): - return s.bytes[0:s.index_of_char(size)] - return s.bytes + if len(s) == 0: + return '' + if errorhandler is None: + errorhandler = default_unicode_error_encode + + return unicode_encode_utf_8_impl(s, size, errors, errorhandler, + allow_surrogates) + +def unicode_encode_utf_8_impl(s, size, errors, errorhandler, allow_surrogates): + iter = s.codepoint_iter() + for oc in iter: + if oc >= 0xD800 and oc <= 0xDFFF: + break + if iter.pos == size: + return s.bytes + else: + return s.bytes + + iter.move(-1) + result = Utf8Builder(len(s.bytes)) + result.append_slice(s.bytes, 0, iter.byte_pos) + + for oc in iter: + if oc >= 0xD800 and oc <= 0xDFFF: + # Check the next character to see if this is a surrogate pair + if (iter.pos != len(s) and oc <= 0xDBFF and + 0xDC00 <= iter.peek_next() <= 0xDFFF): + oc2 = iter.next() + result.append(((oc - 0xD800) << 10 | (oc2 - 0xDC00)) + 0x10000) + elif allow_surrogates: + result.append(oc) + else: + ru, rs, pos = errorhandler(errors, 'utf8', + 'surrogates not allowed', s, + iter.pos-1, iter.pos) + iter.move(pos - iter.pos) + if rs is not None: + # py3k only + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0x80: + result.append(ch) + else: + errorhandler('strict', 'utf8', + 'surrogates not allowed', + s, pos-1, pos) + else: + 
result.append(oc) + + return result.build().bytes def str_decode_utf_8(s, size, errors, final=False, errorhandler=None, allow_surrogates=False): @@ -1219,7 +1267,7 @@ # ____________________________________________________________ # Charmap {{{ -ERROR_CHAR = u'\ufffe' +ERROR_CHAR = Utf8Str.from_unicode(u'\ufffe') @specialize.argtype(5) def str_decode_charmap(s, size, errors, final=False, @@ -1296,84 +1344,16 @@ def str_decode_unicode_internal(s, size, errors, final=False, errorhandler=None): - if errorhandler is None: - errorhandler = default_unicode_error_decode - if size == 0: - return u'', 0 - - if MAXUNICODE < 65536: - unicode_bytes = 2 + if BYTEORDER == 'little': + return str_decode_utf_32_le(s, size, errors, errorhandler) else: - unicode_bytes = 4 - if BYTEORDER == "little": - start = 0 - stop = unicode_bytes - step = 1 - else: - start = unicode_bytes - 1 - stop = -1 - step = -1 - - result = UnicodeBuilder(size // unicode_bytes) - pos = 0 - while pos < size: - if pos > size - unicode_bytes: - res, pos = errorhandler(errors, "unicode_internal", - "truncated input", - s, pos, size) - result.append(res) - if pos > size - unicode_bytes: - break - continue - t = r_uint(0) - h = 0 - for j in range(start, stop, step): - t += r_uint(ord(s[pos + j])) << (h*8) - h += 1 - if t > MAXUNICODE: - res, pos = errorhandler(errors, "unicode_internal", - "unichr(%d) not in range" % (t,), - s, pos, pos + unicode_bytes) - result.append(res) - continue - result.append(UNICHR(t)) - pos += unicode_bytes - return result.build(), pos + return str_decode_utf_32_be(s, size, errors, errorhandler) def unicode_encode_unicode_internal(s, size, errors, errorhandler=None): - if size == 0: - return '' - - if MAXUNICODE < 65536: - unicode_bytes = 2 + if BYTEORDER == 'little': + return unicode_encode_utf_32_le(s, size, errors, errorhandler) else: - unicode_bytes = 4 - - result = StringBuilder(size * unicode_bytes) - pos = 0 - while pos < size: - oc = utf8ord(s, pos) - if MAXUNICODE < 65536: - if 
BYTEORDER == "little": - result.append(chr(oc & 0xFF)) - result.append(chr(oc >> 8 & 0xFF)) - else: - result.append(chr(oc >> 8 & 0xFF)) - result.append(chr(oc & 0xFF)) - else: - if BYTEORDER == "little": - result.append(chr(oc & 0xFF)) - result.append(chr(oc >> 8 & 0xFF)) - result.append(chr(oc >> 16 & 0xFF)) - result.append(chr(oc >> 24 & 0xFF)) - else: - result.append(chr(oc >> 24 & 0xFF)) - result.append(chr(oc >> 16 & 0xFF)) - result.append(chr(oc >> 8 & 0xFF)) - result.append(chr(oc & 0xFF)) - pos += 1 - - return result.build() + return unicode_encode_utf_32_be(s, size, errors, errorhandler) # }}} diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -3,6 +3,7 @@ from rpython.rlib.rstring import UnicodeBuilder from rpython.rlib.runicode import code_to_unichr, MAXUNICODE +from pypy.interpreter.utf8 import Utf8Builder, Utf8Str, utf8chr, utf8ord from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -206,13 +207,13 @@ w_end = space.getattr(w_exc, space.wrap('end')) size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - text = u'?' * size + text = Utf8Str('?' 
* size, True) return space.newtuple([space.wrap(text), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): - text = u'\ufffd' + text = utf8chr(0xfffd) return space.newtuple([space.wrap(text), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError): - text = u'\ufffd' * size + text = utf8chr(0xfffd) * size return space.newtuple([space.wrap(text), w_end]) else: raise oefmt(space.w_TypeError, @@ -251,25 +252,26 @@ start = space.int_w(space.getattr(w_exc, space.wrap('start'))) w_end = space.getattr(w_exc, space.wrap('end')) end = space.int_w(w_end) - builder = UnicodeBuilder() + + builder = Utf8Builder() pos = start while pos < end: - oc = ord(obj[pos]) + oc = utf8ord(obj, pos) num = hex(oc) if (oc >= 0x10000): - builder.append(u"\\U") + builder.append("\\U") zeros = 8 elif (oc >= 0x100): - builder.append(u"\\u") + builder.append("\\u") zeros = 4 else: - builder.append(u"\\x") + builder.append("\\x") zeros = 2 lnum = len(num) nb = zeros + 2 - lnum # num starts with '0x' if nb > 0: builder.append_multiple_char(u'0', nb) - builder.append_slice(unicode(num), 2, lnum) + builder.append_slice(num, 2, lnum) pos += 1 return space.newtuple([space.wrap(builder.build()), w_end]) else: @@ -378,7 +380,6 @@ # ____________________________________________________________ # delegation to runicode -#from rpython.rlib import runicode from pypy.interpreter import utf8_codecs def make_encoder_wrapper(name): @@ -548,7 +549,7 @@ if not 0 <= x <= 0x10FFFF: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return code_to_unichr(x) + return utf8chr(x) elif space.is_w(w_ch, space.w_None): # Charmap may return None return errorchar @@ -566,7 +567,7 @@ # get the character from the mapping try: - w_ch = space.getitem(self.w_mapping, space.newint(ord(ch))) + w_ch = space.getitem(self.w_mapping, space.newint(utf8ord(ch))) except OperationError, e: if not e.match(space, space.w_LookupError): raise @@ -595,7 +596,7 @@ if errors is None: 
errors = 'strict' if len(string) == 0: - return space.newtuple([space.wrap(u''), space.wrap(0)]) + return space.newtuple([space.wrap(Utf8Str('')), space.wrap(0)]) if space.is_none(w_mapping): mapping = None @@ -631,7 +632,7 @@ w_charmap = space.newdict() for num in range(len(chars)): elem = chars[num] - space.setitem(w_charmap, space.newint(ord(elem)), space.newint(num)) + space.setitem(w_charmap, space.newint(utf8ord(elem)), space.newint(num)) return w_charmap # ____________________________________________________________ diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -10,6 +10,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import ( WrappedDefault, interp2app, interpindirect2app, unwrap_spec) +from pypy.interpreter.utf8 import Utf8Str from pypy.objspace.std import newformat from pypy.objspace.std.basestringtype import basestring_typedef from pypy.objspace.std.formatting import mod_format @@ -715,11 +716,11 @@ sub = self_as_uni._op_val(space, w_old) by = self_as_uni._op_val(space, w_new) try: - res = replace(input, sub, by, count) + res = replace(input.bytes, sub.bytes, by.bytes, count) except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - return self_as_uni._new(res) + return self_as_uni._new(Utf8Str(res)) return self._StringMethods_descr_replace(space, w_old, w_new, count) _StringMethods_descr_join = descr_join diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -2,13 +2,14 @@ String formatting routines. 
""" import sys -from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit from rpython.rlib.rfloat import formatd, DTSF_ALT, isnan, isinf -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder from rpython.rlib.unroll import unrolling_iterable from rpython.rlib.rarithmetic import INT_MAX from rpython.tool.sourcetools import func_with_new_name +from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.utf8 import Utf8Builder, ORD class BaseStringFormatter(object): @@ -168,7 +169,7 @@ def peekchr(self): # return the 'current' character try: - return self.fmt[self.fmtpos] + return ORD(self.fmt, self.fmtpos) except IndexError: space = self.space raise OperationError(space.w_ValueError, @@ -185,16 +186,16 @@ pcount = 1 while 1: try: - c = fmt[i] + c = ORD(fmt, i) except IndexError: space = self.space raise OperationError(space.w_ValueError, space.wrap("incomplete format key")) - if c == ')': + if c == ord(')'): pcount -= 1 if pcount == 0: break - elif c == '(': + elif c == ord('('): pcount += 1 i += 1 self.fmtpos = i + 1 # first character after ')' @@ -210,7 +211,7 @@ return space.getitem(self.w_valuedict, w_key) def parse_fmt(self): - if self.peekchr() == '(': + if self.peekchr() == ord('('): w_value = self.getmappingvalue(self.getmappingkey()) else: w_value = None @@ -223,7 +224,7 @@ self.f_ljust = True self.width = -self.width - if self.peekchr() == '.': + if self.peekchr() == ord('.'): self.forward() self.prec = self.peel_num('prec', INT_MAX) if self.prec < 0: @@ -232,7 +233,7 @@ self.prec = -1 c = self.peekchr() - if c == 'h' or c == 'l' or c == 'L': + if c == ord('h') or c == ord('l') or c == ord('L'): self.forward() return w_value @@ -247,15 +248,15 @@ self.f_zero = False while True: c = self.peekchr() - if c == '-': + if c == ord('-'): self.f_ljust = True - elif c == '+': + elif c == ord('+'): self.f_sign = True - elif c == ' ': + elif c == ord(' '): 
self.f_blank = True - elif c == '#': + elif c == ord('#'): self.f_alt = True - elif c == '0': + elif c == ord('0'): self.f_zero = True else: break @@ -266,7 +267,7 @@ def peel_num(self, name, maxval): space = self.space c = self.peekchr() - if c == '*': + if c == ord('*'): self.forward() w_value = self.nextinputvalue() if name == 'width': @@ -277,7 +278,7 @@ assert False result = 0 while True: - digit = ord(c) - ord('0') + digit = c - ord('0') if not (0 <= digit <= 9): break if result > (maxval - digit) / 10: @@ -291,16 +292,17 @@ def format(self): lgt = len(self.fmt) + 4 * len(self.values_w) + 10 if do_unicode: - result = UnicodeBuilder(lgt) + result = Utf8Builder(lgt) else: result = StringBuilder(lgt) self.result = result + while True: # fast path: consume as many characters as possible fmt = self.fmt i = i0 = self.fmtpos while i < len(fmt): - if fmt[i] == '%': + if ORD(fmt, i) == ord('%'): break i += 1 else: @@ -313,8 +315,8 @@ w_value = self.parse_fmt() c = self.peekchr() self.forward() - if c == '%': - self.std_wp(const('%')) + if c == ord('%'): + self.std_wp('%') continue if w_value is None: w_value = self.nextinputvalue() @@ -325,7 +327,7 @@ if c == c1: # 'c1' is an annotation constant here, # so this getattr() is ok - do_fmt = getattr(self, 'fmt_' + c1) + do_fmt = getattr(self, 'fmt_' + chr(c1)) do_fmt(w_value) break else: @@ -348,7 +350,7 @@ else: s = c msg = "unsupported format character '%s' (0x%x) at index %d" % ( - s, ord(c), self.fmtpos - 1) + s, ORD(c, 0), self.fmtpos - 1) raise OperationError(space.w_ValueError, space.wrap(msg)) def std_wp(self, r): @@ -359,7 +361,7 @@ prec = self.prec if prec == -1 and self.width == 0: # fast path - self.result.append(const(r)) + self.result.append(r) return if prec >= 0 and prec < length: length = prec # ignore the end of the string if too long @@ -369,12 +371,12 @@ padding = 0 assert padding >= 0 if not self.f_ljust and padding > 0: - result.append_multiple_char(const(' '), padding) + 
result.append_multiple_char(' ', padding) # add any padding at the left of 'r' padding = 0 result.append_slice(r, 0, length) # add 'r' itself if padding > 0: - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # add any remaining padding at the right std_wp._annspecialcase_ = 'specialize:argtype(1)' @@ -405,18 +407,19 @@ assert padding >= 0 if padnumber == '>': - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # pad with spaces on the left if sign: - result.append(const(r[0])) # the sign - result.append(const(prefix)) # the prefix + # TODO: Why r[0]? + result.append(r[0]) # the sign + result.append(prefix) # the prefix if padnumber == '0': - result.append_multiple_char(const('0'), padding) + result.append_multiple_char('0', padding) # pad with zeroes - result.append_slice(const(r), int(sign), len(r)) + result.append_slice(r, int(sign), len(r)) # the rest of the number if padnumber == '<': # spaces on the right - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) def string_formatting(self, w_value): space = self.space @@ -499,7 +502,7 @@ # an "unrolling" list of all the known format characters, # collected from which fmt_X() functions are defined in the class FORMATTER_CHARS = unrolling_iterable( - [_name[-1] for _name in dir(StringFormatter) + [ord(_name[-1]) for _name in dir(StringFormatter) if len(_name) == 5 and _name.startswith('fmt_')]) def format(space, w_fmt, values_w, w_valuedict, do_unicode): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -4,6 +4,7 @@ import string from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.utf8 import Utf8Str, Utf8Builder, ORD from rpython.rlib import rstring, runicode, rlocale, rfloat, jit from rpython.rlib.objectmodel import specialize from rpython.rlib.rfloat import 
copysign, formatd @@ -47,7 +48,7 @@ def __init__(self, space, is_unicode, template): self.space = space self.is_unicode = is_unicode - self.empty = u"" if is_unicode else "" + self.empty = Utf8Str("") if is_unicode else "" self.template = template def build(self, args): @@ -59,7 +60,7 @@ def _build_string(self, start, end, level): space = self.space if self.is_unicode: - out = rstring.UnicodeBuilder() + out = Utf8Builder() else: out = rstring.StringBuilder() if not level: @@ -74,23 +75,23 @@ space = self.space last_literal = i = start while i < end: - c = s[i] + c = ORD(s, i) i += 1 - if c == "{" or c == "}": + if c == ord("{") or c == ord("}"): at_end = i == end # Find escaped "{" and "}" markup_follows = True - if c == "}": - if at_end or s[i] != "}": + if c == ord("}"): + if at_end or ORD(s, i) != ord("}"): raise OperationError(space.w_ValueError, space.wrap("Single '}'")) i += 1 markup_follows = False - if c == "{": + if c == ord("{"): if at_end: raise OperationError(space.w_ValueError, space.wrap("Single '{'")) - if s[i] == "{": + if ORD(s, i) == ord("{"): i += 1 markup_follows = False # Attach literal data, ending with { or } @@ -111,11 +112,11 @@ field_start = i recursive = False while i < end: - c = s[i] - if c == "{": + c = ORD(s, i) + if c == ord("{"): recursive = True nested += 1 - elif c == "}": + elif c == ord("}"): nested -= 1 if not nested: break @@ -139,9 +140,9 @@ i = start while i < end: c = s[i] - if c == ":" or c == "!": + if c == ord(":") or c == ord("!"): end_name = i - if c == "!": + if c == ord("!"): i += 1 if i == end: w_msg = self.space.wrap("expected conversion") @@ -170,7 +171,7 @@ end = len(name) while i < end: c = name[i] - if c == "[" or c == ".": + if c == ord("[") or c == ord("."): break i += 1 empty = not i @@ -228,12 +229,12 @@ i = start while i < end: c = name[i] - if c == ".": + if c == ord("."): i += 1 start = i while i < end: c = name[i] - if c == "[" or c == ".": + if c == ord("[") or c == ord("."): break i += 1 if start == 
i: @@ -245,13 +246,13 @@ else: self.parser_list_w.append(space.newtuple([ space.w_True, w_attr])) - elif c == "[": + elif c == ord("["): got_bracket = False i += 1 start = i while i < end: c = name[i] - if c == "]": + if c == ord("]"): got_bracket = True break i += 1 @@ -280,8 +281,8 @@ i = 0 end = len(name) while i < end: - c = name[i] - if c == "[" or c == ".": + c = ORD(name, i) + if c == ord("[") or c == ord("."): break i += 1 if i == 0: @@ -303,10 +304,10 @@ def _convert(self, w_obj, conversion): space = self.space - conv = conversion[0] - if conv == "r": + conv = ORD(conversion, 0) + if conv == ord("r"): return space.repr(w_obj) - elif conv == "s": + elif conv == ord("s"): if self.is_unicode: return space.call_function(space.w_unicode, w_obj) return space.str(w_obj) @@ -416,15 +417,15 @@ self.spec = spec def _is_alignment(self, c): - return (c == "<" or - c == ">" or - c == "=" or - c == "^") + return (c == ord("<") or + c == ord(">") or + c == ord("=") or + c == ord("^")) def _is_sign(self, c): - return (c == " " or - c == "+" or - c == "-") + return (c == ord(" ") or + c == ord("+") or + c == ord("-")) def _parse_spec(self, default_type, default_align): space = self.space diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -9,6 +9,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, unwrap_spec +from pypy.interpreter.utf8 import ORD from pypy.objspace.std import slicetype from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice @@ -141,7 +142,7 @@ if d > 0: offset = d//2 + (d & width & 1) fillchar = self._multi_chr(fillchar[0]) - centered = offset * fillchar + value + (d - offset) * fillchar + centered = fillchar * offset + value + fillchar * (d - offset) else: centered = value @@ -192,9 +193,9 @@ return self._empty() if self._use_rstr_ops(space, self): - 
splitted = value.split(self._chr('\t')) + splitted = value.split('\t') else: - splitted = split(value, self._chr('\t')) + splitted = split(value, '\t') try: ovfcheck(len(splitted) * tabsize) @@ -203,7 +204,7 @@ expanded = oldtoken = splitted.pop(0) for token in splitted: - expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken, + expanded += self._multi_chr(' ') * self._tabindent(oldtoken, tabsize) + token oldtoken = token @@ -218,7 +219,8 @@ offset = len(token) while 1: - if token[offset-1] == "\n" or token[offset-1] == "\r": + if (ORD(token, offset-1) == ord("\n") or + ORD(token, offset-1) == ord("\r")): break distance += 1 offset -= 1 @@ -455,7 +457,7 @@ d = width - len(value) if d > 0: fillchar = self._multi_chr(fillchar[0]) - value += d * fillchar + value += fillchar * d return self._new(value) @@ -469,7 +471,7 @@ d = width - len(value) if d > 0: fillchar = self._multi_chr(fillchar[0]) - value = d * fillchar + value + value = fillchar * d + value return self._new(value) @@ -558,31 +560,39 @@ res = [] value = self._val(space) if space.is_none(w_sep): - res = split(value, maxsplit=maxsplit) + res = self._split(value, None, maxsplit) return self._newlist_unwrapped(space, res) by = self._op_val(space, w_sep) if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") - res = split(value, by, maxsplit) + res = self._split(value, by, maxsplit) return self._newlist_unwrapped(space, res) + @staticmethod + def _split(value, sep=None, maxsplit=-1): + return split(value, sep, maxsplit) + @unwrap_spec(maxsplit=int) def descr_rsplit(self, space, w_sep=None, maxsplit=-1): res = [] value = self._val(space) if space.is_none(w_sep): - res = rsplit(value, maxsplit=maxsplit) + res = self._rsplit(value, maxsplit=maxsplit) return self._newlist_unwrapped(space, res) by = self._op_val(space, w_sep) if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") - res = rsplit(value, by, maxsplit) + res = self._split(value, by, maxsplit) return 
self._newlist_unwrapped(space, res) + @staticmethod + def _rsplit(value, sep=None, maxsplit=-1): + return value.split(sep, maxsplit) + @unwrap_spec(keepends=bool) def descr_splitlines(self, space, keepends=False): value = self._val(space) @@ -757,20 +767,21 @@ def descr_zfill(self, space, width): selfval = self._val(space) if len(selfval) == 0: - return self._new(self._multi_chr(self._chr('0')) * width) + return self._new(self._multi_chr('0') * width) num_zeros = width - len(selfval) if num_zeros <= 0: # cannot return self, in case it is a subclass of str return self._new(selfval) builder = self._builder(width) - if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'): + if len(selfval) > 0 and (ORD(selfval, 0) == ord('+') or + ORD(selfval, 0) == ord('-')): # copy sign to first position builder.append(selfval[0]) start = 1 else: start = 0 - builder.append_multiple_char(self._chr('0'), num_zeros) + builder.append_multiple_char('0', num_zeros) builder.append_slice(selfval, start, len(selfval)) return self._new(builder.build()) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1,5 +1,6 @@ import py import sys +from pypy.interpreter.utf8 import Utf8Str class TestUnicodeObject: @@ -22,12 +23,12 @@ assert len(warnings) == 2 def test_listview_unicode(self): - w_str = self.space.wrap(u'abcd') + w_str = self.space.wrap(Utf8Str.from_unicode(u'abcd')) assert self.space.listview_unicode(w_str) == list(u"abcd") def test_new_shortcut(self): space = self.space - w_uni = self.space.wrap(u'abcd') + w_uni = self.space.wrap(Utf8Str.from_unicode(u'abcd')) w_new = space.call_method( space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py 
@@ -1,13 +1,14 @@ """The builtin unicode implementation""" from rpython.rlib.objectmodel import ( - compute_hash, compute_unique_id, import_from_mixin) + compute_hash, compute_unique_id, import_from_mixin, specialize) from rpython.rlib.buffer import StringBuffer -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import ( + StringBuilder, replace, startswith, endswith) from pypy.interpreter import unicodehelper from pypy.interpreter.baseobjspace import W_Root -from pypy.interpreter.utf8 import Utf8Str, utf8chr, utf8ord +from pypy.interpreter.utf8 import Utf8Str, Utf8Builder, utf8chr, utf8ord from pypy.interpreter.utf8_codecs import ( make_unicode_escape_function, str_decode_ascii, str_decode_utf_8, unicode_encode_ascii, unicode_encode_utf_8) @@ -67,11 +68,12 @@ return self._value def readbuf_w(self, space): - from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE - builder = StringBuilder(len(self._value) * UNICODE_SIZE) - for unich in self._value: - pack_unichar(unich, builder) - return StringBuffer(builder.build()) + return StringBuffer(self._value.bytes) + #from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE + #builder = StringBuilder(len(self._value) * UNICODE_SIZE) + #for unich in self._value: + # pack_unichar(unich, builder) + #return StringBuffer(builder.build()) def writebuf_w(self, space): raise OperationError(space.w_TypeError, space.wrap( @@ -87,7 +89,7 @@ raise oefmt(space.w_TypeError, "ord() expected a character, but string of length %d " "found", len(self._value)) - return space.wrap(utf8ord(self)) + return space.wrap(utf8ord(self._value)) def _new(self, value): return W_UnicodeObject(value) @@ -120,9 +122,18 @@ def _chr(self, char): assert len(char) == 1 - return unicode(char)[0] + assert ord(char) < 127 + return Utf8Str(char, True) - _builder = UnicodeBuilder + @specialize.argtype(1) + def _multi_chr(self, c): + if isinstance(c, str): + assert ord(c) < 127 + return Utf8Str(c, True) 
+ else: + return c + + _builder = Utf8Builder def _isupper(self, ch): return unicodedb.isupper(utf8ord(ch)) @@ -158,13 +169,13 @@ return unicodedb.islinebreak(utf8ord(ch)) def _upper(self, ch): - return unichr(unicodedb.toupper(utf8ord(ch))) + return utf8chr(unicodedb.toupper(utf8ord(ch))) def _lower(self, ch): - return unichr(unicodedb.tolower(utf8ord(ch))) + return utf8chr(unicodedb.tolower(utf8ord(ch))) def _title(self, ch): - return unichr(unicodedb.totitle(utf8ord(ch))) + return utf8chr(unicodedb.totitle(utf8ord(ch))) def _newlist_unwrapped(self, space, lst): return space.newlist_unicode(lst) @@ -302,6 +313,35 @@ def descr_mod(self, space, w_values): return mod_format(space, self, w_values, do_unicode=True) + @unwrap_spec(count=int) + def descr_replace(self, space, w_old, w_new, count=-1): + input = self._val(space) + + sub = self._op_val(space, w_old) + by = self._op_val(space, w_new) + try: + res = replace(input.bytes, sub.bytes, by.bytes, count) + except OverflowError: + raise oefmt(space.w_OverflowError, "replace string is too long") + + return self._new(Utf8Str(res)) + + def _startswith(self, space, value, w_prefix, start, end): + return startswith(value.bytes, self._op_val(space, w_prefix).bytes, + start, end) + + def _endswith(self, space, value, w_prefix, start, end): + return endswith(value.bytes, self._op_val(space, w_prefix).bytes, + start, end) + + @staticmethod + def _split(value, sep=None, maxsplit=-1): + return value.split(sep, maxsplit) + + @staticmethod + def _rsplit(value, sep=None, maxsplit=-1): + return value.split(sep, maxsplit) + def descr_translate(self, space, w_table): selfvalue = self._value w_sys = space.getbuiltinmodule('sys') @@ -313,7 +353,7 @@ w_newval = space.getitem(w_table, space.wrap(utf8ord(unichar))) except OperationError as e: if e.match(space, space.w_LookupError): - result.append(unichar) + result.append(unichar.bytes) else: raise else: @@ -325,14 +365,14 @@ raise oefmt(space.w_TypeError, "character mapping must be in 
range(%s)", hex(maxunicode + 1)) - result.append(unichr(newval)) + result.append(utf8chr(newval).bytes) elif space.isinstance_w(w_newval, space.w_unicode): - result.append(space.unicode_w(w_newval)) + result.append(space.unicode_w(w_newval).bytes) else: raise oefmt(space.w_TypeError, "character mapping must return integer, None " "or unicode") - return W_UnicodeObject(u''.join(result)) + return W_UnicodeObject(Utf8Str(''.join(result))) def descr_encode(self, space, w_encoding=None, w_errors=None): encoding, errors = _get_encoding_and_errors(space, w_encoding, @@ -1090,7 +1130,7 @@ digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] for i in xrange(len(unistr)): - uchr = ord(unistr[i]) + uchr = utf8ord(unistr, i) if unicodedb.isspace(uchr): result[i] = ' ' continue From noreply at buildbot.pypy.org Mon Jun 30 18:46:44 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 18:46:44 +0200 (CEST) Subject: [pypy-commit] pypy default: SETINTERIORFIELD_GC should also generate a COND_CALL_GC_WB_ARRAY. This Message-ID: <20140630164644.903561D2D06@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72292:b4bca9ebe10b Date: 2014-06-30 16:32 +0200 http://bitbucket.org/pypy/pypy/changeset/b4bca9ebe10b/ Log: SETINTERIORFIELD_GC should also generate a COND_CALL_GC_WB_ARRAY. This looks essential to get correct performance behavior in programs that do a couple of writes to very large dictionaries every minor collection. 
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -396,16 +396,6 @@ #op = op.copy_and_change(rop.SETFIELD_RAW) self.newops.append(op) - def handle_write_barrier_setinteriorfield(self, op): - val = op.getarg(0) - if val not in self.write_barrier_applied: - v = op.getarg(2) - if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and - bool(v.value)): # store a non-NULL - self.gen_write_barrier(val) - #op = op.copy_and_change(rop.SETINTERIORFIELD_RAW) - self.newops.append(op) - def handle_write_barrier_setarrayitem(self, op): val = op.getarg(0) if val not in self.write_barrier_applied: @@ -413,9 +403,11 @@ if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and bool(v.value)): # store a non-NULL self.gen_write_barrier_array(val, op.getarg(1)) - #op = op.copy_and_change(rop.SETARRAYITEM_RAW) + #op = op.copy_and_change(rop.SET{ARRAYITEM,INTERIORFIELD}_RAW) self.newops.append(op) + handle_write_barrier_setinteriorfield = handle_write_barrier_setarrayitem + def gen_write_barrier(self, v_base): write_barrier_descr = self.gc_ll_descr.write_barrier_descr args = [v_base] diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -669,7 +669,7 @@ jump(p1, p2) """, """ [p1, p2] - cond_call_gc_wb(p1, descr=wbdescr) + cond_call_gc_wb_array(p1, 0, descr=wbdescr) setinteriorfield_gc(p1, 0, p2, descr=interiorzdescr) jump(p1, p2) """, interiorzdescr=interiorzdescr) From noreply at buildbot.pypy.org Mon Jun 30 18:47:40 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 18:47:40 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: import stmgc/e1df81263680 (branch card-marking) Message-ID: <20140630164740.5600C1D2D06@cobra.cs.uni-duesseldorf.de> Author: 
Armin Rigo Branch: stmgc-c7 Changeset: r72293:6060a7db4b02 Date: 2014-06-30 17:58 +0200 http://bitbucket.org/pypy/pypy/changeset/6060a7db4b02/ Log: import stmgc/e1df81263680 (branch card-marking) diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision --- a/rpython/translator/stm/src_stm/revision +++ b/rpython/translator/stm/src_stm/revision @@ -1,1 +1,1 @@ -bde7c7634878 +e1df81263680 diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c --- a/rpython/translator/stm/src_stm/stm/core.c +++ b/rpython/translator/stm/src_stm/stm/core.c @@ -236,6 +236,23 @@ return (size >= _STM_MIN_CARD_OBJ_SIZE); } +char _stm_write_slowpath_card_extra(object_t *obj) +{ + /* the PyPy JIT calls this function directly if it finds that an + array doesn't have the GCFLAG_CARDS_SET */ + bool mark_card = obj_should_use_cards(obj); + write_slowpath_common(obj, mark_card); + return mark_card; +} + +char *_stm_write_slowpath_card_extra_base(void) +{ + /* for the PyPy JIT: _stm_write_slowpath_card_extra_base[obj >> 4] + is the byte that must be set to CARD_MARKED. The logic below + does the same, but more explicitly. */ + return (char *)write_locks - WRITELOCK_START + 1; +} + void _stm_write_slowpath_card(object_t *obj, uintptr_t index) { /* If CARDS_SET is not set so far, issue a normal write barrier. @@ -243,8 +260,7 @@ card marking instead. 
*/ if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { - bool mark_card = obj_should_use_cards(obj); - write_slowpath_common(obj, mark_card); + char mark_card = _stm_write_slowpath_card_extra(obj); if (!mark_card) return; } diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h --- a/rpython/translator/stm/src_stm/stm/core.h +++ b/rpython/translator/stm/src_stm/stm/core.h @@ -226,10 +226,10 @@ static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START]; enum /* card values for write_locks */ { - CARD_CLEAR = 0, /* card not used at all */ - CARD_MARKED = 100, /* card marked for tracing in the next gc */ - CARD_MARKED_OLD = 101, /* card was marked before, but cleared - in a GC */ + CARD_CLEAR = 0, /* card not used at all */ + CARD_MARKED = _STM_CARD_MARKED, /* card marked for tracing in the next gc */ + CARD_MARKED_OLD = 101, /* card was marked before, but cleared + in a GC */ }; diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h --- a/rpython/translator/stm/src_stm/stmgc.h +++ b/rpython/translator/stm/src_stm/stmgc.h @@ -109,6 +109,9 @@ but it's not exposed to C code so far */ void _stm_write_slowpath(object_t *); void _stm_write_slowpath_card(object_t *, uintptr_t); +char _stm_write_slowpath_card_extra(object_t *); +char *_stm_write_slowpath_card_extra_base(void); +#define _STM_CARD_MARKED 100 object_t *_stm_allocate_slowpath(ssize_t); object_t *_stm_allocate_external(ssize_t); void _stm_become_inevitable(const char*); From noreply at buildbot.pypy.org Mon Jun 30 18:47:41 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 18:47:41 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: in-progress Message-ID: <20140630164741.BC55C1D2D06@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72294:a006a6263e15 Date: 2014-06-30 18:41 +0200 http://bitbucket.org/pypy/pypy/changeset/a006a6263e15/ Log: in-progress diff --git 
a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -302,9 +302,11 @@ self.extract_flag_byte(self.jit_wb_cards_set)) # # the x86 backend uses the following "accidental" facts to - # avoid one instruction: - assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs - assert self.jit_wb_cards_set_singlebyte == -0x80 + # avoid one instruction (not with stm): + if not gc_ll_descr.stm: + assert (self.jit_wb_cards_set_byteofs == + self.jit_wb_if_flag_byteofs) + assert self.jit_wb_cards_set_singlebyte == -0x80 else: self.jit_wb_cards_set = 0 diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -416,18 +416,14 @@ self.gen_write_barrier(val, op.stm_location) self.newops.append(op) - def handle_write_barrier_setinteriorfield(self, op): - val = op.getarg(0) - if self.must_apply_write_barrier(val, op.getarg(2)): - self.gen_write_barrier(val, op.stm_location) - self.newops.append(op) - def handle_write_barrier_setarrayitem(self, op): val = op.getarg(0) if self.must_apply_write_barrier(val, op.getarg(2)): self.gen_write_barrier_array(val, op.getarg(1), op.stm_location) self.newops.append(op) + handle_write_barrier_setinteriorfield = handle_write_barrier_setarrayitem + def gen_write_barrier(self, v_base, stm_location): write_barrier_descr = self.gc_ll_descr.write_barrier_descr args = [v_base] diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -678,7 +678,7 @@ jump(p1, p2) """, """ [p1, p2] - cond_call_gc_wb(p1, descr=wbdescr) + cond_call_gc_wb_array(p1, 0, descr=wbdescr) setinteriorfield_gc(p1, 0, p2, descr=interiorzdescr) jump(p1, p2) """, 
interiorzdescr=interiorzdescr) diff --git a/rpython/jit/backend/llsupport/test/test_stmrewrite.py b/rpython/jit/backend/llsupport/test/test_stmrewrite.py --- a/rpython/jit/backend/llsupport/test/test_stmrewrite.py +++ b/rpython/jit/backend/llsupport/test/test_stmrewrite.py @@ -51,7 +51,7 @@ self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None, really_not_translated=True) self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = ( - lambda cpu: False) # for now + lambda cpu: True) self.gc_ll_descr.minimal_size_in_nursery = 16 # class FakeCPU(BaseFakeCPU): @@ -515,9 +515,9 @@ jump() """, """ [p1, i1, p2, p3, i3, p4] - cond_call_gc_wb(p1, descr=wbdescr) + cond_call_gc_wb_array(p1, i1, descr=wbdescr) setarrayitem_gc(p1, i1, p2, descr=adescr) - cond_call_gc_wb(p3, descr=wbdescr) + cond_call_gc_wb_array(p3, i3, descr=wbdescr) setarrayitem_gc(p3, i3, p4, descr=adescr) jump() @@ -532,9 +532,10 @@ jump() """, """ [p1, p2, i2, p3, i3] - cond_call_gc_wb(p1, descr=wbdescr) + cond_call_gc_wb_array(p1, i2, descr=wbdescr) setarrayitem_gc(p1, i2, p2, descr=adescr) i4 = read_timestamp() + cond_call_gc_wb_array(p1, i3, descr=wbdescr) setarrayitem_gc(p1, i3, p3, descr=adescr) jump() @@ -549,9 +550,10 @@ jump() """, """ [p1, p2, i2, p3, i3] - cond_call_gc_wb(p1, descr=wbdescr) + cond_call_gc_wb_array(p1, i2, descr=wbdescr) setinteriorfield_gc(p1, i2, p2, descr=intzdescr) i4 = read_timestamp() + cond_call_gc_wb_array(p1, i3, descr=wbdescr) setinteriorfield_gc(p1, i3, p3, descr=intzdescr) jump() @@ -1115,7 +1117,7 @@ setfield_gc(p1, 8111, descr=tiddescr) setfield_gc(p1, 5, descr=clendescr) label(p1, i2, p3) - cond_call_gc_wb(p1, descr=wbdescr) + cond_call_gc_wb_array(p1, i2, descr=wbdescr) setarrayitem_gc(p1, i2, p3, descr=cdescr) """) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -451,13 +451,17 @@ # A final TEST8 before the RET, for 
the caller. Careful to # not follow this instruction with another one that changes # the status of the CPU flags! - if IS_X86_32: - mc.MOV_rs(eax.value, 3*WORD) + if stm: + mc.TEST8_rr(eax.value | BYTE_REG_FLAG, + eax.value | BYTE_REG_FLAG) else: - mc.MOV_rs(eax.value, WORD) - mc.TEST8(addr_add_const(self.SEGMENT_GC, eax, - descr.jit_wb_if_flag_byteofs), - imm(-0x80)) + if IS_X86_32: + mc.MOV_rs(eax.value, 3*WORD) + else: + mc.MOV_rs(eax.value, WORD) + mc.TEST8(addr_add_const(self.SEGMENT_GC, eax, + descr.jit_wb_if_flag_byteofs), + imm(-0x80)) # if not for_frame: @@ -2218,15 +2222,17 @@ cls = self.cpu.gc_ll_descr.has_write_barrier_class() assert cls is not None and isinstance(descr, cls) # + stm = self.cpu.gc_ll_descr.stm card_marking = False mask = descr.jit_wb_if_flag_singlebyte if array and descr.jit_wb_cards_set != 0: - # assumptions the rest of the function depends on: - assert (descr.jit_wb_cards_set_byteofs == - descr.jit_wb_if_flag_byteofs) - assert descr.jit_wb_cards_set_singlebyte == -0x80 + if not stm: + # assumptions the rest of the function depends on: + assert (descr.jit_wb_cards_set_byteofs == + descr.jit_wb_if_flag_byteofs) + assert descr.jit_wb_cards_set_singlebyte == -0x80 + mask = descr.jit_wb_if_flag_singlebyte | -0x80 card_marking = True - mask = descr.jit_wb_if_flag_singlebyte | -0x80 # loc_base = arglocs[0] if is_frame: @@ -2242,10 +2248,18 @@ # for cond_call_gc_wb_array, also add another fast path: # if GCFLAG_CARDS_SET, then we can just set one bit and be done if card_marking: - # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can - # been checked by the status flags of the previous TEST8 - mc.J_il8(rx86.Conditions['S'], 0) # patched later - js_location = mc.get_relative_pos() + if stm: + loc2 = addr_add_const(self.SEGMENT_GC, loc_base, + descr.jit_wb_cards_set_byteofs) + mask2 = descr.jit_wb_cards_set_singlebyte + mc.TEST8(loc2, imm(mask2)) + mc.J_il8(rx86.Conditions['NZ'], 0) # patched later + js_location = mc.get_relative_pos() + 
else: + # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can + # been checked by the status flags of the previous TEST8 + mc.J_il8(rx86.Conditions['S'], 0) # patched later + js_location = mc.get_relative_pos() else: js_location = 0 @@ -2266,7 +2280,7 @@ # if not is_frame: mc.PUSH(loc_base) - if self.cpu.gc_ll_descr.stm: + if stm: # get the num and ref components of the stm_location, and # push them to the stack. It's 16 bytes, so alignment is # still ok. The one or three words pushed here are removed @@ -2286,7 +2300,10 @@ # The helper ends again with a check of the flag in the object. # So here, we can simply write again a 'JNS', which will be # taken if GCFLAG_CARDS_SET is still not set. - mc.J_il8(rx86.Conditions['NS'], 0) # patched later + if stm: + mc.J_il8(rx86.Conditions['Z'], 0) # patched later + else: + mc.J_il8(rx86.Conditions['NS'], 0) # patched later jns_location = mc.get_relative_pos() # # patch the JS above @@ -2297,7 +2314,56 @@ # case GCFLAG_CARDS_SET: emit a few instructions to do # directly the card flag setting loc_index = arglocs[1] - if isinstance(loc_index, RegLoc): + + if stm: + # must write the value CARD_MARKED into the byte at: + # write_locks_base + (object >> 4) + (index / CARD_SIZE) + # + write_locks_base = rstm.adr__stm_write_slowpath_card_extra_base + if rstm.CARD_SIZE == 32: + card_bits = 5 + elif rstm.CARD_SIZE == 64: + card_bits = 6 + elif rstm.CARD_SIZE == 128: + card_bits = 7 + else: + raise AssertionError("CARD_SIZE should be 32/64/128") + # + # idea: mov r11, loc_base # the object + # and r11, ~15 # align + # lea r11, [loc_index + r11<<(card_bits-4)] + # shr r11, card_bits + # mov [r11 + write_locks_base], card_marked + r11 = X86_64_SCRATCH_REG + if isinstance(loc_index, RegLoc): + if isinstance(loc_base, RegLoc): + mc.MOV_rr(r11.value, loc_base.value) + mc.AND_ri(r11.value, ~15) + else: + assert isinstance(loc_base, ImmedLoc) + mc.MOV_ri(r11.value, loc_base.value & ~15) # 32/64bit + mc.LEA_ra(r11.value, 
(self.SEGMENT_NO, + loc_index.value, + r11.value, + card_bits - 4, + 0)) + mc.SHR_ri(r11.value, card_bits) + else: + # XXX these cases could be slightly more optimized + assert isinstance(loc_index, ImmedLoc) + cardindex = loc_index.value >> card_bits + if isinstance(loc_base, RegLoc): + mc.MOV_ri(r11.value, cardindex << 4) # 32/64bit + mc.ADD_rr(r11.value, loc_base.value) + mc.SHR_ri(r11.value, 4) + else: + mc.MOV_ri(r11.value, cardindex + (loc_base.value >> 4)) + # + assert rx86.fits_in_32bits(write_locks_base), "XXX" + mc.MOV8_mi((self.SEGMENT_NO, r11.value, write_locks_base), + rstm.CARD_MARKED) + + elif isinstance(loc_index, RegLoc): if IS_X86_64 and isinstance(loc_base, RegLoc): # copy loc_index into r11 tmp1 = X86_64_SCRATCH_REG diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py --- a/rpython/memory/gc/stmgc.py +++ b/rpython/memory/gc/stmgc.py @@ -34,6 +34,7 @@ VISIT_FPTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void)) JIT_WB_IF_FLAG = 0x01 # value of _STM_GCFLAG_WRITE_BARRIER + JIT_WB_CARDS_SET = 0x08 # value of _STM_GCFLAG_CARDS_SET stm_fast_alloc = 66*1024 # value of _STM_FAST_ALLOC in stmgc.h minimal_size_in_nursery = 16 # hard-coded lower limit diff --git a/rpython/memory/gctransform/stmframework.py b/rpython/memory/gctransform/stmframework.py --- a/rpython/memory/gctransform/stmframework.py +++ b/rpython/memory/gctransform/stmframework.py @@ -141,6 +141,12 @@ lltype.Signed, rstm.adr_write_slowpath) hop.genop("cast_int_to_ptr", [c_write_slowpath], resultvar=op.result) + def gct_get_write_barrier_from_array_failing_case(self, hop): + op = hop.spaceop + c_write_slowpath = rmodel.inputconst( + lltype.Signed, rstm.adr_write_slowpath_card_extra) + hop.genop("cast_int_to_ptr", [c_write_slowpath], resultvar=op.result) + def gct_gc_can_move(self, hop): hop.rename('stm_can_move') diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py --- a/rpython/rlib/rstm.py +++ b/rpython/rlib/rstm.py @@ -25,6 +25,12 @@ adr_segment_base = ( 
CFlexSymbolic('((long)&STM_SEGMENT->segment_base)')) adr_write_slowpath = CFlexSymbolic('((long)&_stm_write_slowpath)') +adr_write_slowpath_card_extra = ( + CFlexSymbolic('((long)&_stm_write_slowpath_card_extra)')) +adr__stm_write_slowpath_card_extra_base = ( + CFlexSymbolic('((long)&_stm_write_slowpath_card_extra_base)')) +CARD_MARKED = CFlexSymbolic('_STM_CARD_MARKED') +CARD_SIZE = CFlexSymbolic('_STM_CARD_SIZE') adr__pypy_stm_become_inevitable = ( CFlexSymbolic('((long)&_pypy_stm_become_inevitable)')) From noreply at buildbot.pypy.org Mon Jun 30 19:22:17 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 19:22:17 +0200 (CEST) Subject: [pypy-commit] pypy default: issue #1803 Message-ID: <20140630172217.6FF611D2E50@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: Changeset: r72295:26aee1f3691d Date: 2014-06-30 19:21 +0200 http://bitbucket.org/pypy/pypy/changeset/26aee1f3691d/ Log: issue #1803 resolved diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -458,6 +458,10 @@ self._check_init(space) return space.call_method(self.w_buffer, "seekable") + def isatty_w(self, space): + self._check_init(space) + return space.call_method(self.w_buffer, "isatty") + def fileno_w(self, space): self._check_init(space) return space.call_method(self.w_buffer, "fileno") @@ -1035,6 +1039,7 @@ readable = interp2app(W_TextIOWrapper.readable_w), writable = interp2app(W_TextIOWrapper.writable_w), seekable = interp2app(W_TextIOWrapper.seekable_w), + isatty = interp2app(W_TextIOWrapper.isatty_w), fileno = interp2app(W_TextIOWrapper.fileno_w), name = GetSetProperty(W_TextIOWrapper.name_get_w), buffer = interp_attrproperty_w("w_buffer", cls=W_TextIOWrapper), diff --git a/pypy/module/_io/test/test_textio.py b/pypy/module/_io/test/test_textio.py --- a/pypy/module/_io/test/test_textio.py +++ b/pypy/module/_io/test/test_textio.py @@ -25,6 +25,12 @@ t = 
_io.TextIOWrapper(b) assert t.readable() assert t.seekable() + # + class CustomFile(object): + def isatty(self): return 'YES' + readable = writable = seekable = lambda self: False + t = _io.TextIOWrapper(CustomFile()) + assert t.isatty() == 'YES' def test_default_implementations(self): import _io From noreply at buildbot.pypy.org Mon Jun 30 19:53:01 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 19:53:01 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: Typos Message-ID: <20140630175301.426EC1D34D6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72296:49a3796d3798 Date: 2014-06-30 19:52 +0200 http://bitbucket.org/pypy/pypy/changeset/49a3796d3798/ Log: Typos diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -297,7 +297,6 @@ # if hasattr(GCClass, 'JIT_WB_CARDS_SET'): self.jit_wb_cards_set = GCClass.JIT_WB_CARDS_SET - self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = ( self.extract_flag_byte(self.jit_wb_cards_set)) # @@ -307,6 +306,7 @@ assert (self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs) assert self.jit_wb_cards_set_singlebyte == -0x80 + self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT else: self.jit_wb_cards_set = 0 diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -451,9 +451,9 @@ # A final TEST8 before the RET, for the caller. Careful to # not follow this instruction with another one that changes # the status of the CPU flags! 
- if stm: - mc.TEST8_rr(eax.value | BYTE_REG_FLAG, - eax.value | BYTE_REG_FLAG) + if self.cpu.gc_ll_descr.stm: + mc.TEST8_rr(eax.value | rx86.BYTE_REG_FLAG, + eax.value | rx86.BYTE_REG_FLAG) else: if IS_X86_32: mc.MOV_rs(eax.value, 3*WORD) From noreply at buildbot.pypy.org Mon Jun 30 20:58:31 2014 From: noreply at buildbot.pypy.org (alex_gaynor) Date: Mon, 30 Jun 2014 20:58:31 +0200 (CEST) Subject: [pypy-commit] pypy default: Use newlist_bytes() in posix.listdir() to save some allocations Message-ID: <20140630185831.640721D3493@cobra.cs.uni-duesseldorf.de> Author: Alex Gaynor Branch: Changeset: r72297:e3c95427844d Date: 2014-06-30 11:57 -0700 http://bitbucket.org/pypy/pypy/changeset/e3c95427844d/ Log: Use newlist_bytes() in posix.listdir() to save some allocations diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -578,13 +578,13 @@ except OperationError, e: # fall back to the original byte string result_w[i] = w_bytes + return space.newlist(result_w) else: dirname = space.str0_w(w_dirname) result = rposix.listdir(dirname) - result_w = [space.wrap(s) for s in result] + return space.newlist_bytes(result) except OSError, e: raise wrap_oserror2(space, e, w_dirname) - return space.newlist(result_w) def pipe(space): "Create a pipe. Returns (read_end, write_end)." 
From noreply at buildbot.pypy.org Mon Jun 30 21:07:19 2014 From: noreply at buildbot.pypy.org (arigo) Date: Mon, 30 Jun 2014 21:07:19 +0200 (CEST) Subject: [pypy-commit] pypy stmgc-c7: Oups Message-ID: <20140630190719.6C25A1D34D6@cobra.cs.uni-duesseldorf.de> Author: Armin Rigo Branch: stmgc-c7 Changeset: r72298:845e847fca2a Date: 2014-06-30 21:06 +0200 http://bitbucket.org/pypy/pypy/changeset/845e847fca2a/ Log: Oups diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py --- a/rpython/rlib/rstm.py +++ b/rpython/rlib/rstm.py @@ -28,7 +28,7 @@ adr_write_slowpath_card_extra = ( CFlexSymbolic('((long)&_stm_write_slowpath_card_extra)')) adr__stm_write_slowpath_card_extra_base = ( - CFlexSymbolic('((long)&_stm_write_slowpath_card_extra_base)')) + CFlexSymbolic('((long)_stm_write_slowpath_card_extra_base())')) CARD_MARKED = CFlexSymbolic('_STM_CARD_MARKED') CARD_SIZE = CFlexSymbolic('_STM_CARD_SIZE') From noreply at buildbot.pypy.org Mon Jun 30 21:35:13 2014 From: noreply at buildbot.pypy.org (alex_gaynor) Date: Mon, 30 Jun 2014 21:35:13 +0200 (CEST) Subject: [pypy-commit] pypy default: Temporary translation fix Message-ID: <20140630193513.C9C351D2D13@cobra.cs.uni-duesseldorf.de> Author: Alex Gaynor Branch: Changeset: r72299:9d5d4149f2a5 Date: 2014-06-30 12:34 -0700 http://bitbucket.org/pypy/pypy/changeset/9d5d4149f2a5/ Log: Temporary translation fix diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -582,7 +582,7 @@ else: dirname = space.str0_w(w_dirname) result = rposix.listdir(dirname) - return space.newlist_bytes(result) + return space.newlist_bytes(result[:]) except OSError, e: raise wrap_oserror2(space, e, w_dirname) From noreply at buildbot.pypy.org Mon Jun 30 21:43:45 2014 From: noreply at buildbot.pypy.org (alex_gaynor) Date: Mon, 30 Jun 2014 21:43:45 +0200 (CEST) Subject: [pypy-commit] pypy default: Yet Another Temporary Translation Fix Message-ID: 
<20140630194345.19B9C1D2D13@cobra.cs.uni-duesseldorf.de> Author: Alex Gaynor Branch: Changeset: r72300:8079e9df3d8c Date: 2014-06-30 12:43 -0700 http://bitbucket.org/pypy/pypy/changeset/8079e9df3d8c/ Log: Yet Another Temporary Translation Fix diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -582,7 +582,9 @@ else: dirname = space.str0_w(w_dirname) result = rposix.listdir(dirname) - return space.newlist_bytes(result[:]) + # The list comprehension is a workaround for an obscure translation + # bug. + return space.newlist_bytes([x for x in result]) except OSError, e: raise wrap_oserror2(space, e, w_dirname)