From pypy.commits at gmail.com Tue Aug 1 05:13:41 2017
From: pypy.commits at gmail.com (arigo)
Date: Tue, 01 Aug 2017 02:13:41 -0700 (PDT)
Subject: [pypy-commit] pypy default: Issue #2607 (jean-louis)
Message-ID: <59804645.d8a0df0a.9832.4e64@mx.google.com>

Author: Armin Rigo
Branch:
Changeset: r92011:c8577e0e0932
Date: 2017-08-01 11:13 +0200
http://bitbucket.org/pypy/pypy/changeset/c8577e0e0932/

Log:	Issue #2607 (jean-louis)

	Instead of sys/poll.h, use poll.h, which is the officially correct
	header.

diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py
--- a/rpython/rlib/_rsocket_rffi.py
+++ b/rpython/rlib/_rsocket_rffi.py
@@ -20,7 +20,7 @@
 includes = ('sys/types.h',
             'sys/socket.h',
             'sys/un.h',
-            'sys/poll.h',
+            'poll.h',
             'sys/select.h',
             'sys/types.h',
             'netinet/in.h',

From pypy.commits at gmail.com Tue Aug 1 06:43:42 2017
From: pypy.commits at gmail.com (arigo)
Date: Tue, 01 Aug 2017 03:43:42 -0700 (PDT)
Subject: [pypy-commit] pypy default: Skip unreliable test when testing with either -A or with a pypy host
Message-ID: <59805b5e.08891c0a.bacc6.6b10@mx.google.com>

Author: Armin Rigo
Branch:
Changeset: r92012:c88684d6f4c1
Date: 2017-08-01 12:43 +0200
http://bitbucket.org/pypy/pypy/changeset/c88684d6f4c1/

Log:	Skip unreliable test when testing with either -A or with a pypy host

diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -1,3 +1,4 @@
+import sys
 from rpython.tool.udir import udir
 from pypy.tool.pytest.objspace import gettestobjspace

@@ -7,6 +8,8 @@
     def setup_class(cls):
         cls.w_tmpfilename = cls.space.wrap(str(udir.join('test__vmprof.1')))
         cls.w_tmpfilename2 = cls.space.wrap(str(udir.join('test__vmprof.2')))
+        cls.w_plain = cls.space.wrap(not cls.runappdirect and
+                                     '__pypy__' not in sys.builtin_module_names)

     def test_import_vmprof(self):
         tmpfile = open(self.tmpfilename, 'wb')
@@ -117,6 +120,8 @@
         assert
_vmprof.get_profile_path() is None def test_stop_sampling(self): + if not self.plain: + skip("unreliable test except on CPython without -A") import os import _vmprof tmpfile = open(self.tmpfilename, 'wb') From pypy.commits at gmail.com Tue Aug 1 06:49:25 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 01 Aug 2017 03:49:25 -0700 (PDT) Subject: [pypy-commit] pypy default: Fix the description (there was never a --nopax option; there was only one temporarily inside that branch) Message-ID: <59805cb5.b288df0a.254f8.243a@mx.google.com> Author: Armin Rigo Branch: Changeset: r92013:8a0f195540eb Date: 2017-08-01 12:48 +0200 http://bitbucket.org/pypy/pypy/changeset/8a0f195540eb/ Log: Fix the description (there was never a --nopax option; there was only one temporarily inside that branch) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -43,7 +43,9 @@ .. branch: nopax -Deleted ``--nopax`` option +At the end of translation, run ``attr -q -s pax.flags -V m`` on +PAX-enabled systems on the produced binary. This seems necessary +because PyPy uses a JIT. .. branch: pypy_bytearray From pypy.commits at gmail.com Tue Aug 1 09:17:35 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 06:17:35 -0700 (PDT) Subject: [pypy-commit] pypy default: improve cross-referencing in documentation Message-ID: <59807f6f.d47d1c0a.29b66.87fc@mx.google.com> Author: Matti Picus Branch: Changeset: r92016:54cc281fb04e Date: 2017-08-01 16:16 +0300 http://bitbucket.org/pypy/pypy/changeset/54cc281fb04e/ Log: improve cross-referencing in documentation diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -10,6 +10,18 @@ minutes on a fast machine -- and RAM-hungry. You will need **at least** 2 GB of memory on a 32-bit machine and 4GB on a 64-bit machine. 
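[Editorial note, not part of the archived commits: the ``w_plain`` flag and the skip added above both hinge on detecting the host interpreter. PyPy always exposes a built-in ``__pypy__`` module, so its absence from ``sys.builtin_module_names`` identifies a plain CPython run. A minimal standalone sketch of that check; the function name is illustrative only:]

```python
import sys

def on_plain_cpython():
    # PyPy always ships a built-in '__pypy__' module; CPython never does,
    # so this distinguishes the two hosts without importing anything.
    return '__pypy__' not in sys.builtin_module_names

plain = on_plain_cpython()
print("host is CPython" if plain else "host is PyPy")
```

A test can then guard itself with ``if not plain: skip(...)``, as the commit does.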
+Before you start +---------------- + +Our normal development workflow avoids a full translation by using test-driven +development. You can read more about how to develop PyPy here_, and latest +translated (hopefully functional) binary packages are available on our +buildbot's `nightly builds`_ + +.. _here: getting-started-dev.html +.. _`nightly builds`: http://buildbot.pypy.org/nightly + +You will need the build dependencies below to run the tests. Clone the repository -------------------- diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst --- a/pypy/doc/getting-started-dev.rst +++ b/pypy/doc/getting-started-dev.rst @@ -35,8 +35,8 @@ * Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Run tests! (See - the rest of this page.) + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) * Commit regularly with ``hg commit``. A one-line commit message is fine. We love to have tons of commits; make one as soon as you have @@ -113,6 +113,10 @@ make sure you have the correct version installed which you can find out with the ``--version`` switch. +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter + Now on to running some tests. PyPy has many different test directories and you can use shell completion to point at directories or files:: @@ -141,7 +145,7 @@ .. _py.test testing tool: http://pytest.org .. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage - +.. 
_`build requirements`: build.html#install-build-time-dependencies Special Introspection Features of the Untranslated Python Interpreter --------------------------------------------------------------------- From pypy.commits at gmail.com Tue Aug 1 09:17:33 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 06:17:33 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-refactor-tp_dealloc: fix translation by adding immortal to malloc in rpython Message-ID: <59807f6d.44561c0a.34ce9.78b6@mx.google.com> Author: Matti Picus Branch: cpyext-refactor-tp_dealloc Changeset: r92015:e3219c61e7a8 Date: 2017-07-31 14:17 +0300 http://bitbucket.org/pypy/pypy/changeset/e3219c61e7a8/ Log: fix translation by adding immortal to malloc in rpython diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -204,7 +204,6 @@ def getsetdescr_dealloc(space, obj): from pypy.module.cpyext.object import _dealloc py_getsetdescr = rffi.cast(PyGetSetDescrObject, obj) - xxx _dealloc(space, obj) def methoddescr_attach(space, py_obj, w_obj, w_userdata=None): diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -2208,7 +2208,7 @@ return _ptr(Ptr(T), o, solid) @analyzer_for(malloc) -def ann_malloc(s_T, s_n=None, s_flavor=None, s_zero=None, +def ann_malloc(s_T, s_n=None, s_flavor=None, s_immortal=None, s_zero=None, s_track_allocation=None, s_add_memory_pressure=None, s_nonmovable=None): assert (s_n is None or s_n.knowntype == int diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -347,19 +347,20 @@ # annotation of low-level types @typer_for(lltype.malloc) -def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None, - i_add_memory_pressure=None, i_nonmovable=None): 
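[Editorial note, not part of the archived commits: the ``rtype_malloc`` change above threads a new optional ``i_immortal`` keyword through the rtyper's keyword parsing and into a flags dict. The shape of that pattern, sketched in plain Python; this is a simplification for illustration, as the real code operates on annotation/variable objects, not values:]

```python
def malloc_flags(flavor='gc', immortal=None, zero=None,
                 track_allocation=None, add_memory_pressure=None,
                 nonmovable=None):
    # Start from the default flavor and record only the flags that were
    # explicitly passed, mirroring how rtype_malloc builds its flags dict.
    flags = {'flavor': flavor}
    for name, value in [('immortal', immortal), ('zero', zero),
                        ('track_allocation', track_allocation),
                        ('add_memory_pressure', add_memory_pressure),
                        ('nonmovable', nonmovable)]:
        if value is not None:
            flags[name] = value
    return flags

print(malloc_flags(immortal=True))
```

Adding a new flag then only requires one new keyword and one entry in the list, which is essentially what the changeset does in ``rbuiltin.py``.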
+def rtype_malloc(hop, i_flavor=None, i_immortal=None, i_zero=None, + i_track_allocation=None, i_add_memory_pressure=None, i_nonmovable=None): assert hop.args_s[0].is_constant() vlist = [hop.inputarg(lltype.Void, arg=0)] opname = 'malloc' kwds_v = parse_kwds( hop, (i_flavor, lltype.Void), + (i_immortal, None), (i_zero, None), (i_track_allocation, None), (i_add_memory_pressure, None), (i_nonmovable, None)) - (v_flavor, v_zero, v_track_allocation, + (v_flavor, v_immortal, v_zero, v_track_allocation, v_add_memory_pressure, v_nonmovable) = kwds_v flags = {'flavor': 'gc'} if v_flavor is not None: From pypy.commits at gmail.com Tue Aug 1 09:12:58 2017 From: pypy.commits at gmail.com (Dodan) Date: Tue, 01 Aug 2017 06:12:58 -0700 (PDT) Subject: [pypy-commit] pypy py3.5-sendmsg-recvmsg: Fix for the test_EINTR errors on sendmsg and recvmsg Message-ID: <59807e5a.10301c0a.a3247.6eeb@mx.google.com> Author: Dodan Mihai Branch: py3.5-sendmsg-recvmsg Changeset: r92014:a8d5fdebbce7 Date: 2017-08-01 16:11 +0300 http://bitbucket.org/pypy/pypy/changeset/a8d5fdebbce7/ Log: Fix for the test_EINTR errors on sendmsg and recvmsg diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -473,27 +473,27 @@ raise oefmt(space.w_ValueError, "negative buffer size in recvmsg()") if ancbufsize < 0: raise oefmt(space.w_ValueError, "invalid ancillary data buffer length") - try: - tuple = self.sock.recvmsg(message_size, ancbufsize, flags) - message = space.newbytes(tuple[0]) - # print(tuple[0]) - list = [] - for l in tuple[1]: - tup = space.newtuple([space.newint(l[0]), space.newint(l[1]), space.newbytes(l[2])]) - list.append(tup) + while True: + try: + tuple = self.sock.recvmsg(message_size, ancbufsize, flags) + message = space.newbytes(tuple[0]) + list = [] + for l in tuple[1]: + tup = space.newtuple([space.newint(l[0]), space.newint(l[1]), space.newbytes(l[2])]) + 
list.append(tup) - anc = space.newlist(list) + anc = space.newlist(list) - flag = space.newint(tuple[2]) - if (tuple[3] is not None): - address = addr_as_object(tuple[3], self.sock.fd, space) - else: - address = space.w_None - - rettup = space.newtuple([message, anc, flag, address]) - return rettup - except SocketError as e: - converted_error(space, e, eintr_retry=True) + flag = space.newint(tuple[2]) + if (tuple[3] is not None): + address = addr_as_object(tuple[3], self.sock.fd, space) + else: + address = space.w_None + rettup = space.newtuple([message, anc, flag, address]) + break + except SocketError as e: + converted_error(space, e, eintr_retry=True) + return rettup @unwrap_spec(data='bufferstr', flags=int) def send_w(self, space, data, flags=0): @@ -566,110 +566,117 @@ :return: Bytes sent from the message """ # Get the flag and address from the object space - flags = 0 - if space.is_none(w_flags) is False: - flags = space.int_w(w_flags) + while True: + try: + flags = 0 + if space.is_none(w_flags) is False: + flags = space.int_w(w_flags) - address = None - if space.is_none(w_address) is False: - address = self.addr_from_object(space, w_address) + address = None + if space.is_none(w_address) is False: + address = self.addr_from_object(space, w_address) - # find data's type in the ObjectSpace and get a list of string out of it. - data = [] - if (w_data.typedef.name == 'list'): - for i in w_data.getitems(): - if space.isinstance_w(i,space.w_bytes): - data.append(space.bytes_w(i)) + # find data's type in the ObjectSpace and get a list of string out of it. 
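[Editorial note, not part of the archived commits: the rewrite above wraps ``recvmsg`` in a ``while True`` loop so that a call interrupted by a signal (EINTR) is retried instead of surfacing as an error. The core retry shape, as an interpreter-independent sketch; the "flaky" stand-in syscall below is invented for illustration:]

```python
import errno

def call_retrying_on_eintr(func):
    # Keep retrying while the underlying call reports EINTR;
    # re-raise any other OSError unchanged.
    while True:
        try:
            return func()
        except OSError as e:
            if e.errno != errno.EINTR:
                raise
            # interrupted by a signal: loop and retry

# A stand-in "syscall" that fails with EINTR twice before succeeding.
attempts = {'n': 0}
def flaky_recv():
    attempts['n'] += 1
    if attempts['n'] < 3:
        raise OSError(errno.EINTR, 'Interrupted system call')
    return b'payload'

result = call_retrying_on_eintr(flaky_recv)
```

The commit applies the same structure directly inside ``recvmsg_w``/``sendmsg_w``, breaking out of the loop once the call completes.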
+ data = [] + if (w_data.typedef.name == 'list'): + for i in w_data.getitems(): + if space.isinstance_w(i, space.w_bytes): + data.append(space.bytes_w(i)) + else: + if (i.typedef.name == 'array.array'): + data.append(space.bytes_w(i.descr_tobytes(space))) + else: + if (i.typedef.name == 'memoryview'): + data.append(space.bytes_w(i.descr_tobytes(space))) + else: + raise oefmt(space.w_TypeError, "a bytes-like object is required") else: - if (i.typedef.name == 'array.array'): - data.append(space.bytes_w(i.descr_tobytes(space))) + while True: + try: + if (space.is_generator(w_data) is False): + raise oefmt(space.w_TypeError, "sendmsg(): argument 1 must be iterable") + i = space.next(w_data) + if space.isinstance_w(i, space.w_bytes): + data.append(space.bytes_w(i)) + else: + if (i.typedef.name == 'array.array'): + data.append(space.bytes_w(i.descr_tobytes(space))) + else: + if (i.typedef.name == 'memoryview'): + data.append(space.bytes_w(i.descr_tobytes(space))) + else: + raise oefmt(space.w_TypeError, "a bytes-like object is required") + except OperationError as e: + if not e.match(space, space.w_StopIteration): + raise + break + + # find the ancillary's type in the ObjectSpace and get a list of tuples out of it. 
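[Editorial note, not part of the archived commits: the ancillary-data handling being fixed here exists chiefly to support passing file descriptors between processes via SCM_RIGHTS. A minimal sketch of that round trip over a socketpair, using CPython's ``socket`` module rather than the RPython-level code in the diff; Unix-only:]

```python
import array
import os
import socket

def send_fd(sock, fd):
    # One byte of ordinary data plus one SCM_RIGHTS ancillary item.
    fds = array.array('i', [fd])
    sock.sendmsg([b'x'], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, fds)])

def recv_fd(sock):
    fds = array.array('i')
    msg, ancdata, flags, addr = sock.recvmsg(1, socket.CMSG_LEN(fds.itemsize))
    for level, ctype, cdata in ancdata:
        if level == socket.SOL_SOCKET and ctype == socket.SCM_RIGHTS:
            # Truncate to a whole number of ints before decoding.
            fds.frombytes(cdata[:len(cdata) - (len(cdata) % fds.itemsize)])
    return list(fds)

a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM)
r, w = os.pipe()
send_fd(a, r)
received = recv_fd(b)        # a duplicated descriptor for the pipe's read end
os.write(w, b'hello')
data = os.read(received[0], 5)
print(data)
```

The received descriptor is a kernel-level duplicate, so reading from it sees the bytes written to the original pipe.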
+ ancillary = [] + if w_ancillary is not None: + if (space.isinstance_w(w_ancillary, space.w_list)): + for i in w_ancillary.getitems(): + if (space.isinstance_w(i, space.w_tuple) is False): + raise oefmt(space.w_TypeError, "[sendmsg() ancillary data items]() argument must be sequence") + if (space.len_w(i) == 3): + level = space.int_w(space.getitem(i, space.newint(0))) + type = space.int_w(space.getitem(i, space.newint(1))) + if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'): + cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space)) + else: + if (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)): + cont = space.bytes_w(space.getitem(i, space.newint(2))) + else: + raise oefmt(space.w_TypeError, "a bytes-like object is required") + tup = (level, type, cont) + ancillary.append(tup) + else: + raise oefmt(space.w_TypeError, + "[sendmsg() ancillary data items]() argument must be sequence of length 3") + else: - raise oefmt(space.w_TypeError, "a bytes-like object is required") - else: - while True: - try: - if (space.is_generator(w_data) is False): - raise oefmt(space.w_TypeError, "sendmsg(): argument 1 must be iterable") - i = space.next(w_data) - if space.isinstance_w(i, space.w_bytes): - data.append(space.bytes_w(i)) - else: - if (i.typedef.name == 'array.array'): - data.append(space.bytes_w(i.descr_tobytes(space))) - else: - raise oefmt(space.w_TypeError, "a bytes-like object is required") - except OperationError as e: - if not e.match(space,space.w_StopIteration): - raise - break + while True: + try: + if (space.is_generator(w_ancillary) is False): + raise oefmt(space.w_TypeError, + "[sendmsg() ancillary data items]() argument must be sequence") + i = space.next(w_ancillary) + if (space.isinstance_w(i, space.w_tuple) is False): + raise oefmt(space.w_TypeError, + "[sendmsg() ancillary data items]() argument must be sequence of length 3") + if (space.len_w(i) != 3): + raise oefmt(space.w_TypeError, + 
"[sendmsg() ancillary data items]() argument must be sequence of length 3") + except OperationError as e: + if not e.match(space, space.w_StopIteration): + raise + break + level = space.int_w(space.getitem(i, space.newint(0))) + type = space.int_w(space.getitem(i, space.newint(1))) + if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'): + cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space)) + else: + if (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)): + cont = space.bytes_w(space.getitem(i, space.newint(2))) + else: + raise oefmt(space.w_TypeError, "a bytes-like object is required") + tup = (level, type, cont) + ancillary.append(tup) - # find the ancillary's type in the ObjectSpace and get a list of tuples out of it. - ancillary = [] - if w_ancillary is not None: - if (space.isinstance_w(w_ancillary,space.w_list)): - for i in w_ancillary.getitems(): - if (space.isinstance_w(i, space.w_tuple) is False): - raise oefmt(space.w_TypeError,"[sendmsg() ancillary data items]() argument must be sequence") - if (space.len_w(i) == 3): - level = space.int_w(space.getitem(i, space.newint(0))) - type = space.int_w(space.getitem(i, space.newint(1))) - if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'): - cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space)) - else: - if (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)): - cont = space.bytes_w(space.getitem(i, space.newint(2))) - else: - raise oefmt(space.w_TypeError,"a bytes-like object is required") - tup = (level, type, cont) - ancillary.append(tup) - else: - raise oefmt(space.w_TypeError, - "[sendmsg() ancillary data items]() argument must be sequence of length 3") - else: - while True: - try: - if (space.is_generator(w_ancillary) is False): - raise oefmt(space.w_TypeError, - "[sendmsg() ancillary data items]() argument must be sequence") - i = space.next(w_ancillary) - if (space.isinstance_w(i, 
space.w_tuple) is False): - raise oefmt(space.w_TypeError, - "[sendmsg() ancillary data items]() argument must be sequence of length 3") - if (space.len_w(i) != 3): - raise oefmt(space.w_TypeError, - "[sendmsg() ancillary data items]() argument must be sequence of length 3") - except OperationError as e: - if not e.match(space,space.w_StopIteration): - raise - break - level = space.int_w(space.getitem(i, space.newint(0))) - type = space.int_w(space.getitem(i, space.newint(1))) - if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'): - cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space)) - else: - if (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)): - cont = space.bytes_w(space.getitem(i, space.newint(2))) - else: - raise oefmt(space.w_TypeError, "a bytes-like object is required") - tup = (level, type, cont) - ancillary.append(tup) + count = self.sock.sendmsg(data, ancillary, flags, address) + if count < 0: + if (count == -1000): + raise oefmt(space.w_OSError, "sending multiple control messages not supported") + if (count == -1001): + raise oefmt(space.w_OSError, "ancillary data item too large") + if (count == -1002): + raise oefmt(space.w_OSError, "too much ancillary data") + break + except SocketError as e: + converted_error(space, e, eintr_retry=True) - try: - count = self.sock.sendmsg(data, ancillary, flags, address) - if count < 0: - if (count == -1000): - raise oefmt(space.w_OSError, "sending multiple control messages not supported") - if (count == -1001): - raise oefmt(space.w_OSError, "ancillary data item too large") - if (count == -1002): - raise oefmt(space.w_OSError, "too much ancillary data") - - return space.newint(count) - except SocketError as e: - converted_error(space, e, eintr_retry=True) - - + return space.newint(count) @unwrap_spec(flag=int) def setblocking_w(self, flag): diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ 
b/rpython/rlib/_rsocket_rffi.py @@ -514,15 +514,15 @@ int flags, struct sockaddr* address, socklen_t* addrlen, - int** length_of_messages, + long** length_of_messages, char** messages, - int* no_of_messages, - int* size_of_ancillary, + long* no_of_messages, + long* size_of_ancillary, long** levels, long** types, char** file_descr, long** descr_per_ancillary, - int* retflag) + long* retflag) { @@ -629,13 +629,15 @@ *addrlen = retinfo->addrlen; // Set the parameters of message - *no_of_messages = retinfo->no_of_messages; - *size_of_ancillary = retinfo->size_of_ancillary; - *length_of_messages = (int*) malloc (sizeof(int) * retinfo->no_of_messages); - memcpy(*length_of_messages, retinfo->length_of_messages, sizeof(int) * retinfo->no_of_messages); + no_of_messages[0] = retinfo->no_of_messages; + size_of_ancillary[0] = retinfo->size_of_ancillary; + *length_of_messages = (long*) malloc (sizeof(long) * retinfo->no_of_messages); + //memcpy(*length_of_messages, retinfo->length_of_messages, sizeof(int) * retinfo->no_of_messages); int counter = 0; - for (i=0; i< retinfo->no_of_messages; i++) + for (i=0; i< retinfo->no_of_messages; i++){ counter += retinfo->length_of_messages[i]; + length_of_messages[0][i] = retinfo->length_of_messages[i]; + } memset(*messages, 0, sizeof(char) * counter); counter = 0; for(i=0; i< retinfo->no_of_messages; i++){ @@ -664,7 +666,7 @@ } // Set the retflag - *retflag = retinfo->retflag; + retflag[0] = retinfo->retflag; // Free the memory free(retinfo->address); @@ -959,7 +961,7 @@ post_include_bits =[ "RPY_EXTERN " "int sendmsg_implementation(int socket, struct sockaddr* address, socklen_t addrlen, long* length_of_messages, char** messages, int no_of_messages, long* levels, long* types, char** file_descriptors, long* no_of_fds, int control_length, int flag );\n" "RPY_EXTERN " - "int recvmsg_implementation(int socket_fd, int message_size, int ancillary_size, int flags, struct sockaddr* address, socklen_t* addrlen, int** length_of_messages, char** 
messages, int* no_of_messages, int* size_of_ancillary, long** levels, long** types, char** file_descr, long** descr_per_ancillary, int* flag);\n" + "int recvmsg_implementation(int socket_fd, int message_size, int ancillary_size, int flags, struct sockaddr* address, socklen_t* addrlen, long** length_of_messages, char** messages, long* no_of_messages, long* size_of_ancillary, long** levels, long** types, char** file_descr, long** descr_per_ancillary, long* flag);\n" "static " "int cmsg_min_space(struct msghdr *msg, struct cmsghdr *cmsgh, size_t space);\n" "static " diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -932,7 +932,6 @@ address.addrlen = addrlen else: address = None - print address data = buf.str(read_bytes) return (data, address) raise self.error_handler() @@ -997,7 +996,7 @@ retflag[0] = rffi.cast(rffi.SIGNED,0) # a mask for the SIGNEDP's that need to be cast to int. (long default) - LONG_MASK = 2**32 - 1 + #LONG_MASK = 2**32 - 1 reply = _c.recvmsg(self.fd, rffi.cast(lltype.Signed,message_size), rffi.cast(lltype.Signed,ancbufsize),rffi.cast(lltype.Signed,flags), addr_p, addrlen_p, len_of_msgs, messages, no_of_messages,size_of_anc, @@ -1011,7 +1010,7 @@ for i in range(msg_no): x = rffi.cast(rffi.SIGNED,len_of_msgs[0][i]) - x &= LONG_MASK + #x &= LONG_MASK retmsg = rffi.charp2strn(messages[0],x) offset = 0 @@ -1078,6 +1077,8 @@ if address is not None: address.unlock() + if _c.geterrno() == _c.EINTR: + raise last_error() if (reply == -10000): raise RSocketError("Invalid message size") if (reply == -10001): From pypy.commits at gmail.com Tue Aug 1 09:22:39 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 06:22:39 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-refactor-tp_dealloc: close dead-end branch Message-ID: <5980809f.759adf0a.1d3b0.7360@mx.google.com> Author: Matti Picus Branch: cpyext-refactor-tp_dealloc Changeset: r92017:98384706fd2b Date: 2017-08-01 16:21 
+0300 http://bitbucket.org/pypy/pypy/changeset/98384706fd2b/ Log: close dead-end branch From pypy.commits at gmail.com Tue Aug 1 14:11:17 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 11:11:17 -0700 (PDT) Subject: [pypy-commit] pypy default: clean build warnings, expand building section Message-ID: <5980c445.06931c0a.55c0e.e165@mx.google.com> Author: Matti Picus Branch: Changeset: r92018:7a17b6d02bb1 Date: 2017-08-01 21:10 +0300 http://bitbucket.org/pypy/pypy/changeset/7a17b6d02bb1/ Log: clean build warnings, expand building section diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -152,22 +152,61 @@ Run the translation ------------------- +We usually translate in the ``pypy/goal`` directory, so all the following +commands assume your ``$pwd`` is there. + Translate with JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=jit Translate without JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=2 +Note this translates pypy via the ``targetpypystandalone.py`` file, so these +are shorthand for:: + + pypy ../../rpython/bin/rpython targetpypystandalone.py + +More help is availabe via ``--help`` at either option position, and more info +can be found in the :doc:`config/index` section. + (You can use ``python`` instead of ``pypy`` here, which will take longer but works too.) -If everything works correctly this will create an executable ``pypy-c`` in the -current directory. The executable behaves mostly like a normal Python -interpreter (see :doc:`cpython_differences`). +If everything works correctly this will: + +1. Run the rpython `translation chain`_, producing a database of the + entire pypy interpreter. This step is currently singe threaded, and RAM + hungry. As part of this step, the chain creates a large number of C code + files and a Makefile to compile them in a + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. +2. 
Create an executable ``pypy-c`` by running the Makefile. This step can + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. + + +The resulting executable behaves mostly like a normal Python +interpreter (see :doc:`cpython_differences`), and is ready for testing, for +use as a base interpreter for a new virtualenv, or for packaging into a binary +suitable for installation on another machine running the same OS as the build +machine. + +Note that step 4 is merely done as a convenience, any of the steps may be rerun +without rerunning the previous steps. + +.. _`translation chain`: https://rpython.readthedocs.io/en/latest/translation.html + + +Making a debug build of PyPy +---------------------------- + +If the Makefile is rerun with the lldebug or lldebug0 target, appropriate +compilation flags are added to add debug info and reduce compiler optimizations +to ``-O0`` respectively. If you stop in a debugger, you will see the +very wordy machine-generated C code from the rpython translation step, which +takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ @@ -181,14 +220,6 @@ .. _`out-of-line API mode`: http://cffi.readthedocs.org/en/latest/overview.html#real-example-api-level-out-of-line -Translating with non-standard options -------------------------------------- - -It is possible to have non-standard features enabled for translation, -but they are not really tested any more. Look, for example, at the -:doc:`objspace proxies ` document. - - Packaging (preparing for installation) -------------------------------------- @@ -217,14 +248,16 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in `package.py`_. 
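[Editorial note, not part of the archived commits: the build documentation above says the generated C files and Makefile land in a directory controlled by the ``PYPY_USESSION_DIR`` environment variable. A tiny sketch of honoring such a variable with a fallback; the tempdir fallback is an assumption for this sketch, not a statement about the real build's behavior:]

```python
import os
import tempfile

def usession_root():
    # Use PYPY_USESSION_DIR when set; otherwise fall back to the system
    # temp directory (fallback chosen here for illustration only).
    return os.environ.get('PYPY_USESSION_DIR', tempfile.gettempdir())

os.environ['PYPY_USESSION_DIR'] = '/tmp/my-pypy-build'
configured = usession_root()
del os.environ['PYPY_USESSION_DIR']
default = usession_root()
print(configured, default)
```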
Users + commands at install time; the exact list is in + :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in `package.py`_). Users seeing a broken + see the exact list in :source:`pypy/tool/release/package.py `). + Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file called ``_gdbm_cffi.pypy-41.so`` locally, which is a C extension diff --git a/pypy/doc/configuration.rst b/pypy/doc/configuration.rst --- a/pypy/doc/configuration.rst +++ b/pypy/doc/configuration.rst @@ -188,4 +188,6 @@ can be found on the ``config`` attribute of all ``TranslationContext`` instances and are described in :source:`rpython/config/translationoption.py`. The interpreter options are attached to the object space, also under the name ``config`` and are -described in :source:`pypy/config/pypyoption.py`. +described in :source:`pypy/config/pypyoption.py`. Both set of options are +documented in the :doc:`config/index` section. 
+ diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst deleted file mode 100644 --- a/pypy/doc/cppyy_example.rst +++ /dev/null @@ -1,59 +0,0 @@ -File example.h -============== - -:: - - #include - #include - - class AbstractClass { - public: - virtual ~AbstractClass() {} - virtual void abstract_method() = 0; - }; - - class ConcreteClass : AbstractClass { - public: - ConcreteClass(int n=42) : m_int(n) {} - ~ConcreteClass() {} - - virtual void abstract_method() { - std::cout << "called concrete method" << std::endl; - } - - void array_method(int* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - void array_method(double* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - AbstractClass* show_autocast() { - return this; - } - - operator const char*() { - return "Hello operator const char*!"; - } - - public: - int m_int; - }; - - namespace Namespace { - - class ConcreteClass { - public: - class NestedClass { - public: - std::vector m_v; - }; - - }; - - } // namespace Namespace diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -12,7 +12,7 @@ * Write them in pure Python and use ctypes_. -* Write them in C++ and bind them through :doc:`cppyy ` using Cling. +* Write them in C++ and bind them through cppyy_ using Cling. * Write them as `RPython mixed modules`_. @@ -64,9 +64,9 @@ cppyy ----- -For C++, `cppyy`_ is an automated bindings generator available for both +For C++, _cppyy_ is an automated bindings generator available for both PyPy and CPython. -``cppyy`` relies on declarations from C++ header files to dynamically +_cppyy_ relies on declarations from C++ header files to dynamically construct Python equivalent classes, functions, variables, etc. It is designed for use by large scale programs and supports modern C++. 
With PyPy, it leverages the built-in ``_cppyy`` module, allowing the JIT to @@ -75,8 +75,7 @@ To install, run ``pip install cppyy``. Further details are available in the `full documentation`_. -.. _cppyy: http://cppyy.readthedocs.org/ -.. _`full documentation`: http://cppyy.readthedocs.org/ +.. _`full documentation`: https://cppyy.readthedocs.org/ RPython Mixed Modules From pypy.commits at gmail.com Tue Aug 1 16:45:27 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 13:45:27 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-win32: fix compilation on win32 Message-ID: <5980e867.09de1c0a.ebdab.e8a2@mx.google.com> Author: Matti Picus Branch: vmprof-win32 Changeset: r92019:b3ef99751523 Date: 2017-08-01 23:32 +0300 http://bitbucket.org/pypy/pypy/changeset/b3ef99751523/ Log: fix compilation on win32 From pypy.commits at gmail.com Tue Aug 1 16:45:30 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 13:45:30 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-win32: fix compilation on win32 Message-ID: <5980e86a.04a6df0a.37a1b.a492@mx.google.com> Author: Matti Picus Branch: vmprof-win32 Changeset: r92020:16d18de7f580 Date: 2017-08-01 23:22 +0300 http://bitbucket.org/pypy/pypy/changeset/16d18de7f580/ Log: fix compilation on win32 diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -23,6 +23,8 @@ SHARED.join('symboltable.c'), SHARED.join('vmprof_unix.c') ] +include_dirs = [SRC, SHARED] + if sys.platform.startswith('linux'): separate_module_files += [ BACKTRACE.join('atomic.c'), @@ -35,10 +37,12 @@ BACKTRACE.join('mmapio.c'), BACKTRACE.join('posix.c'), BACKTRACE.join('sort.c'), + SHARED.join('vmprof_mt.c'), ] _libs = ['dl'] compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_LINUX'] + include_dirs.append(BACKTRACE) elif sys.platform == 'win32': compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS'] separate_module_files = 
[SHARED.join('vmprof_win.c')] @@ -48,10 +52,12 @@ compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] _libs = [] - + separate_module_files += [ + SHARED.join('vmprof_mt.c'), + ] eci_kwds = dict( - include_dirs = [SRC, SHARED, BACKTRACE], + include_dirs = include_dirs, includes = ['rvmprof.h','vmprof_stack.h'], libraries = _libs, separate_module_files = [ @@ -59,7 +65,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -7,9 +7,8 @@ #ifdef VMPROF_WINDOWS #include -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef intptr_t ssize_t; +#include "shared/msiinttypes/inttypes.h" +#include "shared/msiinttypes/stdint.h" #else #include #include diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c --- a/rpython/rlib/rvmprof/src/shared/machine.c +++ b/rpython/rlib/rvmprof/src/shared/machine.c @@ -34,7 +34,7 @@ #endif } -long vmp_fd_to_path(int fd, char * buffer, long buffer_len) +long vmp_fd_to_path(int fd, const char * buffer, long buffer_len) { #ifdef VMPROF_LINUX char proffs[24]; diff --git a/rpython/rlib/rvmprof/src/shared/machine.h b/rpython/rlib/rvmprof/src/shared/machine.h --- a/rpython/rlib/rvmprof/src/shared/machine.h +++ b/rpython/rlib/rvmprof/src/shared/machine.h @@ -14,5 +14,5 @@ * Writes the filename into buffer. Returns -1 if the platform is not * implemented. 
*/ -long vmp_fd_to_path(int fd, char * buffer, long buffer_len); +long vmp_fd_to_path(int fd, const char * buffer, long buffer_len); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -2,6 +2,7 @@ #include #include +#include /* for strerror */ #ifdef RPYTHON_VMPROF #ifdef RPYTHON_LL2CTYPES @@ -34,9 +35,11 @@ static size_t threads_size_step = 8; #endif +#ifndef VMPROF_WINDOWS int vmprof_get_itimer_type(void) { return itimer_type; } +#endif int vmprof_is_enabled(void) { return is_enabled; @@ -62,9 +65,11 @@ profile_interval_usec = value; } +#ifndef VMPROF_WINDOWS int vmprof_get_signal_type(void) { return signal_type; } +#endif char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -1,5 +1,6 @@ #pragma once + #include "vmprof.h" #include "machine.h" #include "compat.h" @@ -8,6 +9,7 @@ #include #include + #ifdef VMPROF_UNIX #include #include "vmprof_mt.h" @@ -15,7 +17,9 @@ #include #endif +#ifndef _MSC_VER #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include @@ -109,3 +113,4 @@ int broadcast_signal_for_threads(void); int is_main_thread(void); #endif + diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -14,7 +14,11 @@ #include #include #include +#ifdef VMPROF_WINDOWS +#include "shared/msiinttypes/stdint.h" +#else #include +#endif #include /* On '''normal''' Unices we can get RSS from '/proc//status'. 
*/ static int proc_file = -1; diff --git a/rpython/rlib/rvmprof/src/vmprof_stack.h b/rpython/rlib/rvmprof/src/vmprof_stack.h --- a/rpython/rlib/rvmprof/src/vmprof_stack.h +++ b/rpython/rlib/rvmprof/src/vmprof_stack.h @@ -1,7 +1,7 @@ #pragma once #ifdef _WIN32 -#define intptr_t long // XXX windows VC++ 2008 lacks stdint.h +#include "msiinttypes/stdint.h" #else #include #endif From pypy.commits at gmail.com Tue Aug 1 16:45:32 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 13:45:32 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-win32: close files Message-ID: <5980e86c.3888df0a.6a4a5.6afc@mx.google.com> Author: Matti Picus Branch: vmprof-win32 Changeset: r92021:91789212fc18 Date: 2017-08-01 23:24 +0300 http://bitbucket.org/pypy/pypy/changeset/91789212fc18/ Log: close files diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,6 +1,5 @@ import sys from rpython.tool.udir import udir -from pypy.tool.pytest.objspace import gettestobjspace class AppTestVMProf(object): spaceconfig = {'usemodules': ['_vmprof', 'struct']} @@ -12,82 +11,90 @@ '__pypy__' not in sys.builtin_module_names) def test_import_vmprof(self): + import _vmprof tmpfile = open(self.tmpfilename, 'wb') - tmpfileno = tmpfile.fileno() tmpfile2 = open(self.tmpfilename2, 'wb') - tmpfileno2 = tmpfile2.fileno() + try: + tmpfileno = tmpfile.fileno() + tmpfileno2 = tmpfile2.fileno() - import struct, sys, gc + import struct, gc - WORD = struct.calcsize('l') + WORD = struct.calcsize('l') - def count(s): - i = 0 - count = 0 - i += 5 * WORD # header - assert s[i ] == '\x05' # MARKER_HEADER - assert s[i + 1] == '\x00' # 0 - assert s[i + 2] == '\x06' # VERSION_TIMESTAMP - assert s[i + 3] == '\x08' # PROFILE_RPYTHON - assert s[i + 4] == chr(4) # len('pypy') - assert s[i + 5: i + 9] == 'pypy' - i += 9 - while i < len(s): - if s[i] == '\x03': - break - elif s[i] == 
'\x01': - i += 1 - _, size = struct.unpack("ll", s[i:i + 2 * WORD]) - i += 2 * WORD + size * struct.calcsize("P") - i += WORD # thread id - elif s[i] == '\x02': - i += 1 - _, size = struct.unpack("ll", s[i:i + 2 * WORD]) - count += 1 - i += 2 * WORD + size - elif s[i] == '\x06': - print(s[i:i+24]) - i += 1+8+8+8 - elif s[i] == '\x07': - i += 1 - # skip string - size, = struct.unpack("l", s[i:i + WORD]) - i += WORD+size - # skip string - size, = struct.unpack("l", s[i:i + WORD]) - i += WORD+size - else: - raise AssertionError(ord(s[i])) - return count + def count(s): + i = 0 + count = 0 + i += 5 * WORD # header + assert s[i ] == '\x05' # MARKER_HEADER + assert s[i + 1] == '\x00' # 0 + assert s[i + 2] == '\x06' # VERSION_TIMESTAMP + assert s[i + 3] == '\x08' # PROFILE_RPYTHON + assert s[i + 4] == chr(4) # len('pypy') + assert s[i + 5: i + 9] == 'pypy' + i += 9 + while i < len(s): + if s[i] == '\x03': + break + elif s[i] == '\x01': + i += 1 + _, size = struct.unpack("ll", s[i:i + 2 * WORD]) + i += 2 * WORD + size * struct.calcsize("P") + i += WORD # thread id + elif s[i] == '\x02': + i += 1 + _, size = struct.unpack("ll", s[i:i + 2 * WORD]) + count += 1 + i += 2 * WORD + size + elif s[i] == '\x06': + print(s[i:i+24]) + i += 1+8+8+8 + elif s[i] == '\x07': + i += 1 + # skip string + size, = struct.unpack("l", s[i:i + WORD]) + i += WORD+size + # skip string + size, = struct.unpack("l", s[i:i + WORD]) + i += WORD+size + else: + raise AssertionError(ord(s[i])) + return count - import _vmprof - gc.collect() # try to make the weakref list deterministic - gc.collect() # by freeing all dead code objects - _vmprof.enable(tmpfileno, 0.01, 0, 0, 0, 0) - _vmprof.disable() - s = open(self.tmpfilename, 'rb').read() - no_of_codes = count(s) - assert no_of_codes > 10 - d = {} + gc.collect() # try to make the weakref list deterministic + gc.collect() # by freeing all dead code objects + _vmprof.enable(tmpfileno, 0.01, 0, 0, 0, 0) + _vmprof.disable() + with open(self.tmpfilename, 'rb') 
as fid: + s = fid.read() + no_of_codes = count(s) + assert no_of_codes > 10 + d = {} - exec """def foo(): - pass - """ in d + exec """def foo(): + pass + """ in d - gc.collect() - gc.collect() - _vmprof.enable(tmpfileno2, 0.01, 0, 0, 0, 0) + gc.collect() + gc.collect() + _vmprof.enable(tmpfileno2, 0.01, 0, 0, 0, 0) - exec """def foo2(): - pass - """ in d + exec """def foo2(): + pass + """ in d - _vmprof.disable() - s = open(self.tmpfilename2, 'rb').read() - no_of_codes2 = count(s) - assert "py:foo:" in s - assert "py:foo2:" in s - assert no_of_codes2 >= no_of_codes + 2 # some extra codes from tests + _vmprof.disable() + with open(self.tmpfilename2, 'rb') as fid: + s = fid.read() + no_of_codes2 = count(s) + assert "py:foo:" in s + assert "py:foo2:" in s + assert no_of_codes2 >= no_of_codes + 2 # some extra codes from tests + finally: + if _vmprof.is_enabled(): + _vmprof.disable() + tmpfile.close() + tmpfile2.close() def test_enable_ovf(self): import _vmprof @@ -100,31 +107,34 @@ def test_is_enabled(self): import _vmprof - tmpfile = open(self.tmpfilename, 'wb') - assert _vmprof.is_enabled() is False - _vmprof.enable(tmpfile.fileno(), 0.01, 0, 0, 0, 0) - assert _vmprof.is_enabled() is True - _vmprof.disable() - assert _vmprof.is_enabled() is False + with open(self.tmpfilename, 'wb') as tmpfile: + assert _vmprof.is_enabled() is False + _vmprof.enable(tmpfile.fileno(), 0.01, 0, 0, 0, 0) + assert _vmprof.is_enabled() is True + _vmprof.disable() + assert _vmprof.is_enabled() is False def test_get_profile_path(self): import _vmprof - tmpfile = open(self.tmpfilename, 'wb') - assert _vmprof.get_profile_path() is None - _vmprof.enable(tmpfile.fileno(), 0.01, 0, 0, 0, 0) - path = _vmprof.get_profile_path() - if path != tmpfile.name: - with open(path, "rb") as fd1: - assert fd1.read() == tmpfile.read() - _vmprof.disable() - assert _vmprof.get_profile_path() is None + try: + with open(self.tmpfilename, 'wb') as tmpfile: + assert _vmprof.get_profile_path() is None + 
_vmprof.enable(tmpfile.fileno(), 0.01, 0, 0, 0, 0) + path = _vmprof.get_profile_path() + if path != tmpfile.name: + with open(path, "rb") as fd1: + assert fd1.read() == tmpfile.read() + _vmprof.disable() + assert _vmprof.get_profile_path() is None + finally: + if _vmprof.is_enabled(): + _vmprof.disable() def test_stop_sampling(self): if not self.plain: skip("unreliable test except on CPython without -A") import os import _vmprof - tmpfile = open(self.tmpfilename, 'wb') native = 1 def f(): import sys @@ -132,19 +142,20 @@ j = sys.maxsize for i in range(500): j = math.sqrt(j) - _vmprof.enable(tmpfile.fileno(), 0.01, 0, native, 0, 0) - # get_vmprof_stack() always returns 0 here! - # see vmprof_common.c and assume RPYTHON_LL2CTYPES is defined! - f() - fileno = _vmprof.stop_sampling() - pos = os.lseek(fileno, 0, os.SEEK_CUR) - f() - pos2 = os.lseek(fileno, 0, os.SEEK_CUR) - assert pos == pos2 - _vmprof.start_sampling() - f() - fileno = _vmprof.stop_sampling() - pos3 = os.lseek(fileno, 0, os.SEEK_CUR) - assert pos3 > pos - _vmprof.disable() + with open(self.tmpfilename, 'wb') as tmpfile: + _vmprof.enable(tmpfile.fileno(), 0.01, 0, native, 0, 0) + # get_vmprof_stack() always returns 0 here! + # see vmprof_common.c and assume RPYTHON_LL2CTYPES is defined! 
+ f() + fileno = _vmprof.stop_sampling() + pos = os.lseek(fileno, 0, os.SEEK_CUR) + f() + pos2 = os.lseek(fileno, 0, os.SEEK_CUR) + assert pos == pos2 + _vmprof.start_sampling() + f() + fileno = _vmprof.stop_sampling() + pos3 = os.lseek(fileno, 0, os.SEEK_CUR) + assert pos3 > pos + _vmprof.disable() From pypy.commits at gmail.com Tue Aug 1 22:35:48 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 01 Aug 2017 19:35:48 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Add space.appdef() Message-ID: <59813a84.8e951c0a.e87d6.a2e2@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92022:93ef9a8f432f Date: 2017-08-01 16:06 +0100 http://bitbucket.org/pypy/pypy/changeset/93ef9a8f432f/ Log: Add space.appdef() diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1,4 +1,5 @@ import sys +import py from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES @@ -1338,6 +1339,20 @@ self.setitem(w_globals, w_key, self.builtin) return statement.exec_code(self, w_globals, w_locals) + @not_rpython + def appdef(self, source): + '''Create interp-level function object from app-level source. + + The source should be in the same format as for space.appexec(): + """(foo, bar): return 'baz'""" + ''' + source = source.lstrip() + assert source.startswith('('), "incorrect header in:\n%s" % (source,) + source = py.code.Source("def anonymous%s\n" % source) + w_glob = self.newdict(module=True) + self.exec_(str(source), w_glob, w_glob) + return self.getitem(w_glob, self.newtext('anonymous')) + @specialize.arg(2) def appexec(self, posargs_w, source): """ return value from executing given source at applevel. 
@@ -1926,15 +1941,7 @@ class AppExecCache(SpaceCache): @not_rpython def build(cache, source): - space = cache.space - # XXX will change once we have our own compiler - import py - source = source.lstrip() - assert source.startswith('('), "incorrect header in:\n%s" % (source,) - source = py.code.Source("def anonymous%s\n" % source) - w_glob = space.newdict(module=True) - space.exec_(str(source), w_glob, w_glob) - return space.getitem(w_glob, space.newtext('anonymous')) + return cache.space.appdef(source) # Table describing the regular part of the interface of object spaces, From pypy.commits at gmail.com Tue Aug 1 22:35:50 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Aug 2017 19:35:50 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: fix translation by adding immortal to malloc in rpython Message-ID: <59813a86.415f1c0a.c4ecf.ec03@mx.google.com> Author: Matti Picus Branch: cpyext-leakchecking Changeset: r92023:c1d2c7124ad5 Date: 2017-07-31 14:17 +0300 http://bitbucket.org/pypy/pypy/changeset/c1d2c7124ad5/ Log: fix translation by adding immortal to malloc in rpython (grafted from e3219c61e7a801f5257cba1a9df98aa1277cf102) diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -2208,7 +2208,7 @@ return _ptr(Ptr(T), o, solid) @analyzer_for(malloc) -def ann_malloc(s_T, s_n=None, s_flavor=None, s_zero=None, +def ann_malloc(s_T, s_n=None, s_flavor=None, s_immortal=None, s_zero=None, s_track_allocation=None, s_add_memory_pressure=None, s_nonmovable=None): assert (s_n is None or s_n.knowntype == int diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -347,19 +347,20 @@ # annotation of low-level types @typer_for(lltype.malloc) -def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None, - i_add_memory_pressure=None, 
i_nonmovable=None): +def rtype_malloc(hop, i_flavor=None, i_immortal=None, i_zero=None, + i_track_allocation=None, i_add_memory_pressure=None, i_nonmovable=None): assert hop.args_s[0].is_constant() vlist = [hop.inputarg(lltype.Void, arg=0)] opname = 'malloc' kwds_v = parse_kwds( hop, (i_flavor, lltype.Void), + (i_immortal, None), (i_zero, None), (i_track_allocation, None), (i_add_memory_pressure, None), (i_nonmovable, None)) - (v_flavor, v_zero, v_track_allocation, + (v_flavor, v_immortal, v_zero, v_track_allocation, v_add_memory_pressure, v_nonmovable) = kwds_v flags = {'flavor': 'gc'} if v_flavor is not None: From pypy.commits at gmail.com Tue Aug 1 22:35:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 01 Aug 2017 19:35:52 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Add option to prevent caching in space.appexec() (for cpyext tests) Message-ID: <59813a88.4588df0a.ca982.6d4e@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92024:d7afdd455feb Date: 2017-08-02 01:55 +0100 http://bitbucket.org/pypy/pypy/changeset/d7afdd455feb/ Log: Add option to prevent caching in space.appexec() (for cpyext tests) diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1354,7 +1354,7 @@ return self.getitem(w_glob, self.newtext('anonymous')) @specialize.arg(2) - def appexec(self, posargs_w, source): + def appexec(self, posargs_w, source, cache=True): """ return value from executing given source at applevel. The source must look like '''(x, y): @@ -1362,7 +1362,11 @@ return result ''' """ - w_func = self.fromcache(AppExecCache).getorbuild(source) + if cache: + w_func = self.fromcache(AppExecCache).getorbuild(source) + else: + # NB: since appdef() is not-RPython, using cache=False also is. 
+ w_func = self.appdef(source) args = Arguments(self, list(posargs_w)) return self.call_args(w_func, args) From pypy.commits at gmail.com Tue Aug 1 22:35:54 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 01 Aug 2017 19:35:54 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Move 2 tests from app- to interp-level to fix refcounting issues Message-ID: <59813a8a.93b5df0a.ff92c.2148@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92025:d893df676f94 Date: 2017-08-02 03:35 +0100 http://bitbucket.org/pypy/pypy/changeset/d893df676f94/ Log: Move 2 tests from app- to interp-level to fix refcounting issues diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py --- a/pypy/module/cpyext/test/test_dictobject.py +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -111,70 +111,14 @@ PyDict_Update(space, w_d, w_d2) assert space.unwrap(w_d) == dict(a='b') # unchanged - def test_iter(self, space): - w_dict = space.sys.getdict(space) - py_dict = make_ref(space, w_dict) - - ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') - ppos[0] = 0 - pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - - try: - w_copy = space.newdict() - while PyDict_Next(space, w_dict, ppos, pkey, pvalue): - w_key = from_ref(space, pkey[0]) - w_value = from_ref(space, pvalue[0]) - space.setitem(w_copy, w_key, w_value) - finally: - lltype.free(ppos, flavor='raw') - lltype.free(pkey, flavor='raw') - lltype.free(pvalue, flavor='raw') - - decref(space, py_dict) # release borrowed references - - assert space.eq_w(space.len(w_copy), space.len(w_dict)) - assert space.eq_w(w_copy, w_dict) - - def test_iterkeys(self, space): - w_dict = space.sys.getdict(space) - py_dict = make_ref(space, w_dict) - - ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') - pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - - keys_w 
= [] - values_w = [] - try: - ppos[0] = 0 - while PyDict_Next(space, w_dict, ppos, pkey, None): - w_key = from_ref(space, pkey[0]) - keys_w.append(w_key) - ppos[0] = 0 - while PyDict_Next(space, w_dict, ppos, None, pvalue): - w_value = from_ref(space, pvalue[0]) - values_w.append(w_value) - finally: - lltype.free(ppos, flavor='raw') - lltype.free(pkey, flavor='raw') - lltype.free(pvalue, flavor='raw') - - decref(space, py_dict) # release borrowed references - - assert space.eq_w(space.newlist(keys_w), - space.call_method(w_dict, "keys")) - assert space.eq_w(space.newlist(values_w), - space.call_method(w_dict, "values")) - def test_dictproxy(self, space): - w_dict = space.sys.get('modules') + w_dict = space.appexec([], """(): return {1: 2, 3: 4}""") w_proxy = PyDictProxy_New(space, w_dict) - assert space.contains_w(w_proxy, space.wrap('sys')) + assert space.contains_w(w_proxy, space.newint(1)) raises(OperationError, space.setitem, - w_proxy, space.wrap('sys'), space.w_None) + w_proxy, space.newint(1), space.w_None) raises(OperationError, space.delitem, - w_proxy, space.wrap('sys')) + w_proxy, space.newint(1)) raises(OperationError, space.call_method, w_proxy, 'clear') assert PyDictProxy_Check(space, w_proxy) @@ -243,6 +187,59 @@ d = {"a": 1} raises(AttributeError, module.update, d, [("c", 2)]) + def test_iter(self): + module = self.import_extension('foo', [ + ("copy", "METH_O", + ''' + Py_ssize_t pos = 0; + PyObject *key, *value; + PyObject* copy = PyDict_New(); + while (PyDict_Next(args, &pos, &key, &value)) + { + if (PyDict_SetItem(copy, key, value) < 0) + { + Py_DecRef(copy); + return NULL; + } + } + return copy; + ''')]) + d = {1: 'xyz', 3: 'abcd'} + copy = module.copy(d) + assert len(copy) == len(d) + assert copy == d + + def test_iterkeys(self): + module = self.import_extension('foo', [ + ("keys_and_values", "METH_O", + ''' + Py_ssize_t pos = 0; + PyObject *key, *value; + PyObject* keys = PyList_New(0); + while (PyDict_Next(args, &pos, &key, NULL)) + { + if 
(PyList_Append(keys, key) < 0) + { + Py_DecRef(keys); + return NULL; + } + } + pos = 0; + PyObject* values = PyList_New(0); + while (PyDict_Next(args, &pos, NULL, &value)) + { + if (PyList_Append(values, value) < 0) + { + Py_DecRef(keys); + Py_DecRef(values); + return NULL; + } + } + return Py_BuildValue("(NN)", keys, values); + ''')]) + d = {1: 'xyz', 3: 'abcd'} + assert module.keys_and_values(d) == (d.keys(), d.values()) + def test_typedict2(self): module = self.import_extension('foo', [ ("get_type_dict", "METH_O", @@ -255,6 +252,7 @@ ]) d = module.get_type_dict(1) assert d['real'].__get__(1, 1) == 1 + def test_advanced(self): module = self.import_extension('foo', [ ("dict_len", "METH_O", @@ -266,7 +264,7 @@ ''' int ret; PyObject * dict = PyTuple_GetItem(args, 0); - if (PyTuple_Size(args) < 3 || !dict || + if (PyTuple_Size(args) < 3 || !dict || !dict->ob_type->tp_as_mapping || !dict->ob_type->tp_as_mapping->mp_ass_subscript) return PyLong_FromLong(-1); @@ -279,7 +277,7 @@ ''' int ret; PyObject * dict = PyTuple_GetItem(args, 0); - if (PyTuple_Size(args) < 2 || !dict || + if (PyTuple_Size(args) < 2 || !dict || !dict->ob_type->tp_as_mapping || !dict->ob_type->tp_as_mapping->mp_ass_subscript) return PyLong_FromLong(-1); From pypy.commits at gmail.com Tue Aug 1 22:59:58 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 01 Aug 2017 19:59:58 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: fix some tests Message-ID: <5981402e.4aa8df0a.13b04.fde0@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92026:398723725f36 Date: 2017-08-02 03:59 +0100 http://bitbucket.org/pypy/pypy/changeset/398723725f36/ Log: fix some tests diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -279,6 +279,7 @@ assert module.call_method("text") == 2 def test_CompileString_and_Exec(self): + import sys module = 
self.import_extension('foo', [ ("compile_string", "METH_NOARGS", """ @@ -313,6 +314,9 @@ print mod.__dict__ assert mod.f(42) == 47 + # Clean-up + del sys.modules['cpyext_test_modname'] + def test_merge_compiler_flags(self): module = self.import_extension('foo', [ ("get_flags", "METH_NOARGS", @@ -357,4 +361,4 @@ except RuntimeError as e: assert 'while calling recurse' in str(e) else: - assert False, "expected RuntimeError" + assert False, "expected RuntimeError" diff --git a/pypy/module/cpyext/test/test_floatobject.py b/pypy/module/cpyext/test/test_floatobject.py --- a/pypy/module/cpyext/test/test_floatobject.py +++ b/pypy/module/cpyext/test/test_floatobject.py @@ -104,6 +104,7 @@ PyFloatObject* pfo = (PyFloatObject*)pyobj; int res = PyFloat_Check(pyobj) && PyFloat_CheckExact(pyobj) && PyFloat_Check(pfo) && PyFloat_CheckExact(pfo); + Py_DecRef(pyobj); return PyLong_FromLong(res);"""), ]) assert module.test() == 1 diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py --- a/pypy/module/cpyext/test/test_funcobject.py +++ b/pypy/module/cpyext/test/test_funcobject.py @@ -46,7 +46,7 @@ w_function = space.appexec([], """(): def func(x, y, z): return x return func - """) + """, cache=False) w_code = PyFunction_GetCode(space, w_function) assert w_code.co_name == "func" @@ -63,7 +63,7 @@ w_code = space.appexec([], """(): def func(%s): %s return func.__code__ - """ % (signature, body)) + """ % (signature, body), cache=False) ref = make_ref(space, w_code) co_flags = rffi.cast(PyCodeObject, ref).c_co_flags decref(space, ref) diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py --- a/pypy/module/cpyext/test/test_longobject.py +++ b/pypy/module/cpyext/test/test_longobject.py @@ -313,6 +313,7 @@ ret = obj->ob_type->tp_as_number->nb_power(obj, one, one); else ret = PyLong_FromLong(-1); + Py_DECREF(one); Py_DECREF(obj); return ret; """), @@ -340,4 +341,3 @@ assert module.has_pow() == 0 
assert module.has_hex() == '0x2aL' assert module.has_oct() == '052L' - From pypy.commits at gmail.com Wed Aug 2 02:26:52 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 01 Aug 2017 23:26:52 -0700 (PDT) Subject: [pypy-commit] pypy optinfo-into-bridges: close abandoned branch (everything useful was merged via the Message-ID: <598170ac.3888df0a.6a4a5.b32c@mx.google.com> Author: Carl Friedrich Bolz Branch: optinfo-into-bridges Changeset: r92027:596f167b7fd8 Date: 2017-08-02 08:26 +0200 http://bitbucket.org/pypy/pypy/changeset/596f167b7fd8/ Log: close abandoned branch (everything useful was merged via the optinfo-into-bridges-3 branch) From pypy.commits at gmail.com Wed Aug 2 06:04:28 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 03:04:28 -0700 (PDT) Subject: [pypy-commit] pypy arrays-force-less: a small improvement to getarrayitem caching Message-ID: <5981a3ac.45091c0a.eba88.3d69@mx.google.com> Author: Carl Friedrich Bolz Branch: arrays-force-less Changeset: r92029:7a124bb988d9 Date: 2017-08-02 12:02 +0200 http://bitbucket.org/pypy/pypy/changeset/7a124bb988d9/ Log: a small improvement to getarrayitem caching when invalidating getarrayitem caches due to a setarrayitem, don't invalidate items that are at other indexes. 
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -223,7 +223,10 @@ def invalidate(self, descr): for opinfo in self.cached_infos: assert isinstance(opinfo, info.ArrayPtrInfo) - opinfo._items = None + # only invalidate those at self.index + if self.index < len(opinfo._items): + opinfo._items[self.index] = None + #opinfo._items = None #[self.index] = None self.cached_infos = [] self.cached_structs = [] diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -1537,6 +1537,46 @@ """ self.optimize_loop(ops, expected) + def test_duplicate_getarrayitem_after_setarrayitem_and_guard(self): + ops = """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p7 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p9 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + p10 = getarrayitem_gc_r(p1, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p7) + escape_n(p8) + escape_n(p9) + escape_n(p10) + jump(p0, p1, p2, p3, i1) + """ + expected = """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p4) + escape_n(p8) + escape_n(p6) + escape_n(p3) + jump(p0, p1, 
p2, p3, 1) + """ + self.optimize_loop(ops, expected) + def test_getarrayitem_pure_does_not_invalidate(self): ops = """ [p1, p2] From pypy.commits at gmail.com Wed Aug 2 06:04:25 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 03:04:25 -0700 (PDT) Subject: [pypy-commit] pypy arrays-force-less: a branch to try to not force the whole array after a lazy setarrayitem is Message-ID: <5981a3a9.6395df0a.29a7f.5f48@mx.google.com> Author: Carl Friedrich Bolz Branch: arrays-force-less Changeset: r92028:da044fcdd451 Date: 2017-08-02 11:05 +0200 http://bitbucket.org/pypy/pypy/changeset/da044fcdd451/ Log: a branch to try to not force the whole array after a lazy setarrayitem is forced, but only the affected indices From pypy.commits at gmail.com Wed Aug 2 07:54:59 2017 From: pypy.commits at gmail.com (exarkun) Date: Wed, 02 Aug 2017 04:54:59 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Fix test_unflagged_non_text_codec_handling Message-ID: <5981bd93.a3b0df0a.5f838.a76e@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92030:4499d3bd3e6f Date: 2017-08-02 07:41 -0400 http://bitbucket.org/pypy/pypy/changeset/4499d3bd3e6f/ Log: Fix test_unflagged_non_text_codec_handling test_unflagged_non_text_codec_handling (test.test_codecs.ExceptionChainingTest) expected a different exception message from misbehaving codecs. Update the message to reflect this. 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -576,7 +576,9 @@ w_retval = encode_text(space, w_object, encoding, errors) if not space.isinstance_w(w_retval, space.w_bytes): raise oefmt(space.w_TypeError, - "encoder did not return a bytes object (type '%T')", + "'%s' encoder returned '%T' instead of 'bytes'; " + "use codecs.encode() to encode to arbitrary types", + encoding, w_retval) return w_retval @@ -604,7 +606,9 @@ w_retval = decode_text(space, w_obj, encoding, errors) if not space.isinstance_w(w_retval, space.w_unicode): raise oefmt(space.w_TypeError, - "decoder did not return a bytes object (type '%T')", + "'%s' decoder returned '%T' instead of 'str'; " + "use codecs.decode() to decode to arbitrary types", + encoding, w_retval) return w_retval From pypy.commits at gmail.com Wed Aug 2 08:37:23 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 02 Aug 2017 05:37:23 -0700 (PDT) Subject: [pypy-commit] cffi default: Add an optional 'size' argument to ffi.gc(). So far, it has no effect Message-ID: <5981c783.e6b2df0a.40f9c.ff2c@mx.google.com> Author: Armin Rigo Branch: Changeset: r2998:120347b84c08 Date: 2017-08-02 14:37 +0200 http://bitbucket.org/cffi/cffi/changeset/120347b84c08/ Log: Add an optional 'size' argument to ffi.gc(). So far, it has no effect on CPython. 
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -6708,10 +6708,12 @@ CDataObject *cd; CDataObject *origobj; PyObject *destructor; - static char *keywords[] = {"cdata", "destructor", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O:gc", keywords, - &CData_Type, &origobj, &destructor)) + Py_ssize_t ignored; /* for pypy */ + static char *keywords[] = {"cdata", "destructor", "size", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|n:gc", keywords, + &CData_Type, &origobj, &destructor, + &ignored)) return NULL; if (destructor == Py_None) { diff --git a/c/ffi_obj.c b/c/ffi_obj.c --- a/c/ffi_obj.c +++ b/c/ffi_obj.c @@ -705,7 +705,12 @@ PyDoc_STRVAR(ffi_gc_doc, "Return a new cdata object that points to the same data.\n" "Later, when this new cdata object is garbage-collected,\n" -"'destructor(old_cdata_object)' will be called."); +"'destructor(old_cdata_object)' will be called.\n" +"\n" +"The optional 'size' gives an estimate of the size, used to\n" +"trigger the garbage collection more eagerly. So far only used\n" +"on PyPy. It tells the GC that the returned object keeps alive\n" +"roughly 'size' bytes of external memory."); #define ffi_gc b_gcp /* ffi_gc() => b_gcp() from _cffi_backend.c */ diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. 
""" - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/cffi/backend_ctypes.py b/cffi/backend_ctypes.py --- a/cffi/backend_ctypes.py +++ b/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/testing/cffi0/test_verify.py b/testing/cffi0/test_verify.py --- a/testing/cffi0/test_verify.py +++ b/testing/cffi0/test_verify.py @@ -2454,3 +2454,61 @@ assert (pt.x, pt.y) == (-9*500*999, 9*500*999) pt = lib.call2(lib.cb2) assert (pt.x, pt.y) == (99*500*999, -99*500*999) + +def test_ffi_gc_size_arg(): + # with PyPy's GC, these calls to ffi.gc() would rapidly consume + # 40 GB of RAM without the third argument + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 20*1024*1024) + del p + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' 
+ p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' + x = X() + x.p = p + x.cyclic = x + del p, x diff --git a/testing/cffi1/test_verify1.py b/testing/cffi1/test_verify1.py --- a/testing/cffi1/test_verify1.py +++ b/testing/cffi1/test_verify1.py @@ -2290,3 +2290,61 @@ expected = "unsigned int" assert ffi.typeof("UINT_PTR") is ffi.typeof(expected) assert ffi.typeof("PTSTR") is ffi.typeof("wchar_t *") + +def test_gc_pypy_size_arg(): + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 20*1024*1024) + del p + # with PyPy's GC, the above would rapidly consume 40 GB of RAM + # without the third argument to ffi.gc() + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' 
+ p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' + x = X() + x.p = p + x.cyclic = x + del p, x From pypy.commits at gmail.com Wed Aug 2 09:00:17 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 02 Aug 2017 06:00:17 -0700 (PDT) Subject: [pypy-commit] pypy default: Add a workaround to make the distutils options 'runtime_library_dirs' working. Fixes #2593 Message-ID: <5981cce1.c98adf0a.b4cba.21a1@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92031:620fefe235f3 Date: 2017-08-02 14:59 +0200 http://bitbucket.org/pypy/pypy/changeset/620fefe235f3/ Log: Add a workaround to make the distutils options 'runtime_library_dirs' working. Fixes #2593 diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. 
+ # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. See Python bug #445902: From pypy.commits at gmail.com Wed Aug 2 10:00:00 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 02 Aug 2017 07:00:00 -0700 (PDT) Subject: [pypy-commit] pypy default: Call add_memory_pressure(size) in response to ffi.gc(..., size) Message-ID: <5981dae0.0f861c0a.e0962.b35d@mx.google.com> Author: Armin Rigo Branch: Changeset: r92032:53cb371f1828 Date: 2017-08-02 15:49 +0200 http://bitbucket.org/pypy/pypy/changeset/53cb371f1828/ Log: Call add_memory_pressure(size) in response to ffi.gc(..., size) diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -433,17 +433,22 @@ def _sizeof(self): return self.ctype.size - def with_gc(self, w_destructor): + def with_gc(self, w_destructor, size=0): space = self.space if space.is_none(w_destructor): if isinstance(self, W_CDataGCP): self.detach_destructor() - return space.w_None - raise oefmt(space.w_TypeError, - "Can remove destructor only on a object " - "previously returned by ffi.gc()") - with self as ptr: - return W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + w_res = space.w_None + else: + raise oefmt(space.w_TypeError, + "Can remove destructor only on a object " + "previously returned by ffi.gc()") + else: + with self as ptr: + w_res = W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + if size != 0: + rgc.add_memory_pressure(size) + return w_res def unpack(self, length): from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray diff --git 
a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -351,14 +351,14 @@ return handle.from_handle(self.space, w_arg) - @unwrap_spec(w_cdata=W_CData) - def descr_gc(self, w_cdata, w_destructor): + @unwrap_spec(w_cdata=W_CData, size=int) + def descr_gc(self, w_cdata, w_destructor, size=0): """\ Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called.""" # - return w_cdata.with_gc(w_destructor) + return w_cdata.with_gc(w_destructor, size) @unwrap_spec(replace_with='text') diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -257,6 +257,6 @@ # ____________________________________________________________ - at unwrap_spec(w_cdata=cdataobj.W_CData) -def gcp(space, w_cdata, w_destructor): - return w_cdata.with_gc(w_destructor) + at unwrap_spec(w_cdata=cdataobj.W_CData, size=int) +def gcp(space, w_cdata, w_destructor, size=0): + return w_cdata.with_gc(w_destructor, size) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -377,7 +377,7 @@ raises(TypeError, ffi.gc, p, None) seen = [] q1 = ffi.gc(p, lambda p: seen.append(1)) - q2 = ffi.gc(q1, lambda p: seen.append(2)) + q2 = ffi.gc(q1, lambda p: seen.append(2), size=123) import gc; gc.collect() assert seen == [] assert ffi.gc(q1, None) is None From pypy.commits at gmail.com Wed Aug 2 10:00:04 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 02 Aug 2017 07:00:04 -0700 (PDT) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5981dae4.8dd81c0a.a72a9.c757@mx.google.com> Author: Armin Rigo Branch: Changeset: r92034:e4d42cef80b0 
Date: 2017-08-02 15:59 +0200 http://bitbucket.org/pypy/pypy/changeset/e4d42cef80b0/ Log: merge heads diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. 
See Python bug #445902: From pypy.commits at gmail.com Wed Aug 2 10:00:02 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 02 Aug 2017 07:00:02 -0700 (PDT) Subject: [pypy-commit] pypy default: import cffi/120347b84c08 Message-ID: <5981dae2.1babdf0a.4684e.dc81@mx.google.com> Author: Armin Rigo Branch: Changeset: r92033:1903d256e24b Date: 2017-08-02 15:59 +0200 http://bitbucket.org/pypy/pypy/changeset/1903d256e24b/ Log: import cffi/120347b84c08 diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. 
""" - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py @@ -2455,3 +2455,61 @@ assert (pt.x, pt.y) == (-9*500*999, 9*500*999) pt = lib.call2(lib.cb2) assert (pt.x, pt.y) == (99*500*999, -99*500*999) + +def test_ffi_gc_size_arg(): + # with PyPy's GC, these calls to ffi.gc() would rapidly consume + # 40 GB of RAM without the third argument + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 20*1024*1024) + del p + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. 
+ if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' + x = X() + x.p = p + x.cyclic = x + del p, x diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py @@ -2291,3 +2291,61 @@ expected = "unsigned int" assert ffi.typeof("UINT_PTR") is ffi.typeof(expected) assert ffi.typeof("PTSTR") is ffi.typeof("wchar_t *") + +def test_gc_pypy_size_arg(): + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 20*1024*1024) + del p + # with PyPy's GC, the above would rapidly consume 40 GB of RAM + # without the third argument to ffi.gc() + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. 
So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' + x = X() + x.p = p + x.cyclic = x + del p, x From pypy.commits at gmail.com Wed Aug 2 10:14:06 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 02 Aug 2017 07:14:06 -0700 (PDT) Subject: [pypy-commit] cffi default: Document ffi.gc(..., size) Message-ID: <5981de2e.129e1c0a.936d6.c5f4@mx.google.com> Author: Armin Rigo Branch: Changeset: r2999:5b1214633c90 Date: 2017-08-02 16:13 +0200 http://bitbucket.org/cffi/cffi/changeset/5b1214633c90/ Log: Document ffi.gc(..., size) diff --git a/doc/source/ref.rst b/doc/source/ref.rst --- a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -352,10 +352,13 @@ .. __: #ffi-buffer +.. _ffi-gc: + ffi.gc() ++++++++ -**ffi.gc(cdata, destructor)**: return a new cdata object that points to the +**ffi.gc(cdata, destructor, size=0)**: +return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, ``destructor(old_cdata_object)`` will be called. Example of usage: ``ptr = ffi.gc(lib.custom_malloc(42), lib.custom_free)``. 
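The extended ``ffi.gc(cdata, destructor, size=0)`` call can be exercised from Python roughly like this. This is a minimal sketch, not code from the changesets above: it uses ``ffi.new()`` plus a Python callable as a stand-in destructor (an assumption for illustration; real code would typically pass a C-level function such as ``lib.free``):

```python
import cffi

ffi = cffi.FFI()

released = []

def destructor(cdata):      # hypothetical stand-in for a C free()
    released.append(cdata)

buf = ffi.new("char[]", 64)          # owns the underlying 64 bytes
p = ffi.cast("void *", buf)
# 'size' (new in cffi 1.11) estimates how much external memory 'p'
# keeps alive; on PyPy it makes the GC, and hence the destructor,
# run more eagerly.  It is only a hint, not a correctness matter.
q = ffi.gc(p, destructor, size=64)

# ffi.gc(ptr, None) detaches the destructor again and returns None
assert ffi.gc(q, None) is None
```

On CPython the ``size`` hint is currently ignored, so passing it only changes behavior on PyPy.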
@@ -364,21 +367,38 @@ which means the destructor is called as soon as *this* exact returned object is garbage-collected. -**ffi.gc(ptr, None)**: removes the ownership on a object returned by a +**ffi.gc(ptr, None, size=0)**: +removes the ownership on a object returned by a regular call to ``ffi.gc``, and no destructor will be called when it is garbage-collected. The object is modified in-place, and the function returns ``None``. *New in version 1.7: ffi.gc(ptr, None)* -Note that ``ffi.gc()`` should be avoided for large memory allocations or -for limited resources. This is particularly true on PyPy: its GC does -not know how much memory or how many resources the returned ``ptr`` -holds. It will only run its GC when enough memory it knows about has -been allocated (and thus run the destructor possibly later than you -would expect). Moreover, the destructor is called in whatever thread -PyPy is at that moment, which might be a problem for some C libraries. -In these cases, consider writing a wrapper class with custom ``__enter__()`` -and ``__exit__()`` methods, allocating and freeing the C data at known -points in time, and using it in a ``with`` statement. +Note that ``ffi.gc()`` should be avoided for limited resources, or (with +cffi below 1.11) for large memory allocations. This is particularly +true on PyPy: its GC does not know how much memory or how many resources +the returned ``ptr`` holds. It will only run its GC when enough memory +it knows about has been allocated (and thus run the destructor possibly +later than you would expect). Moreover, the destructor is called in +whatever thread PyPy is at that moment, which might be a problem for +some C libraries. In these cases, consider writing a wrapper class with +custom ``__enter__()`` and ``__exit__()`` methods, allocating and +freeing the C data at known points in time, and using it in a ``with`` +statement. + +*New in version 1.11:* the ``size`` argument. 
If given, this should be +an estimate of the size (in bytes) that ``ptr`` keeps alive. This +information is passed on to the garbage collector, fixing part of the +problem described above. The ``size`` argument is most important on +PyPy; on CPython, it is ignored so far, but in the future it could be +used to trigger more eagerly the cyclic reference GC, too (see CPython +`issue 31105`__). + +The form ``ffi.gc(ptr, None, size=0)`` can be called with a negative +``size``, to cancel the estimate. It is not mandatory, though: +nothing gets out of sync if the size estimates do not match. It only +makes the next GC start more or less early. + +.. __: http://bugs.python.org/issue31105 .. _ffi-new-handle: diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -43,7 +43,16 @@ * Functions returning booleans would in some case still return 0 or 1 instead of False or True. Fixed. +* `ffi.gc()`__ now takes an optional third parameter, which gives an + estimate of the size (in bytes) of the object. So far, this is only + used by PyPy, to make the next GC occur more quickly (`issue #320`__). + In the future, this might have an effect on CPython too (provided + the CPython `issue 31105`__ is addressed). + .. __: https://bitbucket.org/cffi/cffi/issues/321/cffi-191-segmentation-fault-during-self +.. __: ref.html#ffi-gc +.. __: https://bitbucket.org/cffi/cffi/issues/320/improve-memory_pressure-management +.. 
__: http://bugs.python.org/issue31105 v1.10.1 From pypy.commits at gmail.com Wed Aug 2 10:49:49 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 07:49:49 -0700 (PDT) Subject: [pypy-commit] pypy arrays-force-less: document branch Message-ID: <5981e68d.010d1c0a.f0778.5762@mx.google.com> Author: Carl Friedrich Bolz Branch: arrays-force-less Changeset: r92035:5d7f91496e04 Date: 2017-08-02 16:48 +0200 http://bitbucket.org/pypy/pypy/changeset/5d7f91496e04/ Log: document branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -55,3 +55,8 @@ Fix the bounds in the GC when allocating a lot of objects with finalizers, fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. From pypy.commits at gmail.com Wed Aug 2 10:49:50 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 07:49:50 -0700 (PDT) Subject: [pypy-commit] pypy arrays-force-less: close to-be-merged branch Message-ID: <5981e68e.8f881c0a.356a8.d541@mx.google.com> Author: Carl Friedrich Bolz Branch: arrays-force-less Changeset: r92036:82b3076c7887 Date: 2017-08-02 16:48 +0200 http://bitbucket.org/pypy/pypy/changeset/82b3076c7887/ Log: close to-be-merged branch From pypy.commits at gmail.com Wed Aug 2 10:51:20 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 07:51:20 -0700 (PDT) Subject: [pypy-commit] pypy default: merge arrays-force-less Message-ID: <5981e6e8.ccb21c0a.f4a20.cd07@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92037:a22d0969f40a Date: 2017-08-02 16:50 +0200 http://bitbucket.org/pypy/pypy/changeset/a22d0969f40a/ Log: merge arrays-force-less don't throw away all of the knowledge about an array when forcing a lazy setarrayitem. instead, only throw away the cache of the specific index. 
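The effect described in this log message can be modeled in plain Python. This is an illustrative toy, not the actual RPython optimizer code: cached ``getarrayitem`` results are keyed per (array, index), so a write to a known constant index invalidates only that one entry instead of everything known about the array:

```python
class ArrayCache:
    """Toy model of the JIT's heap cache for arrays (illustrative only)."""

    def __init__(self):
        # (array identity, constant index) -> last known value
        self._cache = {}

    def getarrayitem(self, arr, index):
        key = (id(arr), index)
        if key not in self._cache:
            self._cache[key] = arr[index]   # "emit" the real read
        return self._cache[key]

    def setarrayitem(self, arr, index, value):
        arr[index] = value
        # Before this branch: forget everything cached about arr.
        # After (arrays-force-less): only the entry for this index
        # changes, so other cached reads of the same array survive.
        self._cache[(id(arr), index)] = value


cache = ArrayCache()
a = [10, 20, 30]
cache.getarrayitem(a, 0)        # caches a[0]
cache.setarrayitem(a, 1, 99)    # touches only index 1
assert cache.getarrayitem(a, 0) == 10   # knowledge about a[0] kept
```

This mirrors the ``test_duplicate_getarrayitem_after_setarrayitem_and_guard`` case in the diff below, where the reads ``p4``/``p5``/``p6`` are reused after a ``setarrayitem_gc`` at a different index.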
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -55,3 +55,8 @@ Fix the bounds in the GC when allocating a lot of objects with finalizers, fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -223,7 +223,10 @@ def invalidate(self, descr): for opinfo in self.cached_infos: assert isinstance(opinfo, info.ArrayPtrInfo) - opinfo._items = None + # only invalidate those at self.index + if self.index < len(opinfo._items): + opinfo._items[self.index] = None + #opinfo._items = None #[self.index] = None self.cached_infos = [] self.cached_structs = [] diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -1537,6 +1537,46 @@ """ self.optimize_loop(ops, expected) + def test_duplicate_getarrayitem_after_setarrayitem_and_guard(self): + ops = """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p7 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p9 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + p10 = getarrayitem_gc_r(p1, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p7) + escape_n(p8) + escape_n(p9) + escape_n(p10) + jump(p0, p1, p2, p3, i1) + """ + expected = """ + [p0, 
p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p4) + escape_n(p8) + escape_n(p6) + escape_n(p3) + jump(p0, p1, p2, p3, 1) + """ + self.optimize_loop(ops, expected) + def test_getarrayitem_pure_does_not_invalidate(self): ops = """ [p1, p2] From pypy.commits at gmail.com Wed Aug 2 11:32:27 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 08:32:27 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: merge arrays-force-less Message-ID: <5981f08b.830a1c0a.82504.a945@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92039:e8af759e759d Date: 2017-08-02 12:09 +0200 http://bitbucket.org/pypy/pypy/changeset/e8af759e759d/ Log: merge arrays-force-less diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -224,7 +224,10 @@ def invalidate(self, descr): for opinfo in self.cached_infos: assert isinstance(opinfo, info.ArrayPtrInfo) - opinfo._items[self.index] = None + # only invalidate those at self.index + if self.index < len(opinfo._items): + opinfo._items[self.index] = None + #opinfo._items = None #[self.index] = None self.cached_infos = [] self.cached_structs = [] diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -1537,6 +1537,46 @@ """ self.optimize_loop(ops, expected) + def test_duplicate_getarrayitem_after_setarrayitem_and_guard(self): + ops 
= """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p7 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p9 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + p10 = getarrayitem_gc_r(p1, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p7) + escape_n(p8) + escape_n(p9) + escape_n(p10) + jump(p0, p1, p2, p3, i1) + """ + expected = """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p4) + escape_n(p8) + escape_n(p6) + escape_n(p3) + jump(p0, p1, p2, p3, 1) + """ + self.optimize_loop(ops, expected) + def test_getarrayitem_pure_does_not_invalidate(self): ops = """ [p1, p2] From pypy.commits at gmail.com Wed Aug 2 11:32:29 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 08:32:29 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: finish support for serializing knowledge about arrays Message-ID: <5981f08d.9c8bdf0a.4994a.9376@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92040:8885e06c04d6 Date: 2017-08-02 16:26 +0200 http://bitbucket.org/pypy/pypy/changeset/8885e06c04d6/ Log: finish support for serializing knowledge about arrays diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -116,7 +116,6 @@ return None def force_lazy_set(self, optheap, descr, can_cache=True): - import 
pdb; pdb.set_trace() op = self._lazy_set if op is not None: # This is the way _lazy_set is usually reset to None. @@ -578,7 +577,6 @@ cf.do_setfield(self, op) def optimize_GETARRAYITEM_GC_I(self, op): - import pdb; pdb.set_trace() arrayinfo = self.ensure_ptr_info_arg0(op) indexb = self.getintbound(op.getarg(1)) cf = None @@ -641,7 +639,6 @@ optimize_GETARRAYITEM_GC_PURE_F = optimize_GETARRAYITEM_GC_PURE_I def optimize_SETARRAYITEM_GC(self, op): - import pdb; pdb.set_trace() #opnum = OpHelpers.getarrayitem_pure_for_descr(op.getdescr()) #if self.has_pure_result(opnum, [op.getarg(0), op.getarg(1)], # op.getdescr()): @@ -716,7 +713,6 @@ if isinstance(box2, Const) or box2 in available_boxes: result_getfield.append((box1, descr, box2)) result_array = [] - import pdb; pdb.set_trace() for descr, indexdict in self.cached_arrayitems.iteritems(): for index, cf in indexdict.iteritems(): if cf._lazy_set: @@ -731,7 +727,7 @@ return result_getfield, result_array def deserialize_optheap(self, triples_struct, triples_array): - for box1, descr, box2 in triples_struct[0]: + for box1, descr, box2 in triples_struct: parent_descr = descr.get_parent_descr() assert parent_descr.is_object() structinfo = box1.get_forwarded() @@ -742,15 +738,14 @@ cf = self.field_cache(descr) structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) - import pdb; pdb.set_trace() - for box1, index, descr, box2 in triples_array[0]: - structinfo = box1.get_forwarded() - if not isinstance(structinfo, info.AbstractVirtualPtrInfo): - structinfo = info.ArrayPtrInfo(descr) - box1.set_forwarded(structinfo) + for box1, index, descr, box2 in triples_array: + arrayinfo = box1.get_forwarded() + if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo): + arrayinfo = info.ArrayPtrInfo(descr) + box1.set_forwarded(arrayinfo) - cf = self.arrayitem_cache(index, descr) - structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) + cf = self.arrayitem_cache(descr, index) + arrayinfo.setitem(descr, index, box1, box2, 
optheap=self, cf=cf) dispatch_opt = make_dispatcher_method(OptHeap, 'optimize_', diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -211,6 +211,5 @@ assert res == f(6, 32, 16) self.check_trace_count(3) self.check_resops(guard_value=1) - self.check_resops(getarrayitem_gc_r=4) # 3x a.x, 1x a.n - self.check_resops(getfield_gc_r=1) # in main loop + self.check_resops(getarrayitem_gc_i=4) From pypy.commits at gmail.com Wed Aug 2 11:32:31 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 08:32:31 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: fix affected tests Message-ID: <5981f08f.c98adf0a.b4cba.468e@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92041:a9a9281dad98 Date: 2017-08-02 17:31 +0200 http://bitbucket.org/pypy/pypy/changeset/a9a9281dad98/ Log: fix affected tests diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -735,15 +735,14 @@ structinfo = info.InstancePtrInfo(parent_descr) structinfo.init_fields(parent_descr, descr.get_index()) box1.set_forwarded(structinfo) - cf = self.field_cache(descr) structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) + for box1, index, descr, box2 in triples_array: arrayinfo = box1.get_forwarded() if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo): arrayinfo = info.ArrayPtrInfo(descr) box1.set_forwarded(arrayinfo) - cf = self.arrayitem_cache(descr, index) arrayinfo.setitem(descr, index, box1, box2, optheap=self, cf=cf) diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ 
-61,7 +61,7 @@ serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) - assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0] + assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0] rbox1 = InputArgRef() rbox2 = InputArgRef() @@ -97,7 +97,7 @@ serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) - assert len(numb_state.create_numbering().code) == 2 + math.ceil(len(refboxes) / 6.0) + assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0) dct = {box: cls for box, known_class in boxes_known_classes From pypy.commits at gmail.com Wed Aug 2 11:32:24 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 08:32:24 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: in progress Message-ID: <5981f088.0e951c0a.372ea.a7f4@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92038:046804fa21b3 Date: 2017-08-02 11:05 +0200 http://bitbucket.org/pypy/pypy/changeset/046804fa21b3/ Log: in progress diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -18,6 +18,10 @@ # ( ) length times, if getfield(box1, descr) == box2 # both boxes should be in the liveboxes # +# +# ( ) length times, if getarrayitem_gc(box1, index, descr) == box2 +# both boxes should be in the liveboxes +# # ---- @@ -82,18 +86,26 @@ # structs # XXX could be extended to arrays if optimizer.optheap: - triples = optimizer.optheap.serialize_optheap(available_boxes) + triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into # metainterp_sd.all_descrs - triples = [triple for triple in triples if triple[1].descr_index != -1] - numb_state.append_int(len(triples)) - for box1, descr, box2 in triples: - 
index = descr.descr_index + triples_struct = [triple for triple in triples_struct if triple[1].descr_index != -1] + numb_state.append_int(len(triples_struct)) + for box1, descr, box2 in triples_struct: + descr_index = descr.descr_index + numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_int(descr_index) + numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_int(len(triples_array)) + for box1, index, descr, box2 in triples_array: + descr_index = descr.descr_index numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) numb_state.append_int(index) + numb_state.append_int(descr_index) numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) else: numb_state.append_int(0) + numb_state.append_int(0) def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes): reader = resumecode.Reader(resumestorage.rd_numb) @@ -123,13 +135,24 @@ if not optimizer.optheap: return length = reader.next_item() - result = [] + result_struct = [] + for i in range(length): + tagged = reader.next_item() + box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] + tagged = reader.next_item() + box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + result_struct.append((box1, descr, box2)) + length = reader.next_item() + result_array = [] for i in range(length): tagged = reader.next_item() box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) index = reader.next_item() - descr = metainterp_sd.all_descrs[index] + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] tagged = reader.next_item() box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) - result.append((box1, descr, box2)) - optimizer.optheap.deserialize_optheap(result) + result_array.append((box1, index, descr, box2)) + 
optimizer.optheap.deserialize_optheap(result_struct, result_array) diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -116,6 +116,7 @@ return None def force_lazy_set(self, optheap, descr, can_cache=True): + import pdb; pdb.set_trace() op = self._lazy_set if op is not None: # This is the way _lazy_set is usually reset to None. @@ -223,7 +224,7 @@ def invalidate(self, descr): for opinfo in self.cached_infos: assert isinstance(opinfo, info.ArrayPtrInfo) - opinfo._items = None + opinfo._items[self.index] = None self.cached_infos = [] self.cached_structs = [] @@ -574,6 +575,7 @@ cf.do_setfield(self, op) def optimize_GETARRAYITEM_GC_I(self, op): + import pdb; pdb.set_trace() arrayinfo = self.ensure_ptr_info_arg0(op) indexb = self.getintbound(op.getarg(1)) cf = None @@ -636,6 +638,7 @@ optimize_GETARRAYITEM_GC_PURE_F = optimize_GETARRAYITEM_GC_PURE_I def optimize_SETARRAYITEM_GC(self, op): + import pdb; pdb.set_trace() #opnum = OpHelpers.getarrayitem_pure_for_descr(op.getdescr()) #if self.has_pure_result(opnum, [op.getarg(0), op.getarg(1)], # op.getdescr()): @@ -695,7 +698,7 @@ return self.emit(op) def serialize_optheap(self, available_boxes): - result = [] + result_getfield = [] for descr, cf in self.cached_fields.iteritems(): if cf._lazy_set: continue # XXX safe default for now @@ -708,11 +711,24 @@ structinfo = cf.cached_infos[i] box2 = structinfo.getfield(descr).get_box_replacement() if isinstance(box2, Const) or box2 in available_boxes: - result.append((box1, descr, box2)) - return result + result_getfield.append((box1, descr, box2)) + result_array = [] + import pdb; pdb.set_trace() + for descr, indexdict in self.cached_arrayitems.iteritems(): + for index, cf in indexdict.iteritems(): + if cf._lazy_set: + continue # XXX safe default for now + for i, box1 in enumerate(cf.cached_structs): + if box1 not in 
available_boxes: + continue + arrayinfo = cf.cached_infos[i] + box2 = arrayinfo.getitem(descr, index).get_box_replacement() + if isinstance(box2, Const) or box2 in available_boxes: + result_array.append((box1, index, descr, box2)) + return result_getfield, result_array - def deserialize_optheap(self, triples): - for box1, descr, box2 in triples: + def deserialize_optheap(self, triples_struct, triples_array): + for box1, descr, box2 in triples_struct[0]: parent_descr = descr.get_parent_descr() assert parent_descr.is_object() structinfo = box1.get_forwarded() @@ -723,6 +739,15 @@ cf = self.field_cache(descr) structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) + import pdb; pdb.set_trace() + for box1, index, descr, box2 in triples_array[0]: + structinfo = box1.get_forwarded() + if not isinstance(structinfo, info.AbstractVirtualPtrInfo): + structinfo = info.ArrayPtrInfo(descr) + box1.set_forwarded(structinfo) + + cf = self.arrayitem_cache(index, descr) + structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) dispatch_opt = make_dispatcher_method(OptHeap, 'optimize_', diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -186,3 +186,31 @@ self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n self.check_resops(getfield_gc_r=1) # in main loop + def test_bridge_array_read(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) + def f(x, y, n): + if x: + a = [1, n, 0] + else: + a = [2, n, 0] + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res, a=a) + n1 = a[1] + m = jit.promote(a[0]) + res += m + a[2] += 1 + if y > n: + res += 1 + m = jit.promote(a[0]) + res += m + res += n1 + a[1] + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + 
self.check_resops(getarrayitem_gc_r=4) # 3x a.x, 1x a.n + self.check_resops(getfield_gc_r=1) # in main loop + From pypy.commits at gmail.com Wed Aug 2 11:34:38 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 08:34:38 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: merge default Message-ID: <5981f10e.08891c0a.bacc6.a45d@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92042:d16b10f8ca91 Date: 2017-08-02 17:34 +0200 http://bitbucket.org/pypy/pypy/changeset/d16b10f8ca91/ Log: merge default diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. 
See Python bug #445902: diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. """ - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -55,3 +55,8 @@ Fix the bounds in the GC when allocating a lot of objects with finalizers, fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. 
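[Editor's note on the `ffi.gc(cdata, destructor, size=0)` hunk in `lib_pypy/cffi/api.py` above: the new `size` argument only hints the GC about external memory; the destructor semantics are unchanged. A stdlib-only toy model of those semantics — the class name `GCData` is invented for illustration and is not part of cffi:]

```python
import gc
import weakref

class GCData(object):
    """Toy stand-in for the object ffi.gc() returns: it keeps the
    wrapped data alive and runs a destructor once it is collected."""
    _keepalive = []  # keeps the weakrefs (and their callbacks) alive

    def __init__(self, data, destructor, size=0):
        self._data = data
        # 'size' models the new hint: a real implementation would report
        # roughly this many bytes of external memory to the GC (PyPy uses
        # rgc.add_memory_pressure), so collections trigger early enough
        # to release the C-level memory.
        self._size = size
        self._keepalive.append(
            weakref.ref(self, lambda ref, d=data: destructor(d)))

calls = []
proxy = GCData("fake C pointer", calls.append, size=20 * 1024 * 1024)
del proxy
gc.collect()   # the "destructor" has run by now
print(calls)   # -> ['fake C pointer']
```

[This is why the test above can leak 40 GB without the hint: the small proxy objects give the GC no reason to collect, while each one pins megabytes of malloc'd memory.]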
diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -433,17 +433,22 @@ def _sizeof(self): return self.ctype.size - def with_gc(self, w_destructor): + def with_gc(self, w_destructor, size=0): space = self.space if space.is_none(w_destructor): if isinstance(self, W_CDataGCP): self.detach_destructor() - return space.w_None - raise oefmt(space.w_TypeError, - "Can remove destructor only on a object " - "previously returned by ffi.gc()") - with self as ptr: - return W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + w_res = space.w_None + else: + raise oefmt(space.w_TypeError, + "Can remove destructor only on a object " + "previously returned by ffi.gc()") + else: + with self as ptr: + w_res = W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + if size != 0: + rgc.add_memory_pressure(size) + return w_res def unpack(self, length): from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -351,14 +351,14 @@ return handle.from_handle(self.space, w_arg) - @unwrap_spec(w_cdata=W_CData) - def descr_gc(self, w_cdata, w_destructor): + @unwrap_spec(w_cdata=W_CData, size=int) + def descr_gc(self, w_cdata, w_destructor, size=0): """\ Return a new cdata object that points to the same data. 
Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called.""" # - return w_cdata.with_gc(w_destructor) + return w_cdata.with_gc(w_destructor, size) @unwrap_spec(replace_with='text') diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -257,6 +257,6 @@ # ____________________________________________________________ - at unwrap_spec(w_cdata=cdataobj.W_CData) -def gcp(space, w_cdata, w_destructor): - return w_cdata.with_gc(w_destructor) + at unwrap_spec(w_cdata=cdataobj.W_CData, size=int) +def gcp(space, w_cdata, w_destructor, size=0): + return w_cdata.with_gc(w_destructor, size) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -377,7 +377,7 @@ raises(TypeError, ffi.gc, p, None) seen = [] q1 = ffi.gc(p, lambda p: seen.append(1)) - q2 = ffi.gc(q1, lambda p: seen.append(2)) + q2 = ffi.gc(q1, lambda p: seen.append(2), size=123) import gc; gc.collect() assert seen == [] assert ffi.gc(q1, None) is None diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py @@ -2455,3 +2455,61 @@ assert (pt.x, pt.y) == (-9*500*999, 9*500*999) pt = lib.call2(lib.cb2) assert (pt.x, pt.y) == (99*500*999, -99*500*999) + +def test_ffi_gc_size_arg(): + # with PyPy's GC, these calls to ffi.gc() would rapidly consume + # 40 GB of RAM without the third argument + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 
20*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 20*1024*1024) + del p + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' + x = X() + x.p = p + x.cyclic = x + del p, x diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py @@ -2291,3 +2291,61 @@ expected = "unsigned int" assert ffi.typeof("UINT_PTR") is ffi.typeof(expected) assert ffi.typeof("PTSTR") is ffi.typeof("wchar_t *") + +def test_gc_pypy_size_arg(): + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' 
+ p = ffi.gc(p, lib.free, 20*1024*1024) + del p + # with PyPy's GC, the above would rapidly consume 40 GB of RAM + # without the third argument to ffi.gc() + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' 
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x

From pypy.commits at gmail.com  Wed Aug  2 12:32:05 2017
From: pypy.commits at gmail.com (arigo)
Date: Wed, 02 Aug 2017 09:32:05 -0700 (PDT)
Subject: [pypy-commit] pypy default: Fix the Makefile to produce the pypy-c
 inside pypy/goal/, like we
Message-ID: <5981fe85.47bf1c0a.d060a.3931@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r92043:dd95b2eeb5ac
Date: 2017-08-02 18:31 +0200
http://bitbucket.org/pypy/pypy/changeset/dd95b2eeb5ac/

Log: Fix the Makefile to produce the pypy-c inside pypy/goal/, like we
     normally expect and document

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@
 RUNINTERP = $(PYPY_EXECUTABLE)
 endif
 
-.PHONY: cffi_imports
+.PHONY: pypy-c cffi_imports
 
 pypy-c:
	@echo
@@ -32,7 +32,7 @@
	@echo "===================================================================="
	@echo
	@sleep 5
-	$(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py
+	cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py
 
 # Note: the -jN option, or MAKEFLAGS=-jN, are not usable.  They are
 # replaced with an opaque --jobserver option by the time this Makefile
@@ -40,4 +40,4 @@
 # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html
 
 cffi_imports: pypy-c
-	PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true
+	PYTHONPATH=. pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true

From pypy.commits at gmail.com  Wed Aug  2 13:48:12 2017
From: pypy.commits at gmail.com (mattip)
Date: Wed, 02 Aug 2017 10:48:12 -0700 (PDT)
Subject: [pypy-commit] pypy cpyext-leakchecking: maybe identify a possible
 leak in handling GetSetProperty?
Message-ID: <5982105c.56b71c0a.ea4c4.bbf9@mx.google.com>

Author: Matti Picus
Branch: cpyext-leakchecking
Changeset: r92044:c1786ec814d9
Date: 2017-08-02 20:47 +0300
http://bitbucket.org/pypy/pypy/changeset/c1786ec814d9/

Log: maybe identify a possible leak in handling GetSetProperty?
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -195,6 +195,8 @@ py_getsetdef = make_GetSet(space, w_obj) assert space.isinstance_w(w_userdata, space.w_type) w_obj = W_GetSetPropertyEx(py_getsetdef, w_userdata) + # now w_obj.getset is py_getsetdef, which was freshly allocated + # XXX how is this ever released? # XXX assign to d_dname, d_type? assert isinstance(w_obj, W_GetSetPropertyEx) py_getsetdescr.c_d_getset = w_obj.getset From pypy.commits at gmail.com Wed Aug 2 13:50:56 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 02 Aug 2017 10:50:56 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: support for caching infos on constants, while I'm at it Message-ID: <59821100.9fb6df0a.5de29.61c7@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92045:f105d8c2eae7 Date: 2017-08-02 18:04 +0200 http://bitbucket.org/pypy/pypy/changeset/f105d8c2eae7/ Log: support for caching infos on constants, while I'm at it diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -706,11 +706,11 @@ if not parent_descr.is_object(): continue # XXX could be extended to non-instance objects for i, box1 in enumerate(cf.cached_structs): - if box1 not in available_boxes: + if not box1.is_constant() and box1 not in available_boxes: continue structinfo = cf.cached_infos[i] box2 = structinfo.getfield(descr).get_box_replacement() - if isinstance(box2, Const) or box2 in available_boxes: + if box2.is_constant() or box2 in available_boxes: result_getfield.append((box1, descr, box2)) result_array = [] for descr, indexdict in self.cached_arrayitems.iteritems(): @@ -718,11 +718,11 @@ if cf._lazy_set: continue # XXX safe default for now for i, box1 in enumerate(cf.cached_structs): - if box1 
not in available_boxes: + if not box1.is_constant() and box1 not in available_boxes: continue arrayinfo = cf.cached_infos[i] box2 = arrayinfo.getitem(descr, index).get_box_replacement() - if isinstance(box2, Const) or box2 in available_boxes: + if box2.is_constant() or box2 in available_boxes: result_array.append((box1, index, descr, box2)) return result_getfield, result_array @@ -730,19 +730,25 @@ for box1, descr, box2 in triples_struct: parent_descr = descr.get_parent_descr() assert parent_descr.is_object() - structinfo = box1.get_forwarded() - if not isinstance(structinfo, info.AbstractVirtualPtrInfo): - structinfo = info.InstancePtrInfo(parent_descr) - structinfo.init_fields(parent_descr, descr.get_index()) - box1.set_forwarded(structinfo) + if box1.is_constant(): + structinfo = info.ConstPtrInfo(box1) + else: + structinfo = box1.get_forwarded() + if not isinstance(structinfo, info.AbstractVirtualPtrInfo): + structinfo = info.InstancePtrInfo(parent_descr) + structinfo.init_fields(parent_descr, descr.get_index()) + box1.set_forwarded(structinfo) cf = self.field_cache(descr) structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) for box1, index, descr, box2 in triples_array: - arrayinfo = box1.get_forwarded() - if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo): - arrayinfo = info.ArrayPtrInfo(descr) - box1.set_forwarded(arrayinfo) + if box1.is_constant(): + arrayinfo = info.ConstPtrInfo(box1) + else: + arrayinfo = box1.get_forwarded() + if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo): + arrayinfo = info.ArrayPtrInfo(descr) + box1.set_forwarded(arrayinfo) cf = self.arrayitem_cache(descr, index) arrayinfo.setitem(descr, index, box1, box2, optheap=self, cf=cf) diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -143,11 +143,7 @@ def test_bridge_field_read(self): myjitdriver = 
jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) class A(object): - def f(self): - return 1 - class B(A): - def f(self): - return 2 + pass class M(object): _immutable_fields_ = ['x'] def __init__(self, x): @@ -156,18 +152,59 @@ m1 = M(1) m2 = M(2) def f(x, y, n): + a = A() + a.n = n if x: - a = A() + a.m = m1 + else: + a.m = m2 + a.x = 0 + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res, a=a) + n1 = a.n + m = jit.promote(a.m) + res += m.x + a.x += 1 + if y > n: + res += 1 + m = jit.promote(a.m) + res += m.x + res += n1 + a.n + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n + self.check_resops(getfield_gc_r=1) # in main loop + + def test_bridge_field_read_constants(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n']) + class A(object): + pass + class M(object): + _immutable_fields_ = ['x'] + def __init__(self, x): + self.x = x + + m1 = M(1) + m2 = M(2) + a = A() + a.m = m1 + a.n = 0 + def f(x, y, n): + if x: a.m = m1 a.n = n else: - a = B() a.m = m2 a.n = n a.x = 0 res = 0 while y > 0: - myjitdriver.jit_merge_point(y=y, n=n, res=res, a=a) + myjitdriver.jit_merge_point(y=y, n=n, res=res) n1 = a.n m = jit.promote(a.m) res += m.x @@ -213,3 +250,35 @@ self.check_resops(guard_value=1) self.check_resops(getarrayitem_gc_i=4) + def test_bridge_array_read_constant(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n']) + class A(object): + pass + a = A() + a.l = [1, -65, 0] + def f(x, y, n): + if x: + a.l[0] = 1 + else: + a.l[0] = 2 + a.l[1] = n + a.l[2] = 0 + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res) + n1 = a.l[1] + m = jit.promote(a.l[0]) + res += m + a.l[2] += 1 + if y > n: + res += 1 + m = jit.promote(a.l[0]) + res += m + res += n1 + a.l[1] + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 
32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getarrayitem_gc_i=5) From pypy.commits at gmail.com Thu Aug 3 04:45:38 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Aug 2017 01:45:38 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-debug-type_dealloc: pytest.py ../test_typeobject.py -k test_tp_getattro -s never deallocates "instance" Message-ID: <5982e2b2.8a8edf0a.45db8.7e80@mx.google.com> Author: Matti Picus Branch: cpyext-debug-type_dealloc Changeset: r92046:1ce0da5af8e6 Date: 2017-08-03 11:44 +0300 http://bitbucket.org/pypy/pypy/changeset/1ce0da5af8e6/ Log: pytest.py ../test_typeobject.py -k test_tp_getattro -s never deallocates "instance" diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -59,14 +59,17 @@ if not self.space.config.translating: def dealloc_trigger(): from pypy.module.cpyext.pyobject import PyObject, decref - print 'dealloc_trigger...' + print >>sys.stderr, 'dealloc_trigger...' while True: ob = rawrefcount.next_dead(PyObject) if not ob: break - print 'deallocating PyObject', ob + if ob.c_ob_type.c_tp_name: + print >>sys.stderr, 'deallocating PyObject', rffi.charp2str(ob.c_ob_type.c_tp_name), ob + else: + print >>sys.stderr, 'deallocating PyObject (without tp_name)', ob decref(space, ob) - print 'dealloc_trigger DONE' + print >>sys.stderr, 'dealloc_trigger DONE' return "RETRY" rawrefcount.init(dealloc_trigger) else: diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -540,22 +540,22 @@ # w_obj is an instance of w_A or one of its subclasses. So climb up the # inheritance chain until base.c_tp_dealloc is exactly this_func, and then # continue on up until they differ. 
- #print 'subtype_dealloc, start from', rffi.charp2str(base.c_tp_name) + print 'subtype_dealloc, start from', rffi.charp2str(base.c_tp_name) while base.c_tp_dealloc != this_func_ptr: base = base.c_tp_base assert base - #print ' ne move to', rffi.charp2str(base.c_tp_name) + print ' ne move to', rffi.charp2str(base.c_tp_name) w_obj = from_ref(space, rffi.cast(PyObject, base)) while base.c_tp_dealloc == this_func_ptr: base = base.c_tp_base assert base - #print ' eq move to', rffi.charp2str(base.c_tp_name) + print ' eq move to', rffi.charp2str(base.c_tp_name) w_obj = from_ref(space, rffi.cast(PyObject, base)) - #print ' end with', rffi.charp2str(base.c_tp_name) + print ' end with', rffi.charp2str(base.c_tp_name) dealloc = base.c_tp_dealloc # XXX call tp_del if necessary generic_cpy_call(space, dealloc, obj) - # XXX cpy decrefs the pto here but we do it in the base-dealloc + # XXX cpy decrefs the pto on heaptypes here but we do it in PyObject_dealloc # hopefully this does not clash with the memory model assumed in # extension modules @@ -675,6 +675,7 @@ from pypy.module.cpyext.object import _dealloc obj_pto = rffi.cast(PyTypeObjectPtr, obj) base_pyo = rffi.cast(PyObject, obj_pto.c_tp_base) + print 'decref tp_bases', from_ref(space, obj_pto.c_tp_bases) Py_DecRef(space, obj_pto.c_tp_bases) #Py_DecRef(space, obj_pto.c_tp_mro) Py_DecRef(space, obj_pto.c_tp_cache) # let's do it like cpython @@ -927,6 +928,9 @@ is_heaptype = bool(pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE) pto.c_tp_bases = make_ref(space, space.newtuple(bases_w), immortal=not is_heaptype) + name = rffi.charp2str(pto.c_tp_name) + if name == 'instance': + print 'incref tp_bases', from_ref(space, pto.c_tp_bases), 'on instance' def finish_type_2(space, pto, w_obj): """ From pypy.commits at gmail.com Thu Aug 3 05:19:23 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Aug 2017 02:19:23 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-debug-type_dealloc: minimal failing test; __init__ causes leak-checker to fail 
Message-ID: <5982ea9b.44691c0a.a0394.7913@mx.google.com>

Author: Matti Picus
Branch: cpyext-debug-type_dealloc
Changeset: r92047:a12c2162e44e
Date: 2017-08-03 12:18 +0300
http://bitbucket.org/pypy/pypy/changeset/a12c2162e44e/

Log: minimal failing test; __init__ causes leak-checker to fail

diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -569,6 +569,20 @@
             ])
         assert module.test_type(type(None))
 
+    def test_leaking(self):
+        module = self.import_extension('foo', [
+            ("test_leak", "METH_VARARGS",
+             '''
+             Py_RETURN_TRUE;
+             '''
+            )
+        ])
+        class C:
+            def __init__(self):
+                pass
+        # leak checker should not report errors
+        assert module.test_leak(C())
+
     def test_tp_getattro(self):
         module = self.import_extension('foo', [

From pypy.commits at gmail.com  Thu Aug  3 05:35:18 2017
From: pypy.commits at gmail.com (mattip)
Date: Thu, 03 Aug 2017 02:35:18 -0700 (PDT)
Subject: [pypy-commit] pypy cpyext-debug-type_dealloc: expand test, D(), E()
 fail leak check, C() passes. WHY?
Message-ID: <5982ee56.415f1c0a.6582d.6f5a@mx.google.com>

Author: Matti Picus
Branch: cpyext-debug-type_dealloc
Changeset: r92048:18ec6be95b4a
Date: 2017-08-03 12:34 +0300
http://bitbucket.org/pypy/pypy/changeset/18ec6be95b4a/

Log: expand test, D(), E() fail leak check, C() passes. WHY?
diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -577,11 +577,18 @@
             '''
            )
        ])
-        class C:
+        class C(object):
+            def dummymethod(self):
+                pass
+        class D(object):
             def __init__(self):
                 pass
-        # leak checker should not report errors
+        class E:
+            pass
+        # TODO C passes leak checker, D,E fails
         assert module.test_leak(C())
+        assert module.test_leak(D())
+        assert module.test_leak(E())
 
     def test_tp_getattro(self):
         module = self.import_extension('foo', [

From pypy.commits at gmail.com  Thu Aug  3 09:35:43 2017
From: pypy.commits at gmail.com (fijal)
Date: Thu, 03 Aug 2017 06:35:43 -0700 (PDT)
Subject: [pypy-commit] extradoc extradoc: write a blog post draft
Message-ID: <598326af.d288df0a.452ff.e4a9@mx.google.com>

Author: fijal
Branch: extradoc
Changeset: r5823:f01eecaabd0f
Date: 2017-08-03 15:35 +0200
http://bitbucket.org/pypy/extradoc/changeset/f01eecaabd0f/

Log: write a blog post draft

diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst
new file mode 100644
--- /dev/null
+++ b/blog/draft/remove-gil.rst
@@ -0,0 +1,28 @@
+GIL removal proposal
+--------------------
+
+Hello everyone.
+
+The topic of the infamous Global Interpreter Lock has been around for a while
+in the Python community. There have been various attempts at removing it
+(some successful, e.g. in Jython or IronPython, with the help of the platform)
+and some yet to bear fruit, like `gilectomy`_. Since the February sprint in
+Leysin, we have been tackling GIL removal in the PyPy project on and off.
+
+As of the EuroPython announcement, we are able to run (very simple) programs
+with a GIL-less PyPy that parallelize nicely. The remaining 90% (and another
+90%) of the work is putting locks in strategic places so that PyPy does not
+segfault when you access a data structure concurrently.
+
+Since such work would complicate the code base and our day-to-day work,
+we would like to gauge the interest of the community and of commercial
+PyPy users.
+
+We would like to proceed in the following way. We are looking for a contract
+with companies (individual donations did not work very well for us in the
+past). We put the total cost of the work at $50k, of which we already have
+backing for about a third. If we can get a $100k contract, we will make it
+our priority to deliver before the end of the year.
+
+Best regards,
+Maciej Fijalkowski

From pypy.commits at gmail.com  Thu Aug  3 12:35:03 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Thu, 03 Aug 2017 09:35:03 -0700 (PDT)
Subject: [pypy-commit] pypy default: remove the withcelldict option
 (translating without withcelldict is really not
Message-ID: <598350b7.0591df0a.abbbc.cc78@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92049:6d1bf70e6214
Date: 2017-08-03 16:27 +0200
http://bitbucket.org/pypy/pypy/changeset/6d1bf70e6214/

Log: remove the withcelldict option (translating without withcelldict is
     really not
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -224,11 +224,6 @@ "use specialised tuples", default=False), - BoolOption("withcelldict", - "use dictionaries that are optimized for being used as module dicts", - default=False, - requires=[("objspace.honor__builtins__", False)]), - BoolOption("withliststrategies", "enable optimized ways to store lists of primitives ", default=True), @@ -288,7 +283,7 @@ # extra optimizations with the JIT if level == 'jit': - config.objspace.std.suggest(withcelldict=True) + pass # none at the moment def enable_allworkingmodules(config): diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.std.withcelldict.txt +++ /dev/null @@ -1,2 +0,0 @@ -Enable cell-dicts. This optimization is not helpful without the JIT. In the -presence of the JIT, it greatly helps looking up globals. 
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -56,7 +56,7 @@ def allocate_and_init_instance(space, w_type=None, module=False, instance=False, strdict=False, kwargs=False): - if space.config.objspace.std.withcelldict and module: + if module: from pypy.objspace.std.celldict import ModuleDictStrategy assert w_type is None # every module needs its own strategy, because the strategy stores diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -58,7 +58,6 @@ assert v2 is v3 class AppTestModuleDict(object): - spaceconfig = {"objspace.std.withcelldict": True} def setup_class(cls): cls.w_runappdirect = cls.space.wrap(cls.runappdirect) @@ -116,7 +115,6 @@ class AppTestCellDict(object): - spaceconfig = {"objspace.std.withcelldict": True} def setup_class(cls): if cls.runappdirect: diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1261,7 +1261,6 @@ class Config: class objspace: class std: - withcelldict = False methodcachesizeexp = 11 withmethodcachecounter = False @@ -1467,6 +1466,7 @@ def test_module_uses_strdict(): + from pypy.objspace.std.celldict import ModuleDictStrategy fakespace = FakeSpace() d = fakespace.newdict(module=True) - assert type(d.get_strategy()) is BytesDictStrategy + assert type(d.get_strategy()) is ModuleDictStrategy diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -4,7 +4,6 @@ class Config: class objspace: class std: - withcelldict = False methodcachesizeexp = 11 withmethodcachecounter = 
False From pypy.commits at gmail.com Thu Aug 3 12:56:35 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 03 Aug 2017 09:56:35 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Don't incref the original object in PyMemoryView_FromObject() Message-ID: <598355c3.01571c0a.1317f.f4c0@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92050:ff315452ce6f Date: 2017-08-03 17:53 +0100 http://bitbucket.org/pypy/pypy/changeset/ff315452ce6f/ Log: Don't incref the original object in PyMemoryView_FromObject() diff --git a/pypy/module/cpyext/buffer.py b/pypy/module/cpyext/buffer.py --- a/pypy/module/cpyext/buffer.py +++ b/pypy/module/cpyext/buffer.py @@ -93,6 +93,7 @@ lltype.free(pybuf, flavor='raw') decref(self.space, self.pyobj) self.pyobj = lltype.nullptr(PyObject.TO) + self.w_obj = None else: #do not call twice return diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -72,7 +72,7 @@ readonly=widen(view.c_readonly)) # Ensure view.c_buf is released upon object finalization fq.register_finalizer(buf) - # Allow subclassing W_MemeoryView + # Allow subclassing W_MemoryView w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type)) w_obj = space.allocate_instance(W_MemoryView, w_type) w_obj.__init__(buf) @@ -177,11 +177,9 @@ return (_IsCContiguous(view) or _IsFortranContiguous(view)) return 0 - at cpython_api([PyObject], PyObject, result_is_ll=True) + at cpython_api([PyObject], PyObject) def PyMemoryView_FromObject(space, w_obj): - w_memview = space.call_method(space.builtin, "memoryview", w_obj) - py_memview = make_ref(space, w_memview, w_obj) - return py_memview + return space.call_method(space.builtin, "memoryview", w_obj) @cpython_api([Py_bufferP], PyObject, result_is_ll=True) def PyMemoryView_FromBuffer(space, view): @@ -193,6 +191,7 @@ # copy view into obj.c_view, without creating a new view.c_obj typedescr = 
get_typedescr(W_MemoryView.typedef) py_obj = typedescr.allocate(space, space.w_memoryview) + py_mem = rffi.cast(PyMemoryViewObject, py_obj) mview = py_mem.c_view mview.c_buf = view.c_buf diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -4,7 +4,7 @@ from pypy.module.cpyext.test.test_api import BaseApiTest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from rpython.rlib.buffer import StringBuffer -from pypy.module.cpyext.pyobject import from_ref +from pypy.module.cpyext.pyobject import make_ref, from_ref from pypy.module.cpyext.memoryobject import PyMemoryViewObject only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" @@ -12,9 +12,9 @@ class TestMemoryViewObject(BaseApiTest): def test_frombuffer(self, space, api): w_buf = space.newbuffer(StringBuffer("hello")) + w_memoryview = api.PyMemoryView_FromObject(w_buf) c_memoryview = rffi.cast( - PyMemoryViewObject, api.PyMemoryView_FromObject(w_buf)) - w_memoryview = from_ref(space, c_memoryview) + PyMemoryViewObject, make_ref(space, w_memoryview)) view = c_memoryview.c_view assert view.c_ndim == 1 f = rffi.charp2str(view.c_format) @@ -32,6 +32,7 @@ assert space.eq_w(space.getattr(w_mv, w_f), space.getattr(w_memoryview, w_f)) api.Py_DecRef(ref) + api.Py_DecRef(w_memoryview) class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase): def test_fillWithObject(self): @@ -62,7 +63,6 @@ """)]) result = module.fillinfo() assert b"hello, world." 
== result - del result class AppTestBufferProtocol(AppTestCpythonExtensionBase): def test_fromobject(self): @@ -172,8 +172,6 @@ # in ignored def test_releasebuffer(self): - if not self.runappdirect: - skip("Fails due to ll2ctypes nonsense") module = self.import_extension('foo', [ ("create_test", "METH_NOARGS", """ From pypy.commits at gmail.com Thu Aug 3 13:30:31 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 03 Aug 2017 10:30:31 -0700 (PDT) Subject: [pypy-commit] pypy default: remove some more NOT_RPYTHON and replace them with the decorator Message-ID: <59835db7.0e951c0a.b4e60.88f9@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92051:4cbf2f9fe1bc Date: 2017-08-03 19:29 +0200 http://bitbucket.org/pypy/pypy/changeset/4cbf2f9fe1bc/ Log: remove some more NOT_RPYTHON and replace them with the decorator diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -2,6 +2,7 @@ Arguments objects. 
""" from rpython.rlib.debug import make_sure_not_resized +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import jit from pypy.interpreter.error import OperationError, oefmt @@ -46,8 +47,8 @@ # behaviour but produces better error messages self.methodcall = methodcall + @not_rpython def __repr__(self): - """ NOT_RPYTHON """ name = self.__class__.__name__ if not self.keywords: return '%s(%s)' % (name, self.arguments_w,) diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -7,6 +7,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, specialize +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import rstack, rstackovf from pypy.interpreter import debug @@ -57,8 +58,9 @@ self.match(space, space.w_KeyboardInterrupt)) # note: an extra case is added in OpErrFmtNoArgs + @not_rpython def __str__(self): - "NOT_RPYTHON: Convenience for tracebacks." + "Convenience for tracebacks." s = self._w_value space = getattr(self.w_type, 'space', None) if space is not None: @@ -107,15 +109,16 @@ if RECORD_INTERPLEVEL_TRACEBACK: self.debug_excs.append(sys.exc_info()) + @not_rpython def print_application_traceback(self, space, file=None): - "NOT_RPYTHON: Dump a standard application-level traceback." + "Dump a standard application-level traceback." 
if file is None: file = sys.stderr self.print_app_tb_only(file) print >> file, self.errorstr(space) + @not_rpython def print_app_tb_only(self, file): - "NOT_RPYTHON" tb = self._application_traceback if tb: import linecache @@ -142,8 +145,9 @@ print >> file, l tb = tb.next + @not_rpython def print_detailed_traceback(self, space=None, file=None): - """NOT_RPYTHON: Dump a nice detailed interpreter- and + """Dump a nice detailed interpreter- and application-level traceback, useful to debug the interpreter.""" if file is None: file = sys.stderr diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -1,7 +1,7 @@ import sys from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll import unrolling_iterable -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib import jit, rgc, objectmodel TICK_COUNTER_STEP = 100 @@ -423,8 +423,9 @@ # to run at the next possible bytecode self.reset_ticker(-1) + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): - """NOT_RPYTHON: + """ Register the PeriodicAsyncAction action to be called whenever the tick counter becomes smaller than 0. If 'use_bytecode_counter' is True, make sure that we decrease the tick counter at every bytecode. 
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -64,8 +64,8 @@ def _freeze_(self): return True + @not_rpython def unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -380,8 +380,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): - """NOT_RPYTHON""" self.behavior = behavior def _run(self, space, scope_w): @@ -621,9 +621,9 @@ # When a BuiltinCode is stored in a Function object, # you get the functionality of CPython's built-in function type. + @not_rpython def __init__(self, func, unwrap_spec=None, self_type=None, descrmismatch=None, doc=None): - "NOT_RPYTHON" # 'implfunc' is the interpreter-level function. # Note that this uses a lot of (construction-time) introspection. 
Code.__init__(self, func.__name__) @@ -969,10 +969,10 @@ instancecache = {} + @not_rpython def __new__(cls, f, app_name=None, unwrap_spec=None, descrmismatch=None, as_classmethod=False, doc=None): - "NOT_RPYTHON" # f must be a function whose name does NOT start with 'app_' self_type = None if hasattr(f, 'im_func'): @@ -1013,8 +1013,8 @@ self._staticdefs = zip(argnames[-len(defaults):], defaults) return self + @not_rpython def _getdefaults(self, space): - "NOT_RPYTHON" defs_w = [] for name, defaultval in self._staticdefs: if name.startswith('w_'): @@ -1070,8 +1070,8 @@ class GatewayCache(SpaceCache): + @not_rpython def build(cache, gateway): - "NOT_RPYTHON" space = cache.space defs = gateway._getdefaults(space) # needs to be implemented by subclass code = gateway._code @@ -1141,8 +1141,8 @@ w_globals = self.getwdict(space) return space.getitem(w_globals, space.newtext(name)) + @not_rpython def interphook(self, name): - "NOT_RPYTHON" def appcaller(space, *args_w): if not isinstance(space, ObjSpace): raise TypeError("first argument must be a space instance.") @@ -1179,15 +1179,16 @@ """NOT_RPYTHON The cache mapping each applevel instance to its lazily built w_dict""" + @not_rpython def build(self, app): - "NOT_RPYTHON. Called indirectly by Applevel.getwdict()." + "Called indirectly by Applevel.getwdict()." 
return build_applevel_dict(app, self.space) # __________ pure applevel version __________ + at not_rpython def build_applevel_dict(self, space): - "NOT_RPYTHON" w_glob = space.newdict(module=True) space.setitem(w_glob, space.newtext('__name__'), space.newtext(self.modname)) space.exec_(self.source, w_glob, w_glob, @@ -1198,8 +1199,9 @@ # ____________________________________________________________ + at not_rpython def appdef(source, applevel=ApplevelClass, filename=None): - """ NOT_RPYTHON: build an app-level helper function, like for example: + """ build an app-level helper function, like for example: myfunc = appdef('''myfunc(x, y): return x+y ''') @@ -1245,6 +1247,6 @@ # app2interp_temp is used for testing mainly + at not_rpython def app2interp_temp(func, applevel_temp=applevel_temp, filename=None): - """ NOT_RPYTHON """ return appdef(func, applevel_temp, filename=filename) diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -3,6 +3,7 @@ """ from rpython.rlib.listsort import make_timsort_class +from rpython.rlib.objectmodel import not_rpython class ThreadLocals: @@ -41,9 +42,8 @@ # but in some corner cases it is not... 
unsure why self._value = None - + at not_rpython def make_weak_value_dictionary(space, keytype, valuetype): - "NOT_RPYTHON" if space.config.translation.rweakref: from rpython.rlib.rweakref import RWeakValueDictionary return RWeakValueDictionary(keytype, valuetype) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -3,6 +3,9 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.baseobjspace import W_Root + +from rpython.rlib.objectmodel import not_rpython + import sys class MixedModule(Module): @@ -15,16 +18,17 @@ lazy = False submodule_name = None + @not_rpython def __init__(self, space, w_name): - """ NOT_RPYTHON """ Module.__init__(self, space, w_name) self.lazy = True self.__class__.buildloaders() self.loaders = self.loaders.copy() # copy from the class to the inst self.submodules_w = [] + @not_rpython def install(self): - """NOT_RPYTHON: install this module, and it's submodules into + """install this module, and its submodules into space.builtin_modules""" Module.install(self) if hasattr(self, "submodules"): @@ -61,8 +65,8 @@ self.w_initialdict = self.space.call_method(self.w_dict, 'items') @classmethod + @not_rpython def get_applevel_name(cls): - """ NOT_RPYTHON """ if cls.applevel_name is not None: return cls.applevel_name else: @@ -130,8 +134,8 @@ self._frozen = True @classmethod + @not_rpython def buildloaders(cls): - """ NOT_RPYTHON """ if not hasattr(cls, 'loaders'): # build a constant dictionary out of # applevel/interplevel definitions @@ -161,8 +165,8 @@ return space.newtext_or_none(cls.__doc__) + at not_rpython def getinterpevalloader(pkgroot, spec): - """ NOT_RPYTHON """ def ifileloader(space): d = {'space':space} # EVIL HACK (but it works, and this is not RPython :-) @@ -202,8 +206,8 @@ return ifileloader applevelcache = {} + at not_rpython def getappfileloader(pkgroot, appname, spec): 
- """ NOT_RPYTHON """ # hum, it's a bit more involved, because we usually # want the import at applevel modname, attrname = spec.split('.') diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -4,7 +4,7 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython class Module(W_Root): @@ -40,13 +40,15 @@ except OperationError: pass + @not_rpython def install(self): - """NOT_RPYTHON: installs this module into space.builtin_modules""" + """installs this module into space.builtin_modules""" modulename = self.space.text0_w(self.w_name) self.space.builtin_modules[modulename] = self + @not_rpython def setup_after_space_initialization(self): - """NOT_RPYTHON: to allow built-in modules to do some more setup + """to allow built-in modules to do some more setup after the space is fully initialized.""" def init(self, space): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -7,6 +7,7 @@ from rpython.rlib.debug import ll_assert_not_none from rpython.rlib.jit import hint from rpython.rlib.objectmodel import instantiate, specialize, we_are_translated +from rpython.rlib.objectmodel import not_rpython from rpython.rlib.rarithmetic import intmask, r_uint from rpython.tool.pairtype import extendabletype @@ -144,8 +145,9 @@ return None return d.w_locals + @not_rpython def __repr__(self): - # NOT_RPYTHON: useful in tracebacks + # useful in tracebacks return "<%s.%s executing %s at line %s" % ( self.__class__.__module__, self.__class__.__name__, self.pycode, self.get_last_lineno()) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -7,7 +7,7 @@ from 
rpython.rlib import jit, rstackovf from rpython.rlib.debug import check_nonneg from rpython.rlib.objectmodel import (we_are_translated, always_inline, - dont_inline) + dont_inline, not_rpython) from rpython.rlib.rarithmetic import r_uint, intmask from rpython.tool.sourcetools import func_with_new_name @@ -20,8 +20,8 @@ from pypy.interpreter.pycode import PyCode, BytecodeCorruption from pypy.tool.stdlib_opcode import bytecode_spec + at not_rpython def unaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_1 = self.popvalue() @@ -31,8 +31,8 @@ return func_with_new_name(opimpl, "opcode_impl_for_%s" % operationname) + at not_rpython def binaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_2 = self.popvalue() diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -8,14 +8,15 @@ from rpython.rlib.jit import promote from rpython.rlib.objectmodel import compute_identity_hash, specialize -from rpython.rlib.objectmodel import instantiate +from rpython.rlib.objectmodel import instantiate, not_rpython from rpython.tool.sourcetools import compile2, func_with_new_name class TypeDef(object): + @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, **rawdict): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" self.name = __name if __base is None: bases = [] @@ -113,8 +114,9 @@ # register_finalizer() or not. 
@specialize.memo() + at not_rpython def get_unique_interplevel_subclass(space, cls): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert cls.typedef.acceptable_as_base_class try: return _unique_subclass_cache[cls] @@ -349,15 +351,17 @@ return self + at not_rpython def interp_attrproperty(name, cls, doc=None, wrapfn=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert wrapfn is not None def fget(space, obj): return getattr(space, wrapfn)(getattr(obj, name)) return GetSetProperty(fget, cls=cls, doc=doc) + at not_rpython def interp_attrproperty_w(name, cls, doc=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" def fget(space, obj): w_value = getattr(obj, name) if w_value is None: diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py --- a/pypy/module/_codecs/__init__.py +++ b/pypy/module/_codecs/__init__.py @@ -1,5 +1,6 @@ from pypy.interpreter.mixedmodule import MixedModule from rpython.rlib import runicode +from rpython.rlib.objectmodel import not_rpython from pypy.module._codecs import interp_codecs class Module(MixedModule): @@ -86,9 +87,8 @@ 'unicode_internal_encode' : 'interp_codecs.unicode_internal_encode', } + @not_rpython def __init__(self, space, *args): - "NOT_RPYTHON" - # mbcs codec is Windows specific, and based on rffi. 
if (hasattr(runicode, 'str_decode_mbcs')): self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode' diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,5 +1,5 @@ from rpython.rlib import jit -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder from rpython.rlib.runicode import code_to_unichr, MAXUNICODE @@ -268,8 +268,8 @@ raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) + at not_rpython def register_builtin_error_handlers(space): - "NOT_RPYTHON" state = space.fromcache(CodecState) for error in ("strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"): diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -3,7 +3,7 @@ from rpython.rlib import rposix, rposix_stat from rpython.rlib import objectmodel, rurandom -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib.rarithmetic import r_longlong, intmask, r_uint from rpython.rlib.unroll import unrolling_iterable @@ -731,8 +731,8 @@ else: assert False, "Unknown fork hook" + at not_rpython def add_fork_hook(where, hook): - "NOT_RPYTHON" get_fork_hooks(where).append(hook) add_fork_hook('child', ExecutionContext._mark_thread_disappeared) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -181,8 +181,8 @@ return self._wrap_not_rpython(x) + @not_rpython def _wrap_not_rpython(self, x): - "NOT_RPYTHON" # _____ this code is here to support testing only _____ # wrap() of a container works on CPython, but the code is diff --git 
a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -11,7 +11,7 @@ from rpython.rlib.jit import (promote, elidable_promote, we_are_jitted, elidable, dont_look_inside, unroll_safe) from rpython.rlib.objectmodel import current_object_addr_as_int, compute_hash -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import intmask, r_uint class MutableCell(W_Root): @@ -212,8 +212,8 @@ else: self.terminator = NoDictTerminator(space, self) + @not_rpython def __repr__(self): - "NOT_RPYTHON" return '' % (self.name, id(self)) def mutated(self, key): @@ -492,8 +492,9 @@ self, w_subtype, w_subtype) return w_subtype + @not_rpython def _cleanup_(self): - "NOT_RPYTHON. Forces the lazy attributes to be computed." + "Forces the lazy attributes to be computed." if 'lazyloaders' in self.__dict__: for attr in self.lazyloaders.keys(): self.getdictvalue(self.space, attr) @@ -1317,8 +1318,9 @@ class TypeCache(SpaceCache): + @not_rpython def build(self, typedef): - "NOT_RPYTHON: initialization-time only." + "initialization-time only." from pypy.objspace.std.objectobject import W_ObjectObject from pypy.interpreter.typedef import GetSetProperty from rpython.rlib.objectmodel import instantiate From pypy.commits at gmail.com Thu Aug 3 14:55:18 2017 From: pypy.commits at gmail.com (exarkun) Date: Thu, 03 Aug 2017 11:55:18 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Mark files of unknown type with `?` Message-ID: <59837196.43301c0a.ec078.c70f@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92052:1c67ea9ef466 Date: 2017-08-03 14:46 -0400 http://bitbucket.org/pypy/pypy/changeset/1c67ea9ef466/ Log: Mark files of unknown type with `?` This mirrors CPython _stat.filemode behavior though it diverges from the (likely rarely used) stat.filemode behavior. 
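The divergence described in this log message can be reproduced with a standalone sketch of the patched algorithm: the file-type column falls back to ``?`` when no type bit matches, while every permission column still falls back to ``-``. The permission table below is a reconstruction of CPython's ``_filemode_table``, trimmed for illustration; the expected strings match the test added in the diff.

```python
import stat

# Reconstruction (for illustration) of CPython's _filemode_table.
_filemode_table = (
    ((stat.S_IFLNK, "l"), (stat.S_IFSOCK, "s"), (stat.S_IFREG, "-"),
     (stat.S_IFBLK, "b"), (stat.S_IFDIR, "d"), (stat.S_IFCHR, "c"),
     (stat.S_IFIFO, "p")),
    ((stat.S_IRUSR, "r"),), ((stat.S_IWUSR, "w"),),
    ((stat.S_IXUSR | stat.S_ISUID, "s"), (stat.S_ISUID, "S"),
     (stat.S_IXUSR, "x")),
    ((stat.S_IRGRP, "r"),), ((stat.S_IWGRP, "w"),),
    ((stat.S_IXGRP | stat.S_ISGID, "s"), (stat.S_ISGID, "S"),
     (stat.S_IXGRP, "x")),
    ((stat.S_IROTH, "r"),), ((stat.S_IWOTH, "w"),),
    ((stat.S_IXOTH | stat.S_ISVTX, "t"), (stat.S_ISVTX, "T"),
     (stat.S_IXOTH, "x")),
)

def filemode(mode):
    """Convert a mode to '-rwxrwxrwx' form; '?' marks an unknown type."""
    perm = []
    empty = "?"          # only the first (type) column uses '?'
    for table in _filemode_table:
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            perm.append(empty)
        empty = "-"      # all later columns fall back to '-'
    return "".join(perm)

assert filemode(0o420) == "?r---w----"            # no type bits: '?'
assert filemode(stat.S_IFREG | 0o444) == "-r--r--r--"
```

This mirrors the ``_stat.filemode`` behavior the log refers to: permission-only modes such as ``0o420`` render a question mark in the type field instead of a dash.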
diff --git a/lib-python/3/stat.py b/lib-python/3/stat.py --- a/lib-python/3/stat.py +++ b/lib-python/3/stat.py @@ -139,13 +139,21 @@ def filemode(mode): """Convert a file's mode to a string of the form '-rwxrwxrwx'.""" perm = [] + + # The first group gets a question mark if none of the bits match the mode. + empty = "?" + for table in _filemode_table: for bit, char in table: if mode & bit == bit: perm.append(char) break else: - perm.append("-") + perm.append(empty) + + # All the rest of the positions get a - if the bits don't match. + empty = "-" + return "".join(perm) diff --git a/lib-python/3/test/test_stat.py b/lib-python/3/test/test_stat.py --- a/lib-python/3/test/test_stat.py +++ b/lib-python/3/test/test_stat.py @@ -138,6 +138,10 @@ self.assertS_IS("REG", st_mode) self.assertEqual(modestr, '-r--r--r--') self.assertEqual(self.statmod.S_IMODE(st_mode), 0o444) + + # If there are only permission bits, no type bytes, a question + # mark is rendered in the type field. + self.assertEqual(self.statmod.filemode(0o420), '?r---w----') else: os.chmod(TESTFN, 0o700) st_mode, modestr = self.get_mode() From pypy.commits at gmail.com Thu Aug 3 15:43:21 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 03 Aug 2017 12:43:21 -0700 (PDT) Subject: [pypy-commit] pypy getarrayitem-into-bridges: a bug somewhere in heap.py that affects the new code that I wrote. I did not Message-ID: <59837cd9.832d1c0a.91f84.d59d@mx.google.com> Author: Carl Friedrich Bolz Branch: getarrayitem-into-bridges Changeset: r92053:6a7534bc78d2 Date: 2017-08-03 21:41 +0200 http://bitbucket.org/pypy/pypy/changeset/6a7534bc78d2/ Log: a bug somewhere in heap.py that affects the new code that I wrote. I did not manage to write a test for this or fix it at all. deal with it gracefully, instead of crashing. (it's likely not really bad, but might still be good to figure out what's going on). 
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -709,7 +709,12 @@ if not box1.is_constant() and box1 not in available_boxes: continue structinfo = cf.cached_infos[i] - box2 = structinfo.getfield(descr).get_box_replacement() + box2 = structinfo.getfield(descr) + if box2 is None: + # XXX this should not happen, as it is an invariant + # violation! yet it does if box1 is a constant + continue + box2 = box2.get_box_replacement() if box2.is_constant() or box2 in available_boxes: result_getfield.append((box1, descr, box2)) result_array = [] @@ -721,7 +726,12 @@ if not box1.is_constant() and box1 not in available_boxes: continue arrayinfo = cf.cached_infos[i] - box2 = arrayinfo.getitem(descr, index).get_box_replacement() + box2 = arrayinfo.getitem(descr, index) + if box2 is None: + # XXX this should not happen, as it is an invariant + # violation! 
yet it does if box1 is a constant + continue + box2 = box2.get_box_replacement() if box2.is_constant() or box2 in available_boxes: result_array.append((box1, index, descr, box2)) return result_getfield, result_array From pypy.commits at gmail.com Thu Aug 3 17:17:10 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 03 Aug 2017 14:17:10 -0700 (PDT) Subject: [pypy-commit] pypy default: add a jit driver for array.count and array.index Message-ID: <598392d6.01571c0a.14ca8.37cf@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92054:5361e9bb4b15 Date: 2017-08-03 23:14 +0200 http://bitbucket.org/pypy/pypy/changeset/5361e9bb4b15/ Log: add a jit driver for array.count and array.index diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -118,6 +118,29 @@ return space.w_True return space.w_False +index_count_jd = jit.JitDriver( + greens = ['count', 'tp_item', 'arrclass'], + reds = 'auto', name = 'array.index_or_count') + +def index_count_array(arr, w_val, count=False): + space = arr.space + tp_item = space.type(w_val) + arrclass = arr.__class__ + cnt = 0 + for i in range(arr.len): + index_count_jd.jit_merge_point( + tp_item=tp_item, count=count, + arrclass=arrclass) + w_item = arr.w_getitem(space, i) + if space.eq_w(w_item, w_val): + if count: + cnt += 1 + else: + return i + if count: + return cnt + return -1 + UNICODE_ARRAY = lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True})) @@ -257,17 +280,12 @@ """ self.extend(w_x) - def descr_count(self, space, w_val): + def descr_count(self, space, w_x): """ count(x) Return number of occurrences of x in the array. 
""" - cnt = 0 - for i in range(self.len): - # XXX jitdriver - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_val): - cnt += 1 + cnt = index_count_array(self, w_x, count=True) return space.newint(cnt) def descr_index(self, space, w_x): @@ -275,10 +293,9 @@ Return index of first occurrence of x in the array. """ - for i in range(self.len): - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_x): - return space.newint(i) + res = index_count_array(self, w_x, count=False) + if res >= 0: + return space.newint(res) raise oefmt(space.w_ValueError, "array.index(x): x not in list") def descr_reverse(self, space): From pypy.commits at gmail.com Thu Aug 3 20:16:16 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:16 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: merge default into branch Message-ID: <5983bcd0.db85df0a.b03cf.c742@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92055:5fb13e8e0a8f Date: 2017-08-01 09:23 -0700 http://bitbucket.org/pypy/pypy/changeset/5fb13e8e0a8f/ Log: merge default into branch diff too long, truncating to 2000 out of 5319 lines diff --git a/lib_pypy/_tkinter/tklib_build.py b/lib_pypy/_tkinter/tklib_build.py --- a/lib_pypy/_tkinter/tklib_build.py +++ b/lib_pypy/_tkinter/tklib_build.py @@ -22,12 +22,27 @@ linklibs = ['tcl', 'tk'] libdirs = [] else: - for _ver in ['', '8.6', '8.5', '']: + # On some Linux distributions, the tcl and tk libraries are + # stored in /usr/include, so we must check this case also + libdirs = [] + found = False + for _ver in ['', '8.6', '8.5']: incdirs = ['/usr/include/tcl' + _ver] linklibs = ['tcl' + _ver, 'tk' + _ver] - libdirs = [] if os.path.isdir(incdirs[0]): + found = True break + if not found: + for _ver in ['8.6', '8.5', '']: + incdirs = [] + linklibs = ['tcl' + _ver, 'tk' + _ver] + if os.path.isfile(''.join(['/usr/lib/lib', linklibs[1], '.so'])): + found = True + break + if not found: + sys.stderr.write("*** TCL libraries not found! 
Falling back...\n") + incdirs = [] + linklibs = ['tcl', 'tk'] config_ffi = FFI() config_ffi.cdef(""" diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -95,6 +95,7 @@ #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define _cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong +#define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -1,7 +1,12 @@ /***** Support code for embedding *****/ -#if defined(_MSC_VER) +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(_WIN32) # define CFFI_DLLEXPORT __declspec(dllexport) #elif defined(__GNUC__) # define CFFI_DLLEXPORT __attribute__((visibility("default"))) @@ -525,3 +530,7 @@ #undef cffi_compare_and_swap #undef cffi_write_barrier #undef cffi_read_barrier + +#ifdef __cplusplus +} +#endif diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py --- a/lib_pypy/cffi/recompiler.py +++ b/lib_pypy/cffi/recompiler.py @@ -412,6 +412,9 @@ prnt(' }') prnt(' p[0] = (const void *)0x%x;' % self._version) prnt(' p[1] = &_cffi_type_context;') + prnt('#if PY_MAJOR_VERSION >= 3') + prnt(' return NULL;') + prnt('#endif') prnt('}') # on Windows, distutils insists on putting init_cffi_xyz in # 'export_symbols', so instead of fighting it, just give up and @@ -578,7 +581,7 @@ def _convert_expr_from_c(self, tp, var, context): if isinstance(tp, model.BasePrimitiveType): - if tp.is_integer_type(): + if tp.is_integer_type() and tp.name != '_Bool': return '_cffi_from_c_int(%s, %s)' % (var, tp.name) elif isinstance(tp, model.UnknownFloatType): return '_cffi_from_c_double(%s)' % (var,) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- 
a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -296,7 +296,7 @@ def _convert_expr_from_c(self, tp, var, context): if isinstance(tp, model.PrimitiveType): - if tp.is_integer_type(): + if tp.is_integer_type() and tp.name != '_Bool': return '_cffi_from_c_int(%s, %s)' % (var, tp.name) elif tp.name != 'long double': return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var) @@ -872,6 +872,7 @@ #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define _cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong +#define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -10,6 +10,18 @@ minutes on a fast machine -- and RAM-hungry. You will need **at least** 2 GB of memory on a 32-bit machine and 4GB on a 64-bit machine. +Before you start +---------------- + +Our normal development workflow avoids a full translation by using test-driven +development. You can read more about how to develop PyPy here_, and latest +translated (hopefully functional) binary packages are available on our +buildbot's `nightly builds`_ + +.. _here: getting-started-dev.html +.. _`nightly builds`: http://buildbot.pypy.org/nightly + +You will need the build dependencies below to run the tests. Clone the repository -------------------- diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -330,6 +330,8 @@ - ``frozenset`` (empty frozenset only) + - unbound method objects (for Python 2 only) + This change requires some changes to ``id`` as well. ``id`` fulfills the following condition: ``x is y <=> id(x) == id(y)``. 
Therefore ``id`` of the above types will return a value that is computed from the argument, and can diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst --- a/pypy/doc/getting-started-dev.rst +++ b/pypy/doc/getting-started-dev.rst @@ -35,8 +35,8 @@ * Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Run tests! (See - the rest of this page.) + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) * Commit regularly with ``hg commit``. A one-line commit message is fine. We love to have tons of commits; make one as soon as you have @@ -113,6 +113,10 @@ make sure you have the correct version installed which you can find out with the ``--version`` switch. +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter + Now on to running some tests. PyPy has many different test directories and you can use shell completion to point at directories or files:: @@ -141,7 +145,7 @@ .. _py.test testing tool: http://pytest.org .. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage - +.. _`build requirements`: build.html#install-build-time-dependencies Special Introspection Features of the Untranslated Python Interpreter --------------------------------------------------------------------- diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -40,6 +40,9 @@ sure things are ported back to the trunk and to the branch as necessary. +* Maybe bump the SOABI number in module/imp/importing. This has many + implications, so make sure the PyPy community agrees to the change. 
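[Editorial note, not part of the original digest: the bound-method change described in the diffs above, together with the quoted condition ``x is y <=> id(x) == id(y)``, can be observed from plain Python. A minimal sketch, runnable on CPython 3 as well (which likewise creates a fresh bound method object on every attribute access); the class name is illustrative:]

```python
class A(object):
    def m(self):
        return 42

a = A()
x = a.m
y = a.m
# each attribute access builds a fresh bound method object...
assert x is not y
assert id(x) != id(y)
# ...but the two compare equal and invoke the same underlying function
assert x == y
assert x() == y() == 42
# the documented invariant holds: x is y  <=>  id(x) == id(y)
assert (x is y) == (id(x) == id(y))
```

The PyPy2-only *unbound*-method special case (``A.m is A.m`` being True) is deliberately not asserted here, since it does not hold on CPython.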
+
 * Update and write documentation

 * update pypy/doc/contributor.rst (and possibly LICENSE)
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,14 @@
 .. this is a revision shortly after release-pypy2.7-v5.8.0
 .. startrev: 558bd00b3dd8

+In previous versions of PyPy, ``instance.method`` would return always
+the same bound method object, when gotten out of the same instance (as
+far as ``is`` and ``id()`` can tell).  CPython doesn't do that.  Now
+PyPy, like CPython, returns a different bound method object every time.
+For ``type.method``, PyPy2 still returns always the same *unbound*
+method object; CPython does it for built-in types but not for
+user-defined types.
+
 .. branch: cffi-complex
 .. branch: cffi-char16-char32
@@ -30,3 +38,20 @@
 Renaming of ``cppyy`` to ``_cppyy``.
 The former is now an external package installable with ``pip install cppyy``.
+
+.. branch: Enable_PGO_for_clang
+
+.. branch: nopax
+
+At the end of translation, run ``attr -q -s pax.flags -V m`` on
+PAX-enabled systems on the produced binary.  This seems necessary
+because PyPy uses a JIT.
+
+.. branch: pypy_bytearray
+
+Improve ``bytearray`` performance (backported from py3.5)
+
+.. branch: gc-del-limit-growth
+
+Fix the bounds in the GC when allocating a lot of objects with finalizers,
+fixes issue #2590
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -559,21 +559,29 @@
         return space.newbool(space.eq_w(self.w_function, w_other.w_function))

     def is_w(self, space, other):
+        if self.w_instance is not None:
+            return W_Root.is_w(self, space, other)
+        # The following special-case is only for *unbound* method objects.
+        # Motivation: in CPython, it seems that no strange internal type
+        # exists where the equivalent of ``x.method is x.method`` would
+        # return True.  This is unlike unbound methods, where e.g.
+        # ``list.append is list.append`` returns True.  The following code
+        # is here to emulate that behaviour.  Unlike CPython, we return
+        # True for all equal unbound methods, not just for built-in types.
         if not isinstance(other, Method):
             return False
-        return (self.w_instance is other.w_instance and
+        return (other.w_instance is None and
                 self.w_function is other.w_function and
                 self.w_class is other.w_class)

     def immutable_unique_id(self, space):
-        from pypy.objspace.std.util import IDTAG_METHOD as tag
+        if self.w_instance is not None:
+            return W_Root.immutable_unique_id(self, space)
+        # the special-case is only for *unbound* method objects
+        #
+        from pypy.objspace.std.util import IDTAG_UNBOUND_METHOD as tag
         from pypy.objspace.std.util import IDTAG_SHIFT
-        if self.w_instance is not None:
-            id = space.bigint_w(space.id(self.w_instance))
-            id = id.lshift(LONG_BIT)
-        else:
-            id = rbigint.fromint(0)
-        id = id.or_(space.bigint_w(space.id(self.w_function)))
+        id = space.bigint_w(space.id(self.w_function))
         id = id.lshift(LONG_BIT).or_(space.bigint_w(space.id(self.w_class)))
         id = id.lshift(IDTAG_SHIFT).int_or_(tag)
         return space.newlong_from_rbigint(id)
diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py
--- a/pypy/interpreter/test/test_function.py
+++ b/pypy/interpreter/test/test_function.py
@@ -1,4 +1,4 @@
-import pytest
+import pytest, sys
 from pypy.interpreter import eval
 from pypy.interpreter.function import Function, Method, descr_function_get
 from pypy.interpreter.pycode import PyCode
@@ -342,6 +342,11 @@
         raises(ValueError, type(f).__setstate__, f, (1, 2, 3))

 class AppTestMethod:
+    def setup_class(cls):
+        cls.w_runappdirect_on_cpython = cls.space.wrap(
+            cls.runappdirect and
+            '__pypy__' not in sys.builtin_module_names)
+
     def test_simple_call(self):
         class A(object):
             def func(self, arg2):
@@ -572,7 +577,6 @@
         assert meth == meth
         assert meth == MethodType(func, object)

-    @pytest.mark.skipif("config.option.runappdirect")
-    def
test_method_identity(self):
         class A(object):
             def m(self):
@@ -589,19 +593,24 @@
         a = A()
         a2 = A()
-        assert a.m is a.m
-        assert id(a.m) == id(a.m)
-        assert a.m is not a.n
-        assert id(a.m) != id(a.n)
-        assert a.m is not a2.m
-        assert id(a.m) != id(a2.m)
+        x = a.m; y = a.m
+        assert x is not y
+        assert id(x) != id(y)
+        assert x == y
+        assert x is not a.n
+        assert id(x) != id(a.n)
+        assert x is not a2.m
+        assert id(x) != id(a2.m)

-        assert A.m is A.m
-        assert id(A.m) == id(A.m)
-        assert A.m is not A.n
-        assert id(A.m) != id(A.n)
-        assert A.m is not B.m
-        assert id(A.m) != id(B.m)
+        if not self.runappdirect_on_cpython:
+            assert A.m is A.m
+            assert id(A.m) == id(A.m)
+            assert A.m == A.m
+        x = A.m
+        assert x is not A.n
+        assert id(x) != id(A.n)
+        assert x is not B.m
+        assert id(x) != id(B.m)

 class TestMethod:
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -3838,6 +3838,7 @@
     assert result == samples
     for i in range(len(samples)):
         assert result[i] == p[i] and type(result[i]) is type(p[i])
+        assert (type(result[i]) is bool) == (type(samples[i]) is bool)
     #
     BInt = new_primitive_type("int")
     py.test.raises(TypeError, unpack, p)
diff --git a/pypy/module/_vmprof/__init__.py b/pypy/module/_vmprof/__init__.py
--- a/pypy/module/_vmprof/__init__.py
+++ b/pypy/module/_vmprof/__init__.py
@@ -11,7 +11,6 @@
     interpleveldefs = {
         'enable': 'interp_vmprof.enable',
         'disable': 'interp_vmprof.disable',
-        'write_all_code_objects': 'interp_vmprof.write_all_code_objects',
         'is_enabled': 'interp_vmprof.is_enabled',
         'get_profile_path': 'interp_vmprof.get_profile_path',
         'stop_sampling': 'interp_vmprof.stop_sampling',
diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py
--- a/pypy/module/_vmprof/interp_vmprof.py
+++ b/pypy/module/_vmprof/interp_vmprof.py
@@ -70,11 +70,6 @@
     except rvmprof.VMProfError as e:
         raise VMProfError(space, e)

-def write_all_code_objects(space):
-    """ Needed on cpython, just empty function here
-    """
-    pass
-
 def disable(space):
     """Disable vmprof.  Remember to close the file descriptor afterwards
     if necessary.
diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -1,3 +1,4 @@
+import sys
 from rpython.tool.udir import udir
 from pypy.tool.pytest.objspace import gettestobjspace
@@ -7,6 +8,8 @@
     def setup_class(cls):
         cls.w_tmpfilename = cls.space.wrap(str(udir.join('test__vmprof.1')))
         cls.w_tmpfilename2 = cls.space.wrap(str(udir.join('test__vmprof.2')))
+        cls.w_plain = cls.space.wrap(not cls.runappdirect and
+                                     '__pypy__' not in sys.builtin_module_names)

     def test_import_vmprof(self):
         tmpfile = open(self.tmpfilename, 'wb')
@@ -115,3 +118,33 @@
         assert fd1.read() == tmpfile.read()
         _vmprof.disable()
         assert _vmprof.get_profile_path() is None
+
+    def test_stop_sampling(self):
+        if not self.plain:
+            skip("unreliable test except on CPython without -A")
+        import os
+        import _vmprof
+        tmpfile = open(self.tmpfilename, 'wb')
+        native = 1
+        def f():
+            import sys
+            import math
+            j = sys.maxsize
+            for i in range(500):
+                j = math.sqrt(j)
+        _vmprof.enable(tmpfile.fileno(), 0.01, 0, native, 0, 0)
+        # get_vmprof_stack() always returns 0 here!
+        # see vmprof_common.c and assume RPYTHON_LL2CTYPES is defined!
+        f()
+        fileno = _vmprof.stop_sampling()
+        pos = os.lseek(fileno, 0, os.SEEK_CUR)
+        f()
+        pos2 = os.lseek(fileno, 0, os.SEEK_CUR)
+        assert pos == pos2
+        _vmprof.start_sampling()
+        f()
+        fileno = _vmprof.stop_sampling()
+        pos3 = os.lseek(fileno, 0, os.SEEK_CUR)
+        assert pos3 > pos
+        _vmprof.disable()
+
diff --git a/pypy/module/cppyy/test/test_cint.py b/pypy/module/cppyy/test/test_cint.py
deleted file mode 100644
--- a/pypy/module/cppyy/test/test_cint.py
+++ /dev/null
@@ -1,710 +0,0 @@
-import py, os, sys
-
-# These tests are for the CINT backend only (they exercise ROOT features
-# and classes that are not loaded/available with the Reflex backend).  At
-# some point, these tests are likely covered by the CLang/LLVM backend.
-from pypy.module.cppyy import capi
-if capi.identify() != 'CINT':
-    py.test.skip("backend-specific: CINT-only tests")
-
-# load _cffi_backend early, or its global vars are counted as leaks in the
-# test (note that the module is not otherwise used in the test itself)
-from pypy.module._cffi_backend import newtype
-
-currpath = py.path.local(__file__).dirpath()
-iotypes_dct = str(currpath.join("iotypesDict.so"))
-
-def setup_module(mod):
-    if sys.platform == 'win32':
-        py.test.skip("win32 not supported so far")
-    err = os.system("cd '%s' && make CINT=t iotypesDict.so" % currpath)
-    if err:
-        raise OSError("'make' failed (see stderr)")
-
-class AppTestCINT:
-    spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools'])
-
-    def test01_globals(self):
-        """Test the availability of ROOT globals"""
-
-        import cppyy
-
-        assert cppyy.gbl.gROOT
-        assert cppyy.gbl.gApplication
-        assert cppyy.gbl.gSystem
-        assert cppyy.gbl.TInterpreter.Instance()   # compiled
-        assert cppyy.gbl.TInterpreter              # interpreted
-        assert cppyy.gbl.TDirectory.CurrentDirectory()  # compiled
-        assert cppyy.gbl.TDirectory               # interpreted
-
-    def test02_write_access_to_globals(self):
-        """Test overwritability of ROOT globals"""
-
-        import cppyy
-
-        oldval = cppyy.gbl.gDebug
-        assert
oldval != 3 - - proxy = cppyy.gbl.__class__.__dict__['gDebug'] - cppyy.gbl.gDebug = 3 - assert proxy.__get__(proxy, None) == 3 - - # this is where this test differs from test03_write_access_to_globals - # in test_pythonify.py - cppyy.gbl.gROOT.ProcessLine('int gDebugCopy = gDebug;') - assert cppyy.gbl.gDebugCopy == 3 - - cppyy.gbl.gDebug = oldval - - def test03_create_access_to_globals(self): - """Test creation and access of new ROOT globals""" - - import cppyy - - cppyy.gbl.gROOT.ProcessLine('double gMyOwnGlobal = 3.1415') - assert cppyy.gbl.gMyOwnGlobal == 3.1415 - - proxy = cppyy.gbl.__class__.__dict__['gMyOwnGlobal'] - assert proxy.__get__(proxy, None) == 3.1415 - - def test04_auto_loading(self): - """Test auto-loading by retrieving a non-preloaded class""" - - import cppyy - - l = cppyy.gbl.TLorentzVector() - assert isinstance(l, cppyy.gbl.TLorentzVector) - - def test05_macro_loading(self): - """Test accessibility to macro classes""" - - import cppyy - - loadres = cppyy.gbl.gROOT.LoadMacro('simple_class.C') - assert loadres == 0 - - base = cppyy.gbl.MySimpleBase - simple = cppyy.gbl.MySimpleDerived - simple_t = cppyy.gbl.MySimpleDerived_t - - assert issubclass(simple, base) - assert simple is simple_t - - c = simple() - assert isinstance(c, simple) - assert c.m_data == c.get_data() - - c.set_data(13) - assert c.m_data == 13 - assert c.get_data() == 13 - - -class AppTestCINTPYTHONIZATIONS: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def test01_strings(self): - """Test TString/TObjString compatibility""" - - import cppyy - - pyteststr = "aap noot mies" - def test_string(s1, s2): - assert len(s1) == len(s2) - assert s1 == s1 - assert s1 == s2 - assert s1 == str(s1) - assert s1 == pyteststr - assert s1 != "aap" - assert s1 != "" - assert s1 < "noot" - assert repr(s1) == repr(s2) - - s1 = cppyy.gbl.TString(pyteststr) - test_string(s1, pyteststr) - - s3 = cppyy.gbl.TObjString(pyteststr) - test_string(s3, pyteststr) - - def 
test03_TVector(self): - """Test TVector2/3/T behavior""" - - import cppyy, math - - N = 51 - - # TVectorF is a typedef of floats - v = cppyy.gbl.TVectorF(N) - for i in range(N): - v[i] = i*i - - assert len(v) == N - for j in v: - assert round(v[int(math.sqrt(j)+0.5)]-j, 5) == 0. - - def test04_TStringTObjString(self): - """Test string/TString interchangebility""" - - import cppyy - - test = "aap noot mies" - - s1 = cppyy.gbl.TString(test ) - s2 = str(s1) - - assert s1 == test - assert test == s2 - assert s1 == s2 - - s3 = cppyy.gbl.TObjString(s2) - assert s3 == test - assert s2 == s3 - - # force use of: TNamed(const TString &name, const TString &title) - n = cppyy.gbl.TNamed(test, cppyy.gbl.TString("title")) - assert n.GetTitle() == "title" - assert n.GetName() == test - - -class AppTestCINTTTREE: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def setup_class(cls): - cls.w_N = cls.space.newint(5) - cls.w_M = cls.space.newint(10) - cls.w_fname = cls.space.newtext("test.root") - cls.w_tname = cls.space.newtext("test") - cls.w_title = cls.space.newtext("test tree") - cls.w_iotypes = cls.space.appexec([], """(): - import cppyy - return cppyy.load_reflection_info(%r)""" % (iotypes_dct,)) - - def test01_write_stdvector(self): - """Test writing of a single branched TTree with an std::vector""" - - from cppyy import gbl # bootstraps, only needed for tests - from cppyy.gbl import TFile, TTree - from cppyy.gbl.std import vector - - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - mytree._python_owns = False - - v = vector("double")() - raises(TypeError, TTree.Branch, None, "mydata", v.__class__.__name__, v) - raises(TypeError, TTree.Branch, v, "mydata", v.__class__.__name__, v) - - mytree.Branch("mydata", v.__class__.__name__, v) - - for i in range(self.N): - for j in range(self.M): - v.push_back(i*self.M+j) - mytree.Fill() - v.clear() - f.Write() - f.Close() - - def test02_file_open(self): - - from cppyy import gbl - - f = 
gbl.TFile.Open(self.fname) - s = str(f) # should not raise - r = repr(f) - - f.Close() - - def test03_read_stdvector(self): - """Test reading of a single branched TTree with an std::vector""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - i = 0 - for event in mytree: - assert len(event.mydata) == self.M - for entry in event.mydata: - assert i == int(entry) - i += 1 - assert i == self.N * self.M - - f.Close() - - def test04_write_some_data_object(self): - """Test writing of a complex data object""" - - from cppyy import gbl - from cppyy.gbl import TFile, TTree, IO - from cppyy.gbl.IO import SomeDataObject - - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - - d = SomeDataObject() - b = mytree.Branch("data", d) - mytree._python_owns = False - assert b - - for i in range(self.N): - for j in range(self.M): - d.add_float(i*self.M+j) - d.add_tuple(d.get_floats()) - - mytree.Fill() - - f.Write() - f.Close() - - def test05_read_some_data_object(self): - """Test reading of a complex data object""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - j = 1 - for event in mytree: - i = 0 - assert len(event.data.get_floats()) == j*self.M - for entry in event.data.get_floats(): - assert i == int(entry) - i += 1 - - k = 1 - assert len(event.data.get_tuples()) == j - for mytuple in event.data.get_tuples(): - i = 0 - assert len(mytuple) == k*self.M - for entry in mytuple: - assert i == int(entry) - i += 1 - k += 1 - j += 1 - assert j-1 == self.N - # - f.Close() - - def test06_branch_activation(self): - """Test of automatic branch activation""" - - from cppyy import gbl - from cppyy.gbl import TFile, TTree - from cppyy.gbl.std import vector - - L = 5 - - # writing - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - mytree._python_owns = False - - for i in range(L): - v = vector("double")() - 
mytree.Branch("mydata_%d"%i, v.__class__.__name__, v) - mytree.__dict__["v_%d"%i] = v - - for i in range(self.N): - for k in range(L): - v = mytree.__dict__["v_%d"%k] - for j in range(self.M): - mytree.__dict__["v_%d"%k].push_back(i*self.M+j*L+k) - mytree.Fill() - for k in range(L): - v = mytree.__dict__["v_%d"%k] - v.clear() - f.Write() - f.Close() - - del mytree, f - import gc - gc.collect() - - # reading - f = TFile(self.fname) - mytree = f.Get(self.tname) - - # force (initial) disabling of all branches - mytree.SetBranchStatus("*",0); - - i = 0 - for event in mytree: - for k in range(L): - j = 0 - data = getattr(mytree, "mydata_%d"%k) - assert len(data) == self.M - for entry in data: - assert entry == i*self.M+j*L+k - j += 1 - assert j == self.M - i += 1 - assert i == self.N - - f.Close() - - def test07_write_builtin(self): - """Test writing of builtins""" - - from cppyy import gbl # bootstraps, only needed for tests - from cppyy.gbl import TFile, TTree - from cppyy.gbl.std import vector - - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - mytree._python_owns = False - - import array - mytree.ba = array.array('c', [chr(0)]) - mytree.ia = array.array('i', [0]) - mytree.da = array.array('d', [0.]) - - mytree.Branch("my_bool", mytree.ba, "my_bool/O") - mytree.Branch("my_int", mytree.ia, "my_int/I") - mytree.Branch("my_int2", mytree.ia, "my_int2/I") - mytree.Branch("my_double", mytree.da, "my_double/D") - - for i in range(self.N): - # make sure value is different from default (0) - mytree.ba[0] = i%2 and chr(0) or chr(1) - mytree.ia[0] = i+1 - mytree.da[0] = (i+1)/2. 
- mytree.Fill() - f.Write() - f.Close() - - def test08_read_builtin(self): - """Test reading of builtins""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - raises(AttributeError, getattr, mytree, "does_not_exist") - - i = 1 - for event in mytree: - assert event.my_bool == (i-1)%2 and 0 or 1 - assert event.my_int == i - assert event.my_double == i/2. - i += 1 - assert (i-1) == self.N - - f.Close() - - def test09_user_read_builtin(self): - """Test user-directed reading of builtins""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - # note, this is an old, annoted tree from test08 - for i in range(3, mytree.GetEntriesFast()): - mytree.GetEntry(i) - assert mytree.my_int == i+1 - assert mytree.my_int2 == i+1 - - f.Close() - -class AppTestCINTREGRESSION: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - # these are tests that at some point in the past resulted in failures on - # PyROOT; kept here to confirm no regression from PyROOT - - def test01_regression(self): - """TPaveText::AddText() used to result in KeyError""" - - # This is where the original problem was discovered, and the test is - # left in. However, the detailed underlying problem, as well as the - # solution to it, is tested in test_fragile.py - - from cppyy import gbl - from cppyy.gbl import TPaveText - - hello = TPaveText( .1, .8, .9, .97 ) - hello.AddText( 'Hello, World!' ) - - -class AppTestCINTFUNCTION: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - _pypytest_leaks = None # TODO: figure out the false positives - - # test the function callbacks; this does not work with Reflex, as it can - # not generate functions on the fly (it might with cffi?) 
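[Editorial note, not part of the original digest: the deleted comment above wonders whether callables could be generated on the fly with cffi. Independently of ROOT/CINT, the core mechanism these tests exercised through ``TF1`` — handing a Python callable to C code as a function pointer — can be sketched with only the standard library, using ``ctypes`` and libc ``qsort``. This assumes a POSIX system and is an illustration of the general technique, not the cppyy API:]

```python
import ctypes

# POSIX: CDLL(None) gives a handle to the already-loaded C library
libc = ctypes.CDLL(None)

# C signature: int (*compar)(const void *, const void *)
CMPFUNC = ctypes.CFUNCTYPE(ctypes.c_int,
                           ctypes.POINTER(ctypes.c_int),
                           ctypes.POINTER(ctypes.c_int))

def py_cmp(a, b):
    # a plain Python function, called back from C while qsort runs
    return a[0] - b[0]

cmp_cb = CMPFUNC(py_cmp)   # keep a reference alive while C may call it
arr = (ctypes.c_int * 5)(5, 1, 4, 2, 3)
libc.qsort(arr, len(arr), ctypes.sizeof(ctypes.c_int), cmp_cb)
assert list(arr) == [1, 2, 3, 4, 5]
```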
- - @py.test.mark.dont_track_allocations("TODO: understand; initialization left-over?") - def test01_global_function_callback(self): - """Test callback of a python global function""" - - import cppyy, gc - TF1 = cppyy.gbl.TF1 - - def identity(x): - return x[0] - - f = TF1("pyf1", identity, -1., 1., 0) - - assert f.Eval(0.5) == 0.5 - assert f.Eval(-10.) == -10. - assert f.Eval(1.0) == 1.0 - - # check proper propagation of default value - f = TF1("pyf1d", identity, -1., 1.) - - assert f.Eval(0.5) == 0.5 - - del f # force here, to prevent leak-check complaints - gc.collect() - - def test02_callable_object_callback(self): - """Test callback of a python callable object""" - - import cppyy, gc - TF1 = cppyy.gbl.TF1 - - class Linear: - def __call__(self, x, par): - return par[0] + x[0]*par[1] - - f = TF1("pyf2", Linear(), -1., 1., 2) - f.SetParameters(5., 2.) - - assert f.Eval(-0.1) == 4.8 - assert f.Eval(1.3) == 7.6 - - del f # force here, to prevent leak-check complaints - gc.collect() - - def test03_fit_with_python_gaussian(self): - """Test fitting with a python global function""" - - # note: this function is dread-fully slow when running testing un-translated - - import cppyy, gc, math - TF1, TH1F = cppyy.gbl.TF1, cppyy.gbl.TH1F - - def pygaus(x, par): - arg1 = 0 - scale1 = 0 - ddx = 0.01 - - if (par[2] != 0.0): - arg1 = (x[0]-par[1])/par[2] - scale1 = (ddx*0.39894228)/par[2] - h1 = par[0]/(1+par[3]) - - gauss = h1*scale1*math.exp(-0.5*arg1*arg1) - else: - gauss = 0. - return gauss - - f = TF1("pygaus", pygaus, -4, 4, 4) - f.SetParameters(600, 0.43, 0.35, 600) - - h = TH1F("h", "test", 100, -4, 4) - h.FillRandom("gaus", 200000) - h.Fit(f, "0Q") - - assert f.GetNDF() == 96 - result = f.GetParameters() - assert round(result[1] - 0., 1) == 0 # mean - assert round(result[2] - 1., 1) == 0 # s.d. 
- - del f # force here, to prevent leak-check complaints - gc.collect() - - -class AppTestSURPLUS: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - # these are tests that were historically exercised on ROOT classes and - # have twins on custom classes; kept here just in case differences crop - # up between the ROOT classes and the custom ones - - def test01_class_enum(self): - """Test class enum access and values""" - - import cppyy - TObject = cppyy.gbl.TObject - gROOT = cppyy.gbl.gROOT - - assert TObject.kBitMask == gROOT.ProcessLine("return TObject::kBitMask;") - assert TObject.kIsOnHeap == gROOT.ProcessLine("return TObject::kIsOnHeap;") - assert TObject.kNotDeleted == gROOT.ProcessLine("return TObject::kNotDeleted;") - assert TObject.kZombie == gROOT.ProcessLine("return TObject::kZombie;") - - t = TObject() - - assert TObject.kBitMask == t.kBitMask - assert TObject.kIsOnHeap == t.kIsOnHeap - assert TObject.kNotDeleted == t.kNotDeleted - assert TObject.kZombie == t.kZombie - - def test02_global_enum(self): - """Test global enums access and values""" - - import cppyy - from cppyy import gbl - - assert gbl.kRed == gbl.gROOT.ProcessLine("return kRed;") - assert gbl.kGreen == gbl.gROOT.ProcessLine("return kGreen;") - assert gbl.kBlue == gbl.gROOT.ProcessLine("return kBlue;") - - def test03_copy_contructor(self): - """Test copy constructor""" - - import cppyy - TLorentzVector = cppyy.gbl.TLorentzVector - - t1 = TLorentzVector(1., 2., 3., -4.) - t2 = TLorentzVector(0., 0., 0., 0.) 
- t3 = TLorentzVector(t1) - - assert t1 == t3 - assert t1 != t2 - - for i in range(4): - assert t1[i] == t3[i] - - def test04_object_validity(self): - """Test object validity checking""" - - import cppyy - - t1 = cppyy.gbl.TObject() - - assert t1 - assert not not t1 - - t2 = cppyy.gbl.gROOT.FindObject("Nah, I don't exist") - - assert not t2 - - def test05_element_access(self): - """Test access to elements in matrix and array objects.""" - - from cppyy import gbl - - N = 3 - v = gbl.TVectorF(N) - m = gbl.TMatrixD(N, N) - - for i in range(N): - assert v[i] == 0.0 - - for j in range(N): - assert m[i][j] == 0.0 - - def test06_static_function_call( self ): - """Test call to static function.""" - - import cppyy - TROOT, gROOT = cppyy.gbl.TROOT, cppyy.gbl.gROOT - - c1 = TROOT.Class() - assert not not c1 - - c2 = gROOT.Class() - - assert c1 == c2 - - old = gROOT.GetDirLevel() - TROOT.SetDirLevel(2) - assert 2 == gROOT.GetDirLevel() - gROOT.SetDirLevel(old) - - old = TROOT.GetDirLevel() - gROOT.SetDirLevel(3) - assert 3 == TROOT.GetDirLevel() - TROOT.SetDirLevel(old) - - def test07_macro(self): - """Test access to cpp macro's""" - - from cppyy import gbl - - assert gbl.NULL == 0 - - gbl.gROOT.ProcessLine('#define aap "aap"') - gbl.gROOT.ProcessLine('#define noot 1') - gbl.gROOT.ProcessLine('#define mies 2.0') - - # TODO: macro's assumed to always be of long type ... 
- #assert gbl.aap == "aap" - assert gbl.noot == 1 - #assert gbl.mies == 2.0 - - def test08_opaque_pointer_passing(self): - """Test passing around of opaque pointers""" - - import cppyy - - # TODO: figure out CObject (see also test_advanced.py) - - s = cppyy.gbl.TString("Hello World!") - #cobj = cppyy.as_cobject(s) - addr = cppyy.addressof(s) - - #assert s == cppyy.bind_object(cobj, s.__class__) - #assert s == cppyy.bind_object(cobj, "TString") - assert s == cppyy.bind_object(addr, s.__class__) - assert s == cppyy.bind_object(addr, "TString") - - def test09_object_and_pointer_comparisons(self): - """Verify object and pointer comparisons""" - - import cppyy - gbl = cppyy.gbl - - c1 = cppyy.bind_object(0, gbl.TCanvas) - assert c1 == None - assert None == c1 - - c2 = cppyy.bind_object(0, gbl.TCanvas) - assert c1 == c2 - assert c2 == c1 - - # TLorentzVector overrides operator== - l1 = cppyy.bind_object(0, gbl.TLorentzVector) - assert l1 == None - assert None == l1 - - assert c1 != l1 - assert l1 != c1 - - l2 = cppyy.bind_object(0, gbl.TLorentzVector) - assert l1 == l2 - assert l2 == l1 - - l3 = gbl.TLorentzVector(1, 2, 3, 4) - l4 = gbl.TLorentzVector(1, 2, 3, 4) - l5 = gbl.TLorentzVector(4, 3, 2, 1) - assert l3 == l4 - assert l4 == l3 - - assert l3 != None # like this to ensure __ne__ is called - assert None != l3 # id. 
- assert l3 != l5 - assert l5 != l3 - - def test10_recursive_remove(self): - """Verify that objects are recursively removed when destroyed""" - - import cppyy - - c = cppyy.gbl.TClass.GetClass("TObject") - - o = cppyy.gbl.TObject() - assert o - - o.SetBit(cppyy.gbl.TObject.kMustCleanup) - c.Destructor(o) - assert not o diff --git a/pypy/module/cpyext/test/test_api.py b/pypy/module/cpyext/test/test_api.py --- a/pypy/module/cpyext/test/test_api.py +++ b/pypy/module/cpyext/test/test_api.py @@ -64,14 +64,7 @@ except OperationError as e: print e.errorstr(self.space) raise - - try: - self.space.getexecutioncontext().cleanup_cpyext_state() - except AttributeError: - pass - - if self.check_and_print_leaks(): - assert False, "Test leaks or loses object(s)." + self.cleanup() @slot_function([PyObject], lltype.Void) def PyPy_GetWrapped(space, w_arg): diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -1,16 +1,12 @@ import sys -import weakref import pytest -from pypy.tool.cpyext.extbuild import ( - SystemCompilationInfo, HERE, get_sys_info_app) +from pypy.tool.cpyext.extbuild import SystemCompilationInfo, HERE from pypy.interpreter.gateway import unwrap_spec, interp2app -from rpython.rtyper.lltypesystem import lltype, ll2ctypes +from rpython.rtyper.lltypesystem import lltype from pypy.module.cpyext import api from pypy.module.cpyext.state import State -from pypy.module.cpyext.pyobject import Py_DecRef -from rpython.tool.identity_dict import identity_dict from rpython.tool import leakfinder from rpython.rlib import rawrefcount from rpython.tool.udir import udir @@ -76,13 +72,6 @@ def freeze_refcnts(self): rawrefcount._dont_free_any_more() - return #ZZZ - state = self.space.fromcache(RefcountState) - self.frozen_refcounts = {} - for w_obj, obj in state.py_objects_w2r.iteritems(): - self.frozen_refcounts[w_obj] = obj.c_ob_refcnt - 
#state.print_refcounts() - self.frozen_ll2callocations = set(ll2ctypes.ALLOCATED.values()) class LeakCheckingTest(object): """Base class for all cpyext tests.""" @@ -91,78 +80,13 @@ 'micronumpy', 'mmap' ]) - enable_leak_checking = True + def cleanup(self): + self.space.getexecutioncontext().cleanup_cpyext_state() + rawrefcount._collect() + self.space.user_del_action._run_finalizers() + leakfinder.stop_tracking_allocations(check=False) + assert not self.space.finalizer_queue.next_dead() - @staticmethod - def cleanup_references(space): - return #ZZZ - state = space.fromcache(RefcountState) - - import gc; gc.collect() - # Clear all lifelines, objects won't resurrect - for w_obj, obj in state.lifeline_dict._dict.items(): - if w_obj not in state.py_objects_w2r: - state.lifeline_dict.set(w_obj, None) - del obj - import gc; gc.collect() - - - for w_obj in state.non_heaptypes_w: - Py_DecRef(space, w_obj) - state.non_heaptypes_w[:] = [] - state.reset_borrowed_references() - - def check_and_print_leaks(self): - rawrefcount._collect() - # check for sane refcnts - import gc - - if 1: #ZZZ not self.enable_leak_checking: - leakfinder.stop_tracking_allocations(check=False) - return False - - leaking = False - state = self.space.fromcache(RefcountState) - gc.collect() - lost_objects_w = identity_dict() - lost_objects_w.update((key, None) for key in self.frozen_refcounts.keys()) - - for w_obj, obj in state.py_objects_w2r.iteritems(): - base_refcnt = self.frozen_refcounts.get(w_obj) - delta = obj.c_ob_refcnt - if base_refcnt is not None: - delta -= base_refcnt - lost_objects_w.pop(w_obj) - if delta != 0: - leaking = True - print >>sys.stderr, "Leaking %r: %i references" % (w_obj, delta) - try: - weakref.ref(w_obj) - except TypeError: - lifeline = None - else: - lifeline = state.lifeline_dict.get(w_obj) - if lifeline is not None: - refcnt = lifeline.pyo.c_ob_refcnt - if refcnt > 0: - print >>sys.stderr, "\tThe object also held by C code." 
- else: - referrers_repr = [] - for o in gc.get_referrers(w_obj): - try: - repr_str = repr(o) - except TypeError as e: - repr_str = "%s (type of o is %s)" % (str(e), type(o)) - referrers_repr.append(repr_str) - referrers = ", ".join(referrers_repr) - print >>sys.stderr, "\tThe object is referenced by these objects:", \ - referrers - for w_obj in lost_objects_w: - print >>sys.stderr, "Lost object %r" % (w_obj, ) - leaking = True - # the actual low-level leak checking is done by pypy.tool.leakfinder, - # enabled automatically by pypy.conftest. - return leaking class AppTestApi(LeakCheckingTest): def setup_class(cls): @@ -179,15 +103,7 @@ def teardown_method(self, meth): if self.runappdirect: return - self.space.getexecutioncontext().cleanup_cpyext_state() - self.cleanup_references(self.space) - # XXX: like AppTestCpythonExtensionBase.teardown_method: - # find out how to disable check_and_print_leaks() if the - # test failed - assert not self.check_and_print_leaks(), ( - "Test leaks or loses object(s). You should also check if " - "the test actually passed in the first place; if it failed " - "it is likely to reach this place.") + self.cleanup() @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_only_import(self): @@ -355,7 +271,6 @@ self.space.call_method(self.space.sys.get("stdout"), "flush") freeze_refcnts(self) - #self.check_and_print_leaks() def unimport_module(self, name): """ @@ -367,17 +282,12 @@ def teardown_method(self, func): if self.runappdirect: + self.w_debug_collect() return + debug_collect(self.space) for name in self.imported_module_names: self.unimport_module(name) - self.space.getexecutioncontext().cleanup_cpyext_state() - self.cleanup_references(self.space) - # XXX: find out how to disable check_and_print_leaks() if the - # test failed... - assert not self.check_and_print_leaks(), ( - "Test leaks or loses object(s). 
You should also check if " - "the test actually passed in the first place; if it failed " - "it is likely to reach this place.") + self.cleanup() class AppTestCpythonExtension(AppTestCpythonExtensionBase): @@ -415,7 +325,6 @@ def test_export_docstring(self): - import sys init = """ if (Py_IsInitialized()) Py_InitModule("foo", methods); @@ -534,7 +443,6 @@ def test_export_function2(self): - import sys init = """ if (Py_IsInitialized()) Py_InitModule("foo", methods); diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py --- a/pypy/module/gc/referents.py +++ b/pypy/module/gc/referents.py @@ -47,57 +47,6 @@ # ____________________________________________________________ -class PathEntry(object): - # PathEntries are nodes of a complete tree of all objects, but - # built lazily (there is only one branch alive at any time). - # Each node has a 'gcref' and the list of referents from this gcref. - def __init__(self, prev, gcref, referents): - self.prev = prev - self.gcref = gcref - self.referents = referents - self.remaining = len(referents) - - def get_most_recent_w_obj(self): - entry = self - while entry is not None: - if entry.gcref: - w_obj = try_cast_gcref_to_w_root(entry.gcref) - if w_obj is not None: - return w_obj - entry = entry.prev - return None - -def do_get_referrers(w_arg): - result_w = [] - gcarg = rgc.cast_instance_to_gcref(w_arg) - roots = [gcref for gcref in rgc.get_rpy_roots() if gcref] - head = PathEntry(None, rgc.NULL_GCREF, roots) - while True: - head.remaining -= 1 - if head.remaining >= 0: - gcref = head.referents[head.remaining] - if not rgc.get_gcflag_extra(gcref): - # not visited so far - if gcref == gcarg: - w_obj = head.get_most_recent_w_obj() - if w_obj is not None: - result_w.append(w_obj) # found! 
- rgc.toggle_gcflag_extra(gcref) # toggle twice - rgc.toggle_gcflag_extra(gcref) - head = PathEntry(head, gcref, rgc.get_rpy_referents(gcref)) - else: - # no more referents to visit - head = head.prev - if head is None: - break - # done. Clear flags carefully - rgc.toggle_gcflag_extra(gcarg) - rgc.clear_gcflag_extra(roots) - rgc.clear_gcflag_extra([gcarg]) - return result_w - -# ____________________________________________________________ - def _list_w_obj_referents(gcref, result_w): # Get all W_Root reachable directly from gcref, and add them to # the list 'result_w'. @@ -184,9 +133,22 @@ """Return the list of objects that directly refer to any of objs.""" if not rgc.has_gcflag_extra(): raise missing_operation(space) + # xxx uses a lot of memory to make the list of all W_Root objects, + # but it's simpler this way and more correct than the previous + # version of this code (issue #2612). It is potentially very slow + # because each of the n calls to _list_w_obj_referents() could take + # O(n) time as well, in theory, but I hope in practice the whole + # thing takes much less than O(n^2). We could re-add an algorithm + # that visits most objects only once, if needed... 
+ all_objects_w = rgc.do_get_objects(try_cast_gcref_to_w_root) result_w = [] - for w_arg in args_w: - result_w += do_get_referrers(w_arg) + for w_obj in all_objects_w: + refs_w = [] + gcref = rgc.cast_instance_to_gcref(w_obj) + _list_w_obj_referents(gcref, refs_w) + for w_arg in args_w: + if w_arg in refs_w: + result_w.append(w_obj) rgc.assert_no_more_gcflags() return space.newlist(result_w) diff --git a/pypy/module/gc/test/test_referents.py b/pypy/module/gc/test/test_referents.py --- a/pypy/module/gc/test/test_referents.py +++ b/pypy/module/gc/test/test_referents.py @@ -116,3 +116,37 @@ break # found else: assert 0, "the tuple (7,) is not found as gc.get_referrers(7)" + + +class AppTestReferentsMore(object): + + def setup_class(cls): + from rpython.rlib import rgc + cls._backup = [rgc.get_rpy_roots] + l4 = cls.space.newlist([]) + cls.ALL_ROOTS = [l4] + cls.w_ALL_ROOTS = cls.space.newlist(cls.ALL_ROOTS) + rgc.get_rpy_roots = lambda: ( + map(rgc._GcRef, cls.ALL_ROOTS) + [rgc.NULL_GCREF]*2) + cls.w_runappdirect = cls.space.wrap(option.runappdirect) + + def teardown_class(cls): + from rpython.rlib import rgc + rgc.get_rpy_roots = cls._backup[0] + + def test_get_referrers(self): + import gc + class A(object): + pass + a = A() + if not self.runappdirect: + l4 = self.ALL_ROOTS[0] + l4.append(a) # add 'a' to the list which is in roots + lst = gc.get_referrers(A) + assert a in lst + lst = gc.get_referrers(A) + assert a in lst + lst = gc.get_referrers(A) + assert a in lst + lst = gc.get_referrers(A) + assert a in lst diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py @@ -1450,20 +1450,30 @@ py.test.skip("_Bool not in MSVC") ffi = FFI() ffi.cdef("struct foo_s { _Bool x; };" - "_Bool foo(_Bool);") + "_Bool foo(_Bool); _Bool (*foop)(_Bool);") lib = ffi.verify(""" struct 
foo_s { _Bool x; }; int foo(int arg) { return !arg; } + _Bool _foofunc(_Bool x) { + return !x; + } + _Bool (*foop)(_Bool) = _foofunc; """) p = ffi.new("struct foo_s *") p.x = 1 - assert p.x == 1 + assert p.x is True py.test.raises(OverflowError, "p.x = -1") py.test.raises(TypeError, "p.x = 0.0") - assert lib.foo(1) == 0 - assert lib.foo(0) == 1 + assert lib.foop(1) is False + assert lib.foop(True) is False + assert lib.foop(0) is True + py.test.raises(OverflowError, lib.foop, 42) + py.test.raises(TypeError, lib.foop, 0.0) + assert lib.foo(1) is False + assert lib.foo(True) is False + assert lib.foo(0) is True py.test.raises(OverflowError, lib.foo, 42) py.test.raises(TypeError, lib.foo, 0.0) assert int(ffi.cast("_Bool", long(1))) == 1 diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py @@ -1939,7 +1939,7 @@ ffi = FFI() ffi.cdef("bool f(void);") lib = verify(ffi, "test_bool_in_cpp", "char f(void) { return 2; }") - assert lib.f() == 1 + assert lib.f() is True def test_bool_in_cpp_2(): ffi = FFI() diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py @@ -1419,20 +1419,30 @@ py.test.skip("_Bool not in MSVC") ffi = FFI() ffi.cdef("struct foo_s { _Bool x; };" - "_Bool foo(_Bool);") + "_Bool foo(_Bool); _Bool (*foop)(_Bool);") lib = ffi.verify(""" struct foo_s { _Bool x; }; int foo(int arg) { return !arg; } + _Bool _foofunc(_Bool x) { + return !x; + } + _Bool (*foop)(_Bool) = _foofunc; """) p = ffi.new("struct foo_s *") p.x = 1 - assert p.x == 1 + assert p.x is True py.test.raises(OverflowError, "p.x = -1") py.test.raises(TypeError, "p.x = 0.0") - 
assert lib.foo(1) == 0 - assert lib.foo(0) == 1 + assert lib.foop(1) is False + assert lib.foop(True) is False + assert lib.foop(0) is True + py.test.raises(OverflowError, lib.foop, 42) + py.test.raises(TypeError, lib.foop, 0.0) + assert lib.foo(1) is False + assert lib.foo(True) is False + assert lib.foo(0) is True py.test.raises(OverflowError, lib.foo, 42) py.test.raises(TypeError, lib.foo, 0.0) assert int(ffi.cast("_Bool", long(1))) == 1 diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -567,13 +567,38 @@ raise else: return list(buf.as_str()) + return _from_byte_sequence(space, w_source) - # sequence of bytes +def _get_printable_location(w_type): + return ('bytearray_from_byte_sequence [w_type=%s]' % + w_type.getname(w_type.space)) + +_byteseq_jitdriver = jit.JitDriver( + name='bytearray_from_byte_sequence', + greens=['w_type'], + reds=['w_iter', 'data'], + get_printable_location=_get_printable_location) + +def _from_byte_sequence(space, w_source): + # Split off in a separate function for the JIT's benefit + # and add a jitdriver with the type of w_iter as the green key w_iter = space.iter(w_source) length_hint = space.length_hint(w_source, 0) data = newlist_hint(length_hint) - extended = 0 + # + _from_byte_sequence_loop(space, w_iter, data) + # + extended = len(data) + if extended < length_hint: + resizelist_hint(data, extended) + return data + +def _from_byte_sequence_loop(space, w_iter, data): + w_type = space.type(w_iter) while True: + _byteseq_jitdriver.jit_merge_point(w_type=w_type, + w_iter=w_iter, + data=data) try: w_item = space.next(w_iter) except OperationError as e: @@ -581,10 +606,6 @@ raise break data.append(space.byte_w(w_item)) - extended += 1 - if extended < length_hint: - resizelist_hint(data, extended) - return data def _hex_digit_to_int(d): diff --git a/pypy/objspace/std/test/test_bytearrayobject.py 
b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -448,6 +448,13 @@ raises(TypeError, b.extend, [object()]) raises(TypeError, b.extend, u"unicode") + def test_extend_calls_len_or_lengthhint(self): + class BadLen(object): + def __iter__(self): return iter(range(10)) + def __len__(self): raise RuntimeError('hello') + b = bytearray() + raises(RuntimeError, b.extend, BadLen()) + def test_setitem_from_front(self): b = bytearray(b'abcdefghij') b[:2] = b'' diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py --- a/pypy/objspace/std/util.py +++ b/pypy/objspace/std/util.py @@ -8,7 +8,7 @@ IDTAG_LONG = 3 IDTAG_FLOAT = 5 IDTAG_COMPLEX = 7 -IDTAG_METHOD = 9 +IDTAG_UNBOUND_METHOD = 9 IDTAG_SPECIAL = 11 # -1 - (-maxunicode-1): unichar # 0 - 255: char # 256: empty string diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -35,3 +35,15 @@ return int(count) except (OSError, ValueError): return 1 + +def detect_pax(): + """ + Function to determine if your system comes with PAX protection. + """ + if sys.platform.startswith('linux'): + # we need a running process PID and 1 is always running + with open("/proc/1/status") as fd: + data = fd.read() + if 'PaX' in data: + return True + return False diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -2308,6 +2308,7 @@ ll_assert(not (self.probably_young_objects_with_finalizers .non_empty()), "probably_young_objects_with_finalizers should be empty") + self.kept_alive_by_finalizer = r_uint(0) if self.old_objects_with_finalizers.non_empty(): self.deal_with_objects_with_finalizers() elif self.old_objects_with_weakrefs.non_empty(): @@ -2380,6 +2381,9 @@ # we currently have -- but no more than 'max_delta' more than # we currently have. 
total_memory_used = float(self.get_total_memory_used()) + total_memory_used -= float(self.kept_alive_by_finalizer) + if total_memory_used < 0: + total_memory_used = 0 bounded = self.set_major_threshold_from( min(total_memory_used * self.major_collection_threshold, total_memory_used + self.max_delta), @@ -2418,7 +2422,7 @@ self.execute_finalizers() #END FINALIZING else: - pass #XXX which exception to raise here. Should be unreachable. + ll_assert(False, "bogus gc_state") debug_print("stopping, now in gc state: ", GC_STATES[self.gc_state]) debug_stop("gc-collect-step") @@ -2784,8 +2788,17 @@ def _bump_finalization_state_from_0_to_1(self, obj): ll_assert(self._finalization_state(obj) == 0, "unexpected finalization state != 0") + size_gc_header = self.gcheaderbuilder.size_gc_header + totalsize = size_gc_header + self.get_size(obj) hdr = self.header(obj) hdr.tid |= GCFLAG_FINALIZATION_ORDERING + # A bit hackish, but we will not count these objects as "alive" + # for the purpose of computing when the next major GC should + # occur. This is done for issue #2590: without this, if we + # allocate mostly objects with finalizers, the + # next_major_collection_threshold grows forever and actual + # memory usage is not bounded. + self.kept_alive_by_finalizer += raw_malloc_usage(totalsize) def _recursively_bump_finalization_state_from_2_to_3(self, obj): ll_assert(self._finalization_state(obj) == 2, diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py --- a/rpython/memory/gc/minimark.py +++ b/rpython/memory/gc/minimark.py @@ -1636,6 +1636,7 @@ # with a finalizer and all objects reachable from there (and also # moves some objects from 'objects_with_finalizers' to # 'run_finalizers'). + self.kept_alive_by_finalizer = r_uint(0) if self.old_objects_with_finalizers.non_empty(): self.deal_with_objects_with_finalizers() # @@ -1678,6 +1679,9 @@ # we currently have -- but no more than 'max_delta' more than # we currently have. 
total_memory_used = float(self.get_total_memory_used()) + total_memory_used -= float(self.kept_alive_by_finalizer) + if total_memory_used < 0: + total_memory_used = 0 bounded = self.set_major_threshold_from( min(total_memory_used * self.major_collection_threshold, total_memory_used + self.max_delta), @@ -1999,8 +2003,11 @@ def _bump_finalization_state_from_0_to_1(self, obj): ll_assert(self._finalization_state(obj) == 0, "unexpected finalization state != 0") + size_gc_header = self.gcheaderbuilder.size_gc_header + totalsize = size_gc_header + self.get_size(obj) hdr = self.header(obj) hdr.tid |= GCFLAG_FINALIZATION_ORDERING + self.kept_alive_by_finalizer += raw_malloc_usage(totalsize) def _recursively_bump_finalization_state_from_2_to_3(self, obj): ll_assert(self._finalization_state(obj) == 2, diff --git a/rpython/memory/test/test_minimark_gc.py b/rpython/memory/test/test_minimark_gc.py --- a/rpython/memory/test/test_minimark_gc.py +++ b/rpython/memory/test/test_minimark_gc.py @@ -1,3 +1,4 @@ +from rpython.rlib import rgc from rpython.rlib.rarithmetic import LONG_BIT from rpython.memory.test import test_semispace_gc @@ -9,3 +10,39 @@ GC_CAN_SHRINK_BIG_ARRAY = False GC_CAN_MALLOC_NONMOVABLE = True BUT_HOW_BIG_IS_A_BIG_STRING = 11*WORD + + def test_bounded_memory_when_allocating_with_finalizers(self): + # Issue #2590: when allocating a lot of objects with a finalizer + # and little else, the bounds in the (inc)minimark GC are not + # set up reasonably and the total memory usage grows without + # limit. 
+ class B(object): + pass + b = B() + b.num_deleted = 0 + class A(object): + def __init__(self): + fq.register_finalizer(self) + class FQ(rgc.FinalizerQueue): + Class = A + def finalizer_trigger(self): + while True: + a = self.next_dead() + if a is None: + break + b.num_deleted += 1 + fq = FQ() + def f(x, y): + i = 0 + alive_max = 0 + while i < x: + i += 1 + a = A() + a.x = a.y = a.z = i + #print i - b.num_deleted, b.num_deleted + alive = i - b.num_deleted + assert alive >= 0 + alive_max = max(alive_max, alive) + return alive_max + res = self.interpret(f, [1000, 0]) + assert res < 100 diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -20,7 +20,7 @@ includes = ('sys/types.h', 'sys/socket.h', 'sys/un.h', - 'sys/poll.h', + 'poll.h', 'sys/select.h', 'sys/types.h', 'netinet/in.h', diff --git a/rpython/rlib/rsre/rpy/_sre.py b/rpython/rlib/rsre/rpy/_sre.py --- a/rpython/rlib/rsre/rpy/_sre.py +++ b/rpython/rlib/rsre/rpy/_sre.py @@ -16,6 +16,8 @@ def get_code(regexp, flags=0, allargs=False): + """NOT_RPYTHON: you can't compile new regexps in an RPython program, + you can only use precompiled ones""" from . 
import sre_compile try: sre_compile.compile(regexp, flags) diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -16,6 +16,8 @@ allow_surrogate_by_default = True BYTEORDER = sys.byteorder +BYTEORDER2 = BYTEORDER[0] + 'e' # either "le" or "be" +assert BYTEORDER2 in ('le', 'be') # python 2.7 has a preview of py3k behavior, so those functions # are used either when we're testing wide pypy on narrow cpython @@ -486,9 +488,31 @@ errorhandler, "little") return result, length +def py3k_str_decode_utf_16(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, + errorhandler, "native", + 'utf-16-' + BYTEORDER2) + return result, length + +def py3k_str_decode_utf_16_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, + errorhandler, "big", + 'utf-16-be') + return result, length + +def py3k_str_decode_utf_16_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, + errorhandler, "little", + 'utf-16-le') + return result, length + def str_decode_utf_16_helper(s, size, errors, final=True, errorhandler=None, - byteorder="native"): + byteorder="native", + public_encoding_name='utf16'): if errorhandler is None: errorhandler = default_unicode_error_decode bo = 0 @@ -546,7 +570,8 @@ if len(s) - pos < 2: if not final: break - r, pos = errorhandler(errors, 'utf16', "truncated data", + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", s, pos, len(s)) result.append(r) if len(s) - pos < 2: @@ -562,7 +587,8 @@ if not final: break errmsg = "unexpected end of data" - r, pos = errorhandler(errors, 'utf16', errmsg, s, pos, len(s)) + r, pos = errorhandler(errors, public_encoding_name, + errmsg, s, pos, len(s)) result.append(r) if len(s) - pos < 2: break @@ -578,12 +604,12 
@@ (ch2 & 0x3FF)) + 0x10000)) continue else: - r, pos = errorhandler(errors, 'utf16', + r, pos = errorhandler(errors, public_encoding_name, "illegal UTF-16 surrogate", s, pos - 4, pos - 2) result.append(r) else: - r, pos = errorhandler(errors, 'utf16', + r, pos = errorhandler(errors, public_encoding_name, "illegal encoding", s, pos - 2, pos) result.append(r) @@ -592,7 +618,8 @@ def unicode_encode_utf_16_helper(s, size, errors, errorhandler=None, allow_surrogates=True, - byteorder='little'): + byteorder='little', + public_encoding_name='utf16'): if errorhandler is None: errorhandler = default_unicode_error_encode if size == 0: @@ -620,13 +647,13 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, rs, pos = errorhandler(errors, 'utf16', + ru, rs, pos = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) if rs is not None: # py3k only if len(rs) % 2 != 0: - errorhandler('strict', 'utf16', + errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) result.append(rs) @@ -635,7 +662,7 @@ if ord(ch) < 0xD800: _STORECHAR(result, ord(ch), byteorder) else: - errorhandler('strict', 'utf16', + errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) continue @@ -648,20 +675,39 @@ return unicode_encode_utf_16_helper(s, size, errors, errorhandler, allow_surrogates, "native") - def unicode_encode_utf_16_be(s, size, errors, errorhandler=None, allow_surrogates=True): return unicode_encode_utf_16_helper(s, size, errors, errorhandler, allow_surrogates, "big") - def unicode_encode_utf_16_le(s, size, errors, errorhandler=None, allow_surrogates=True): return unicode_encode_utf_16_helper(s, size, errors, errorhandler, allow_surrogates, "little") +def py3k_unicode_encode_utf_16(s, size, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-16-' + BYTEORDER2) + 
+def py3k_unicode_encode_utf_16_be(s, size, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-16-be') + +def py3k_unicode_encode_utf_16_le(s, size, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-16-le') + # ____________________________________________________________ # utf-32 @@ -684,12 +730,34 @@ errorhandler, "little") return result, length +def py3k_str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, + errorhandler, "native", + 'utf-32-' + BYTEORDER2) + return result, length + +def py3k_str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, + errorhandler, "big", + 'utf-32-be') + return result, length + +def py3k_str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, + errorhandler, "little", + 'utf-32-le') + return result, length + BOM32_DIRECT = intmask(0x0000FEFF) BOM32_REVERSE = intmask(0xFFFE0000) def str_decode_utf_32_helper(s, size, errors, final=True, From pypy.commits at gmail.com Thu Aug 3 20:16:20 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:20 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove test_aclassloader and test_streams as these are backend tests Message-ID: <5983bcd4.c4b3df0a.7caab.314c@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92057:8a2ade4d76bf Date: 2017-08-03 13:23 -0700 http://bitbucket.org/pypy/pypy/changeset/8a2ade4d76bf/ Log: remove test_aclassloader and test_streams as these are backend tests diff --git a/pypy/module/_cppyy/test/test_aclassloader.py 
b/pypy/module/_cppyy/test/test_aclassloader.py deleted file mode 100644 --- a/pypy/module/_cppyy/test/test_aclassloader.py +++ /dev/null @@ -1,28 +0,0 @@ -import py, os, sys - - -currpath = py.path.local(__file__).dirpath() - -def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make example01Dict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") - - -class AppTestACLASSLOADER: - spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) - - def setup_class(cls): - cls.space.appexec([], """(): - import _cppyy""") - - def test01_class_autoloading(self): - """Test whether a class can be found through .rootmap.""" - import _cppyy - example01_class = _cppyy.gbl.example01 - assert example01_class - cl2 = _cppyy.gbl.example01 - assert cl2 - assert example01_class is cl2 diff --git a/pypy/module/_cppyy/test/test_streams.py b/pypy/module/_cppyy/test/test_streams.py deleted file mode 100644 --- a/pypy/module/_cppyy/test/test_streams.py +++ /dev/null @@ -1,38 +0,0 @@ -import py, os, sys - - -currpath = py.path.local(__file__).dirpath() -test_dct = str(currpath.join("std_streamsDict.so")) - -def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make std_streamsDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") - -class AppTestSTDStreams: - spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) - - def setup_class(cls): - cls.w_test_dct = cls.space.newtext(test_dct) - cls.w_streams = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) - - def test01_std_ostream(self): - """Test availability of std::ostream""" - - import _cppyy - - assert _cppyy.gbl.std is _cppyy.gbl.std - assert _cppyy.gbl.std.ostream is _cppyy.gbl.std.ostream - - assert callable(_cppyy.gbl.std.ostream) - - def test02_std_cout(self): - """Test 
access to std::cout""" - - import _cppyy - - assert not (_cppyy.gbl.std.cout is None) From pypy.commits at gmail.com Thu Aug 3 20:16:22 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:22 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove streams test support files Message-ID: <5983bcd6.97a0df0a.b1ca5.abc5@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92058:110184c3d9d9 Date: 2017-08-03 13:24 -0700 http://bitbucket.org/pypy/pypy/changeset/110184c3d9d9/ Log: remove streams test support files diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -1,6 +1,6 @@ dicts = example01Dict.so datatypesDict.so advancedcppDict.so advancedcpp2Dict.so \ overloadsDict.so stltypesDict.so operatorsDict.so fragileDict.so crossingDict.so \ -std_streamsDict.so iotypesDict.so +iotypesDict.so all : $(dicts) ifneq (${REFLEXHOME},) @@ -62,12 +62,6 @@ endif -ifeq ($(DUMMY),) -# TODO: methptrgetter causes these tests to crash, so don't use it for now -std_streamsDict.so: std_streams.cxx std_streams.h std_streams.xml - $(genreflex) std_streams.h --selection=std_streams.xml - g++ -o $@ std_streams_rflx.cpp std_streams.cxx -shared -std=c++11 $(cppflags) $(cppflags2) -endif .PHONY: clean clean: diff --git a/pypy/module/_cppyy/test/std_streams.cxx b/pypy/module/_cppyy/test/std_streams.cxx deleted file mode 100644 --- a/pypy/module/_cppyy/test/std_streams.cxx +++ /dev/null @@ -1,3 +0,0 @@ -#include "std_streams.h" - -template class std::basic_ios >; diff --git a/pypy/module/_cppyy/test/std_streams.h b/pypy/module/_cppyy/test/std_streams.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/std_streams.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef STD_STREAMS_H -#define STD_STREAMS_H 1 - -#ifndef __CINT__ -#include -#endif -#include - -#ifndef __CINT__ -extern template class std::basic_ios >; -#endif - -#endif // STD_STREAMS_H diff 
--git a/pypy/module/_cppyy/test/std_streams.xml b/pypy/module/_cppyy/test/std_streams.xml deleted file mode 100644 --- a/pypy/module/_cppyy/test/std_streams.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - From pypy.commits at gmail.com Thu Aug 3 20:16:18 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:18 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: add back the argument passing union for dummy backend Message-ID: <5983bcd2.14d81c0a.8a8fe.3d5b@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92056:d66a61b5e9f9 Date: 2017-08-03 12:18 -0700 http://bitbucket.org/pypy/pypy/changeset/d66a61b5e9f9/ Log: add back the argument passing union for dummy backend diff --git a/pypy/module/_cppyy/src/dummy_backend.cxx b/pypy/module/_cppyy/src/dummy_backend.cxx --- a/pypy/module/_cppyy/src/dummy_backend.cxx +++ b/pypy/module/_cppyy/src/dummy_backend.cxx @@ -1,4 +1,3 @@ -#include "cppyy.h" #include "capi.h" #include @@ -14,6 +13,27 @@ #pragma GCC diagnostic ignored "-Winvalid-offsetof" +// union for argument passing +struct CPPYY_G__value { + union { + double d; + long i; /* used to be int */ + char ch; + short sh; + int in; + float fl; + unsigned char uch; + unsigned short ush; + unsigned int uin; + unsigned long ulo; + long long ll; + unsigned long long ull; + long double ld; + } obj; + long ref; + int type; +}; + // add example01.cxx code int globalAddOneToInt(int a); From pypy.commits at gmail.com Thu Aug 3 20:16:28 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:28 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove now superfluous LinkDef files (were for CINT; cling can use xml) Message-ID: <5983bcdc.db6f1c0a.3caf2.57a8@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92061:ad8862eba7b7 Date: 2017-08-03 15:08 -0700 http://bitbucket.org/pypy/pypy/changeset/ad8862eba7b7/ Log: remove now superfluous LinkDef files (were for CINT; cling can use xml) 
diff --git a/pypy/module/_cppyy/test/advancedcpp2_LinkDef.h b/pypy/module/_cppyy/test/advancedcpp2_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/advancedcpp2_LinkDef.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ namespace a_ns; -#pragma link C++ namespace a_ns::d_ns; -#pragma link C++ struct a_ns::g_class; -#pragma link C++ struct a_ns::g_class::h_class; -#pragma link C++ struct a_ns::d_ns::i_class; -#pragma link C++ struct a_ns::d_ns::i_class::j_class; -#pragma link C++ variable a_ns::g_g; -#pragma link C++ function a_ns::get_g_g; -#pragma link C++ variable a_ns::d_ns::g_i; -#pragma link C++ function a_ns::d_ns::get_g_i; - -#endif diff --git a/pypy/module/_cppyy/test/advancedcpp_LinkDef.h b/pypy/module/_cppyy/test/advancedcpp_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/advancedcpp_LinkDef.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class short_defaulter; -#pragma link C++ class ushort_defaulter; -#pragma link C++ class int_defaulter; -#pragma link C++ class uint_defaulter; -#pragma link C++ class long_defaulter; -#pragma link C++ class ulong_defaulter; -#pragma link C++ class llong_defaulter; -#pragma link C++ class ullong_defaulter; -#pragma link C++ class float_defaulter; -#pragma link C++ class double_defaulter; - -#pragma link C++ class base_class; -#pragma link C++ class derived_class; - -#pragma link C++ class a_class; -#pragma link C++ class b_class; -#pragma link C++ class c_class; -#pragma link C++ class c_class_1; -#pragma link C++ class c_class_2; -#pragma link C++ class d_class; - -#pragma link C++ function create_c1(); -#pragma link C++ function create_c2(); - -#pragma link C++ function get_a(a_class&); -#pragma link C++ function get_b(b_class&); -#pragma link C++ function 
get_c(c_class&); -#pragma link C++ function get_d(d_class&); - -#pragma link C++ class T1; -#pragma link C++ class T2 >; -#pragma link C++ class T3; -#pragma link C++ class T3, T2 > >; -#pragma link C++ class a_ns::T4; -#pragma link C++ class a_ns::T4 >; -#pragma link C++ class a_ns::T4 > >; - -#pragma link C++ namespace a_ns; -#pragma link C++ namespace a_ns::d_ns; -#pragma link C++ struct a_ns::b_class; -#pragma link C++ struct a_ns::b_class::c_class; -#pragma link C++ struct a_ns::d_ns::e_class; -#pragma link C++ struct a_ns::d_ns::e_class::f_class; -#pragma link C++ variable a_ns::g_a; -#pragma link C++ function a_ns::get_g_a; -#pragma link C++ variable a_ns::d_ns::g_d; -#pragma link C++ function a_ns::d_ns::get_g_d; - -#pragma link C++ class some_abstract_class; -#pragma link C++ class some_concrete_class; -#pragma link C++ class some_convertible; -#pragma link C++ class some_class_with_data; -#pragma link C++ class some_class_with_data::some_data; - -#pragma link C++ class some_comparable; -#pragma link C++ function operator==(const some_comparable&, const some_comparable&); -#pragma link C++ function operator!=(const some_comparable&, const some_comparable&); - -#pragma link C++ class ref_tester; -#pragma link C++ class std::vector; -#pragma link C++ class pointer_pass; - -#pragma link C++ class multi1; -#pragma link C++ class multi2; -#pragma link C++ class multi; - -#pragma link C++ class new_overloader; - -#pragma link C++ class my_templated_class >; -#pragma link C++ function my_templated_function(char); -#pragma link C++ function my_templated_function(double); -#pragma link C++ class my_templated_method_class; -#pragma link C++ typedef my_typedef_t; - -#pragma link C++ class overload_one_way; -#pragma link C++ class overload_the_other_way; - -#endif diff --git a/pypy/module/_cppyy/test/crossing_LinkDef.h b/pypy/module/_cppyy/test/crossing_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/crossing_LinkDef.h +++ /dev/null @@ -1,11 +0,0 @@ 
-#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ namespace crossing; - -#pragma link C++ class crossing::A; - -#endif diff --git a/pypy/module/_cppyy/test/datatypes_LinkDef.h b/pypy/module/_cppyy/test/datatypes_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/datatypes_LinkDef.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ struct cppyy_test_pod; -#pragma link C++ class cppyy_test_data; -#pragma link C++ class four_vector; - -#pragma link C++ enum fruit; - -#pragma link C++ function get_pod_address(cppyy_test_data&); -#pragma link C++ function get_int_address(cppyy_test_data&); -#pragma link C++ function get_double_address(cppyy_test_data&); -#pragma link C++ function set_global_int(int); -#pragma link C++ function get_global_int(); - -#pragma link C++ function is_global_pod(cppyy_test_pod*); -#pragma link C++ function set_global_pod(cppyy_test_pod*); -#pragma link C++ function get_global_pod(); -#pragma link C++ function get_null_pod(); - -#pragma link C++ global N; -#pragma link C++ global g_int; -#pragma link C++ global g_pod; - -#endif diff --git a/pypy/module/_cppyy/test/example01_LinkDef.h b/pypy/module/_cppyy/test/example01_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/example01_LinkDef.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class example01; -#pragma link C++ typedef example01_t; -#pragma link C++ class example01a; -#pragma link C++ class payload; -#pragma link C++ class ArgPasser; -#pragma link C++ class z_; - -#pragma link C++ function globalAddOneToInt(int); -#pragma link C++ function installableAddOneToInt(example01&, int); - -#pragma link C++ namespace ns_example01; -#pragma link C++ function 
ns_example01::globalAddOneToInt(int); - -#pragma link C++ variable ns_example01::gMyGlobalInt; - -#endif diff --git a/pypy/module/_cppyy/test/fragile_LinkDef.h b/pypy/module/_cppyy/test/fragile_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/fragile_LinkDef.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ namespace fragile; -#pragma link C++ namespace fragile::nested1; -#pragma link C++ namespace fragile::nested1::nested2; -#pragma link C++ namespace fragile::nested1::nested2::nested3; - -#pragma link C++ class fragile::A; -#pragma link C++ class fragile::B; -#pragma link C++ class fragile::C; -#pragma link C++ class fragile::D; -#pragma link C++ class fragile::E; -#pragma link C++ class fragile::F; -#pragma link C++ class fragile::G; -#pragma link C++ class fragile::H; -#pragma link C++ class fragile::I; -#pragma link C++ class fragile::J; -#pragma link C++ class fragile::K; -#pragma link C++ class fragile::L; -#pragma link C++ class fragile::M; -#pragma link C++ class fragile::N; -#pragma link C++ class fragile::O; -#pragma link C++ class fragile::nested1::A; -#pragma link C++ class fragile::nested1::nested2::A; -#pragma link C++ class fragile::nested1::nested2::nested3::A; - -#pragma link C++ variable fragile::gI; - -#pragma link C++ function fragile::fglobal; - -#endif diff --git a/pypy/module/_cppyy/test/iotypes_LinkDef.h b/pypy/module/_cppyy/test/iotypes_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/iotypes_LinkDef.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -using namespace std; -#pragma link C++ class vector >+; -#pragma link C++ class vector >::iterator; -#pragma link C++ class vector >::const_iterator; - -#pragma link C++ namespace IO; -#pragma link C++ class IO::SomeDataObject+; -#pragma link C++ 
class IO::SomeDataStruct+; - -#endif diff --git a/pypy/module/_cppyy/test/operators_LinkDef.h b/pypy/module/_cppyy/test/operators_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/operators_LinkDef.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class number; - -#pragma link C++ struct operator_char_star; -#pragma link C++ struct operator_const_char_star; -#pragma link C++ struct operator_int; -#pragma link C++ struct operator_long; -#pragma link C++ struct operator_double; -#pragma link C++ struct operator_short; -#pragma link C++ struct operator_unsigned_int; -#pragma link C++ struct operator_unsigned_long; -#pragma link C++ struct operator_float; - -#pragma link C++ class v_opeq_base; -#pragma link C++ class v_opeq_derived; - -#endif diff --git a/pypy/module/_cppyy/test/overloads_LinkDef.h b/pypy/module/_cppyy/test/overloads_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/overloads_LinkDef.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class a_overload; -#pragma link C++ class b_overload; -#pragma link C++ class c_overload; -#pragma link C++ class d_overload; - -#pragma link C++ namespace ns_a_overload; -#pragma link C++ class ns_a_overload::a_overload; -#pragma link C++ class ns_a_overload::b_overload; - -#pragma link C++ class ns_b_overload; -#pragma link C++ class ns_b_overload::a_overload; - -#pragma link C++ class aa_ol; -#pragma link C++ class cc_ol; - -#pragma link C++ class more_overloads; -#pragma link C++ class more_overloads2; - -#pragma link C++ function calc_mean; - -#endif diff --git a/pypy/module/_cppyy/test/stltypes_LinkDef.h b/pypy/module/_cppyy/test/stltypes_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/stltypes_LinkDef.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifdef 
__CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class std::vector; -#pragma link C++ class std::vector::iterator; -#pragma link C++ class std::vector::const_iterator; - -#pragma link C++ class map; -#pragma link C++ class map::iterator; -#pragma link C++ class map::const_iterator; -#pragma link C++ class pair; - -#pragma link C++ class map; -#pragma link C++ class map::iterator; -#pragma link C++ class map::const_iterator; -#pragma link C++ class pair; - -#pragma link C++ class just_a_class; -#pragma link C++ class stringy_class; -#pragma link C++ class stl_like_class; - -#endif From pypy.commits at gmail.com Thu Aug 3 20:16:24 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:24 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: simplify Makefile (expects backend for full tests) and remove obsolete options Message-ID: <5983bcd8.05861c0a.ba361.473a@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92059:21fdf4274169 Date: 2017-08-03 14:04 -0700 http://bitbucket.org/pypy/pypy/changeset/21fdf4274169/ Log: simplify Makefile (expects backend for full tests) and remove obsolete options diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -1,26 +1,23 @@ -dicts = example01Dict.so datatypesDict.so advancedcppDict.so advancedcpp2Dict.so \ -overloadsDict.so stltypesDict.so operatorsDict.so fragileDict.so crossingDict.so \ -iotypesDict.so +dicts = advancedcppDict.so \ + advancedcpp2Dict.so \ + crossingDict.so \ + datatypesDict.so \ + example01Dict.so \ + fragileDict.so \ + operatorsDict.so \ + overloadsDict.so \ + stltypesDict.so + all : $(dicts) -ifneq (${REFLEXHOME},) - ROOTSYS := ${REFLEXHOME} -else ifneq (${ROOTSYS},) - ROOTSYS := ${ROOTSYS} +HASGENREFLEX:=$(shell command -v genreflex 2> /dev/null) + +cppflags=-std=c++11 -O3 -m64 -fPIC 
-rdynamic +ifdef HASGENREFLEX + genreflex_flags:=$(shell genreflex --cppflags) + cppflags+=$(genreflex_flags) else - DUMMY := t -endif - -ifeq ($(DUMMY),t) - cppflags= -else - ifeq ($(ROOTSYS),) - genreflex=genreflex - cppflags=-pthread -std=c++11 -m64 -I./include -L./lib64 -L./lib - else - genreflex=$(ROOTSYS)/bin/genreflex - cppflags=$(shell $(ROOTSYS)/bin/root-config --cflags) $(shell $(ROOTSYS)/bin/root-config --ldflags) -L$(shell $(ROOTSYS)/bin/root-config --libdir) -lCore - endif + cppflags+=-DCPPYY_DUMMY_BACKEND endif PLATFORM := $(shell uname -s) @@ -28,41 +25,22 @@ cppflags+=-dynamiclib -single_module -arch x86_64 -undefined dynamic_lookup endif -ifeq ($(DUMMY),t) - cppflags2=-O3 -fPIC -rdynamic -std=c++11 -DCPPYY_DUMMY_BACKEND -else ifeq ($(CLING),t) - cppflags2=-O3 -fPIC -rdynamic -else - ifeq ($(shell $(genreflex) --help | grep -- --with-methptrgetter),) - genreflexflags= - cppflags2=-O3 -fPIC - else - genreflexflags=--with-methptrgetter - cppflags2=-Wno-pmf-conversions -O3 -fPIC - endif -endif -ifeq ($(CLING),t) -%Dict.so: %.h %.cxx %_cling.cxx - g++ -o $@ $*.cxx $*_cling.cxx -shared $(cppflags) $(cppflags2) - -%_cling.cxx: %.h %_LinkDef.h - rootcling -f $@ -rml $*Dict.so -rmf $*Dict.rootmap -c $*.h $*_LinkDef.h - -else ifeq ($(DUMMY),t) +ifndef HASREFLEX %Dict.so: %.cxx - g++ -o $@ $^ -shared $(cppflags) $(cppflags2) + $(CXX) -shared $(cppflags) -o $@ $^ else # reflex %Dict.so: %_rflx.cpp %.cxx - g++ -o $@ $^ -shared $(cppflags) $(cppflags2) + $(CXX) -shared $(cppflags) -o $@ $^ %_rflx.cpp: %.h %.xml - $(genreflex) $< $(genreflexflags) --selection=$*.xml --rootmap=$*Dict.rootmap --rootmap-lib=$*Dict.so + genreflex $< --selection=$*.xml --rootmap=$*Dict.rootmap --rootmap-lib=$*Dict.so endif .PHONY: clean + clean: - -rm -f $(dicts) $(subst .so,.rootmap,$(dicts)) $(subst Dict.so,_rflx_rdict.pcm,$(dicts)) $(subst Dict.so,_rflx.cpp,$(dicts)) $(subst Dict.so,_cling.h,$(dicts)) $(subst Dict.so,_cling.cxx,$(dicts)) $(subst Dict.so,_cling_rdict.pcm,$(dicts)) 
$(wildcard *.pyc) + -rm -f $(dicts) $(subst .so,.rootmap,$(dicts)) $(subst Dict.so,_rflx_rdict.pcm,$(dicts)) $(subst Dict.so,_rflx.cpp,$(dicts)) $(wildcard *.pyc) From pypy.commits at gmail.com Thu Aug 3 20:16:30 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:30 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: destruct -> __destruct__ in pythonification Message-ID: <5983bcde.8b841c0a.bd9b0.638d@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92062:319df9089e80 Date: 2017-08-03 15:19 -0700 http://bitbucket.org/pypy/pypy/changeset/319df9089e80/ Log: destruct -> __destruct__ in pythonification diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -369,7 +369,7 @@ while i != self.end(): yield i.__deref__() i.__preinc__() - i.destruct() + i.__destruct__() raise StopIteration pyclass.__iter__ = __iter__ # else: rely on numbered iteration From pypy.commits at gmail.com Thu Aug 3 20:16:31 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:31 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: fix comment Message-ID: <5983bcdf.01571c0a.14ca8.4c78@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92063:d2b2427a84ce Date: 2017-08-03 16:09 -0700 http://bitbucket.org/pypy/pypy/changeset/d2b2427a84ce/ Log: fix comment diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -1,9 +1,8 @@ from pypy.interpreter.mixedmodule import MixedModule class Module(MixedModule): - "This module provides runtime bindings to C++ code for which reflection\n\ - info has been generated. Current supported back-ends are Reflex and CINT.\n\ - See http://doc.pypy.org/en/latest/cppyy.html for full details." 
+ "This module bridges the cppyy frontend with its backend, through PyPy.\n\ + See http://cppyy.readthedocs.io/en/latest for full details." interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', From pypy.commits at gmail.com Thu Aug 3 20:16:26 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:26 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove load_reflection_info (lives in frontend) Message-ID: <5983bcda.2e9ddf0a.f7c95.6fd2@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92060:27a381b2e6b9 Date: 2017-08-03 15:08 -0700 http://bitbucket.org/pypy/pypy/changeset/27a381b2e6b9/ Log: remove load_reflection_info (lives in frontend) diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -6,7 +6,6 @@ See http://doc.pypy.org/en/latest/cppyy.html for full details." interpleveldefs = { - '_load_dictionary' : 'interp_cppyy.load_dictionary', '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', '_template_byname' : 'interp_cppyy.template_byname', @@ -22,7 +21,6 @@ appleveldefs = { '_init_pythonify' : 'pythonify._init_pythonify', - 'load_reflection_info' : 'pythonify.load_reflection_info', 'add_pythonization' : 'pythonify.add_pythonization', 'Template' : 'pythonify.CPPTemplate', } diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -320,9 +320,6 @@ ptr = _cdata_to_ptr(space, w_cdata) # see above ... something better?
return rffi.cast(rffi.CCHARP, ptr) -def c_load_dictionary(name): - return libffi.CDLL(name) - # name to opaque C++ scope representation ------------------------------------ def c_num_scopes(space, cppscope): return space.int_w(call_capi(space, 'num_scopes', [_ArgH(cppscope.handle)])) diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -31,21 +31,6 @@ def lt(self, a, b): return a.priority() < b.priority() -@unwrap_spec(name='text') -def load_dictionary(space, name): - try: - cdll = capi.c_load_dictionary(name) - if not cdll: - raise OperationError(space.w_RuntimeError, space.newtext(str("could not load dictionary " + name))) - - except rdynload.DLOpenError as e: - if hasattr(space, "fake"): # FakeSpace fails e.msg (?!) - errmsg = "failed to load cdll" - else: - errmsg = e.msg - raise OperationError(space.w_RuntimeError, space.newtext(str(errmsg))) - return W_CPPLibrary(space, cdll) - class State(object): def __init__(self, space): self.cppscope_cache = { diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -403,17 +403,6 @@ pyclass.__getitem__ = getitem pyclass.__len__ = return2 -_loaded_dictionaries = {} -def load_reflection_info(name): - """Takes the name of a library containing reflection info, returns a handle - to the loaded library.""" - try: - return _loaded_dictionaries[name] - except KeyError: - import _cppyy - lib = _cppyy._load_dictionary(name) - _loaded_dictionaries[name] = lib - return lib def _init_pythonify(): # _cppyy should not be loaded at the module level, as that will trigger a diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -26,7 +26,7 @@ endif -ifndef HASREFLEX +ifndef HASGENREFLEX %Dict.so: %.cxx $(CXX)
-shared $(cppflags) -o $@ $^ diff --git a/pypy/module/_cppyy/test/conftest.py b/pypy/module/_cppyy/test/conftest.py --- a/pypy/module/_cppyy/test/conftest.py +++ b/pypy/module/_cppyy/test/conftest.py @@ -14,10 +14,10 @@ py.test.skip("genreflex is not installed") import re if tst == 'test_pythonify.py' and \ - not re.search("AppTestPYTHONIFY.test0[1-6]", item.location[2]): + not re.search("AppTestPYTHONIFY.test0[1-5]", item.location[2]): py.test.skip("genreflex is not installed") elif tst == 'test_datatypes.py' and \ - not re.search("AppTestDATATYPES.test0[1-8]", item.location[2]): + not re.search("AppTestDATATYPES.test0[1-7]", item.location[2]): py.test.skip("genreflex is not installed") def pytest_ignore_collect(path, config): diff --git a/pypy/module/_cppyy/test/datatypes.h b/pypy/module/_cppyy/test/datatypes.h --- a/pypy/module/_cppyy/test/datatypes.h +++ b/pypy/module/_cppyy/test/datatypes.h @@ -1,6 +1,3 @@ -#ifndef CPPYY_DUMMY_BACKEND -#include "RtypesCore.h" -#else // copied from RtypesCore.h ... 
#if defined(R__WIN32) && !defined(__CINT__) typedef __int64 Long64_t; //Portable signed long integer 8 bytes @@ -9,7 +6,7 @@ typedef long long Long64_t; //Portable signed long integer 8 bytes typedef unsigned long long ULong64_t;//Portable unsigned long integer 8 bytes #endif -#endif + #include const int N = 5; diff --git a/pypy/module/_cppyy/test/std_streams_LinkDef.h b/pypy/module/_cppyy/test/std_streams_LinkDef.h deleted file mode 100644 --- a/pypy/module/_cppyy/test/std_streams_LinkDef.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifdef __CINT__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#endif diff --git a/pypy/module/_cppyy/test/test_advancedcpp.py b/pypy/module/_cppyy/test/test_advancedcpp.py --- a/pypy/module/_cppyy/test/test_advancedcpp.py +++ b/pypy/module/_cppyy/test/test_advancedcpp.py @@ -1,8 +1,12 @@ import py, os, sys +from .support import setup_make + currpath = py.path.local(__file__).dirpath() test_dct = str(currpath.join("advancedcppDict.so")) +def setup_module(mod): + setup_make("advancedcppDict.so") def setup_module(mod): if sys.platform == 'win32': @@ -18,8 +22,8 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_advanced = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_default_arguments(self): """Test usage of default arguments""" @@ -146,10 +150,10 @@ def test03a_namespace_lookup_on_update(self): """Test whether namespaces can be shared across dictionaries.""" - import _cppyy + import _cppyy, ctypes gbl = _cppyy.gbl - lib2 = _cppyy.load_reflection_info("advancedcpp2Dict.so") + lib2 = ctypes.CDLL("./advancedcpp2Dict.so", ctypes.RTLD_GLOBAL) assert gbl.a_ns is gbl.a_ns assert gbl.a_ns.d_ns is gbl.a_ns.d_ns diff --git a/pypy/module/_cppyy/test/test_cppyy.py b/pypy/module/_cppyy/test/test_cppyy.py --- 
a/pypy/module/_cppyy/test/test_cppyy.py +++ b/pypy/module/_cppyy/test/test_cppyy.py @@ -14,7 +14,8 @@ class TestCPPYYImplementation: def test01_class_query(self, space): # NOTE: this test needs to run before test_pythonify.py - dct = interp_cppyy.load_dictionary(space, test_dct) + import ctypes + dct = ctypes.CDLL(test_dct) w_cppyyclass = interp_cppyy.scope_byname(space, "example01") w_cppyyclass2 = interp_cppyy.scope_byname(space, "example01") assert space.is_w(w_cppyyclass, w_cppyyclass2) @@ -30,10 +31,12 @@ spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) def setup_class(cls): - cls.w_example01, cls.w_payload = cls.space.unpackiterable(cls.space.appexec([], """(): - import _cppyy - _cppyy.load_reflection_info(%r) - return _cppyy._scope_byname('example01'), _cppyy._scope_byname('payload')""" % (test_dct, ))) + cls.w_lib, cls.w_example01, cls.w_payload = \ + cls.space.unpackiterable(cls.space.appexec([], """(): + import _cppyy, ctypes + lib = ctypes.CDLL(%r, ctypes.RTLD_GLOBAL) + return lib, _cppyy._scope_byname('example01'), _cppyy._scope_byname('payload')"""\ + % (test_dct, ))) def test01_static_int(self): """Test passing of an int, returning of an int, and overloading on a diff --git a/pypy/module/_cppyy/test/test_crossing.py b/pypy/module/_cppyy/test/test_crossing.py --- a/pypy/module/_cppyy/test/test_crossing.py +++ b/pypy/module/_cppyy/test/test_crossing.py @@ -1,4 +1,5 @@ import py, os, sys +from .support import setup_make from pypy.interpreter.gateway import interp2app, unwrap_spec from rpython.translator.tool.cbuild import ExternalCompilationInfo @@ -14,11 +15,7 @@ test_dct = str(currpath.join("crossingDict.so")) def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make crossingDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") + setup_make("crossingDict.so") # from pypy/module/cpyext/test/test_cpyext.py; modified to accept more external # 
symbols and called directly instead of import_module @@ -145,8 +142,8 @@ def test02_crossing_dict(self): """Test availability of all needed classes in the dict""" - import _cppyy - _cppyy.load_reflection_info(self.test_dct) + import _cppyy, ctypes + lib = ctypes.CDLL(self.test_dct, ctypes.RTLD_GLOBAL) assert _cppyy.gbl.crossing == _cppyy.gbl.crossing crossing = _cppyy.gbl.crossing diff --git a/pypy/module/_cppyy/test/test_datatypes.py b/pypy/module/_cppyy/test/test_datatypes.py --- a/pypy/module/_cppyy/test/test_datatypes.py +++ b/pypy/module/_cppyy/test/test_datatypes.py @@ -14,17 +14,11 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_datatypes = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) cls.w_N = cls.space.newint(5) # should be imported from the dictionary - def test01_load_reflection_cache(self): - """Loading reflection info twice should result in the same object""" - import _cppyy - lib2 = _cppyy.load_reflection_info(self.test_dct) - assert self.datatypes is lib2 - - def test02_instance_data_read_access(self): + def test01_instance_data_read_access(self): """Read access to instance public data and verify values""" import _cppyy @@ -112,7 +106,7 @@ c.__destruct__() - def test03_instance_data_write_access(self): + def test02_instance_data_write_access(self): """Test write access to instance public data and verify values""" import _cppyy @@ -199,7 +193,7 @@ c.__destruct__() - def test04_array_passing(self): + def test03_array_passing(self): """Test passing of array arguments""" import _cppyy, array, sys @@ -237,7 +231,7 @@ c.__destruct__() - def test05_class_read_access(self): + def test04_class_read_access(self): """Test read access to class public data and verify values""" import _cppyy, sys @@ -278,7 +272,7 @@ c.__destruct__() - def test06_class_data_write_access(self): + def 
test05_class_data_write_access(self): """Test write access to class public data and verify values""" import _cppyy, sys @@ -342,7 +336,7 @@ c.__destruct__() - def test07_range_access(self): + def test06_range_access(self): """Test the ranges of integer types""" import _cppyy, sys @@ -358,7 +352,7 @@ c.__destruct__() - def test08_type_conversions(self): + def test07_type_conversions(self): """Test conversions between builtin types""" import _cppyy, sys @@ -376,7 +370,7 @@ c.__destruct__() - def test09_global_builtin_type(self): + def test08_global_builtin_type(self): """Test access to a global builtin type""" import _cppyy @@ -392,7 +386,7 @@ assert gbl.get_global_int() == 22 assert gbl.g_int == 22 - def test10_global_ptr(self): + def test09_global_ptr(self): """Test access of global objects through a pointer""" import _cppyy @@ -423,7 +417,7 @@ assert gbl.g_pod.m_int == 43 assert gbl.g_pod.m_double == 2.14 - def test11_enum(self): + def test10_enum(self): """Test access to enums""" import _cppyy @@ -471,7 +465,7 @@ assert gbl.kBanana == 29 assert gbl.kCitrus == 34 - def test12_string_passing(self): + def test11_string_passing(self): """Test passing/returning of a const char*""" import _cppyy @@ -481,7 +475,7 @@ assert c.get_valid_string('aap') == 'aap' #assert c.get_invalid_string() == '' - def test13_copy_contructor(self): + def test12_copy_contructor(self): """Test copy constructor""" import _cppyy @@ -497,7 +491,7 @@ for i in range(4): assert t1[i] == t3[i] - def test14_object_returns(self): + def test13_object_returns(self): """Test access to and return of PODs""" import _cppyy @@ -524,7 +518,7 @@ assert c.get_pod_ptrref().m_int == 666 assert c.get_pod_ptrref().m_double == 3.14 - def test15_object_arguments(self): + def test14_object_arguments(self): """Test setting and returning of a POD through arguments""" import _cppyy @@ -592,7 +586,7 @@ assert p.m_int == 888 assert p.m_double == 3.14 - def test16_nullptr_passing(self): + def test15_nullptr_passing(self): 
"""Integer 0 ('NULL') and None allowed to pass through instance*""" import _cppyy @@ -607,7 +601,7 @@ assert not c.m_ppod assert not c.get_pod_ptr() - def test17_respect_privacy(self): + def test16_respect_privacy(self): """Test that privacy settings are respected""" import _cppyy @@ -620,7 +614,7 @@ c.__destruct__() - def test18_object_and_pointer_comparisons(self): + def test17_object_and_pointer_comparisons(self): """Verify object and pointer comparisons""" import _cppyy @@ -657,7 +651,7 @@ assert l3 != l5 assert l5 != l3 - def test19_object_validity(self): + def test18_object_validity(self): """Test object validity checking""" from _cppyy import gbl @@ -671,7 +665,7 @@ assert not d2 - def test20_buffer_reshaping(self): + def test19_buffer_reshaping(self): """Test usage of buffer sizing""" import _cppyy @@ -692,7 +686,7 @@ for i in range(self.N): assert arr[i] == l[i] - def test21_voidp(self): + def test20_voidp(self): """Test usage of void* data""" import _cppyy diff --git a/pypy/module/_cppyy/test/test_fragile.py b/pypy/module/_cppyy/test/test_fragile.py --- a/pypy/module/_cppyy/test/test_fragile.py +++ b/pypy/module/_cppyy/test/test_fragile.py @@ -1,15 +1,12 @@ import py, os, sys +from .support import setup_make + currpath = py.path.local(__file__).dirpath() test_dct = str(currpath.join("fragileDict.so")) - def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make fragileDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") + setup_make("fragileDict.so") class AppTestFRAGILE: spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) @@ -17,21 +14,10 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_fragile = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) - def test01_load_failure(self): - 
"""Test failure to load dictionary""" - - import _cppyy - raises(RuntimeError, _cppyy.load_reflection_info, "does_not_exist.so") - - try: - _cppyy.load_reflection_info("does_not_exist.so") - except RuntimeError as e: - assert "does_not_exist.so" in str(e) - - def test02_missing_classes(self): + def test01_missing_classes(self): """Test (non-)access to missing classes""" import _cppyy @@ -54,7 +40,7 @@ assert fragile.C().check() == ord('C') raises(TypeError, fragile.C().use_no_such, None) - def test03_arguments(self): + def test02_arguments(self): """Test reporting when providing wrong arguments""" import _cppyy @@ -72,7 +58,7 @@ d.overload('a') d.overload(1) - def test04_unsupported_arguments(self): + def test03_unsupported_arguments(self): """Test arguments that are yet unsupported""" import _cppyy @@ -87,7 +73,7 @@ raises(TypeError, e.overload, None) raises(TypeError, getattr, e, 'm_pp_no_such') - def test05_wrong_arg_addressof(self): + def test04_wrong_arg_addressof(self): """Test addressof() error reporting""" import _cppyy @@ -109,7 +95,7 @@ assert _cppyy.addressof(None) == 0 assert _cppyy.addressof(_cppyy.gbl.nullptr) == 0 - def test06_wrong_this(self): + def test05_wrong_this(self): """Test that using an incorrect self argument raises""" import _cppyy @@ -130,7 +116,7 @@ assert isinstance(a.gime_null(), fragile.A) raises(ReferenceError, fragile.A.check, a.gime_null()) - def test07_unnamed_enum(self): + def test06_unnamed_enum(self): """Test that an unnamed enum does not cause infinite recursion""" import _cppyy @@ -141,7 +127,7 @@ g = fragile.G() - def test08_unhandled_scoped_datamember(self): + def test07_unhandled_scoped_datamember(self): """Test that an unhandled scoped data member does not cause infinite recursion""" import _cppyy @@ -152,7 +138,7 @@ h = fragile.H() - def test09_operator_bool(self): + def test08_operator_bool(self): """Access to global vars with an operator bool() returning False""" import _cppyy @@ -163,7 +149,7 @@ g = 
_cppyy.gbl.fragile.gI assert not g - def test10_documentation(self): + def test09_documentation(self): """Check contents of documentation""" import _cppyy @@ -205,7 +191,7 @@ except TypeError as e: assert "cannot instantiate abstract class 'O'" in str(e) - def test11_dir(self): + def test10_dir(self): """Test __dir__ method""" import _cppyy @@ -223,7 +209,7 @@ #assert 'fglobal' in members # function #assert 'gI'in members # variable - def test12_imports(self): + def test11_imports(self): """Test ability to import from namespace (or fail with ImportError)""" import _cppyy @@ -258,7 +244,7 @@ from _cppyy.gbl.fragile.nested1.nested2.nested3 import A assert _cppyy.gbl.fragile.nested1.nested2.nested3.A is nested3.A - def test13_missing_casts(self): + def test12_missing_casts(self): """Test proper handling when a hierarchy is not fully available""" import _cppyy @@ -275,7 +261,7 @@ l = k.GimeL() assert l is k.GimeL() - def test14_double_enum_trouble(self): + def test13_double_enum_trouble(self): """Test a redefinition of enum in a derived class""" return # don't bother; is fixed in cling-support diff --git a/pypy/module/_cppyy/test/test_operators.py b/pypy/module/_cppyy/test/test_operators.py --- a/pypy/module/_cppyy/test/test_operators.py +++ b/pypy/module/_cppyy/test/test_operators.py @@ -1,15 +1,12 @@ import py, os, sys +from .support import setup_make currpath = py.path.local(__file__).dirpath() test_dct = str(currpath.join("operatorsDict.so")) def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make operatorsDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") + setup_make("operatorsDict.so") class AppTestOPERATORS: spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) @@ -18,8 +15,8 @@ cls.w_N = cls.space.newint(5) # should be imported from the dictionary cls.w_test_dct = cls.space.newtext(test_dct) cls.w_operators = cls.space.appexec([], """(): - import 
_cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def teardown_method(self, meth): import gc diff --git a/pypy/module/_cppyy/test/test_overloads.py b/pypy/module/_cppyy/test/test_overloads.py --- a/pypy/module/_cppyy/test/test_overloads.py +++ b/pypy/module/_cppyy/test/test_overloads.py @@ -18,8 +18,8 @@ env = os.environ cls.w_test_dct = cls.space.newtext(test_dct) cls.w_overloads = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_class_based_overloads(self): """Test functions overloaded on different C++ classes""" diff --git a/pypy/module/_cppyy/test/test_pythonify.py b/pypy/module/_cppyy/test/test_pythonify.py --- a/pypy/module/_cppyy/test/test_pythonify.py +++ b/pypy/module/_cppyy/test/test_pythonify.py @@ -1,7 +1,7 @@ import py, os, sys +from .support import setup_make from pypy.module._cppyy import interp_cppyy, executor -from .support import setup_make currpath = py.path.local(__file__).dirpath() @@ -16,16 +16,10 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_example01 = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) - def test01_load_dictionary_cache(self): - """Test whether loading a dictionary twice results in the same object.""" - import _cppyy - lib2 = _cppyy.load_reflection_info(self.test_dct) - assert self.example01 is lib2 - - def test02_finding_classes(self): + def test01_finding_classes(self): """Test the lookup of a class, and its caching.""" import _cppyy example01_class = _cppyy.gbl.example01 @@ -34,7 +28,7 @@ raises(AttributeError, '_cppyy.gbl.nonexistingclass') - def test03_calling_static_functions(self): + def
test02_calling_static_functions(self): """Test calling of static methods.""" import _cppyy, sys, math example01_class = _cppyy.gbl.example01 @@ -70,7 +64,7 @@ raises(TypeError, 'example01_class.staticStrcpy(1.)') # TODO: this leaks - def test04_constructing_and_calling(self): + def test03_constructing_and_calling(self): """Test object and method calls.""" import _cppyy example01_class = _cppyy.gbl.example01 @@ -122,7 +116,7 @@ instance.__destruct__() assert example01_class.getCount() == 0 - def test05_passing_object_by_pointer(self): + def test04_passing_object_by_pointer(self): import _cppyy example01_class = _cppyy.gbl.example01 payload_class = _cppyy.gbl.payload @@ -145,7 +139,7 @@ e.__destruct__() assert example01_class.getCount() == 0 - def test06_returning_object_by_pointer(self): + def test05_returning_object_by_pointer(self): import _cppyy example01_class = _cppyy.gbl.example01 payload_class = _cppyy.gbl.payload @@ -165,7 +159,7 @@ e.__destruct__() assert example01_class.getCount() == 0 - def test07_returning_object_by_value(self): + def test06_returning_object_by_value(self): import _cppyy example01_class = _cppyy.gbl.example01 payload_class = _cppyy.gbl.payload @@ -187,7 +181,7 @@ e.__destruct__() assert example01_class.getCount() == 0 - def test08_global_functions(self): + def test07_global_functions(self): import _cppyy assert _cppyy.gbl.globalAddOneToInt(3) == 4 # creation lookup @@ -196,7 +190,7 @@ assert _cppyy.gbl.ns_example01.globalAddOneToInt(4) == 5 assert _cppyy.gbl.ns_example01.globalAddOneToInt(4) == 5 - def test09_memory(self): + def test08_memory(self): """Test proper C++ destruction by the garbage collector""" import _cppyy, gc @@ -240,7 +234,7 @@ # TODO: need ReferenceError on touching pl_a - def test10_default_arguments(self): + def test09_default_arguments(self): """Test propagation of default function arguments""" import _cppyy @@ -275,7 +269,7 @@ assert g(11., 2) == 2. assert g(11.) == 11. 
- def test11_overload_on_arguments(self): + def test10_overload_on_arguments(self): """Test functions overloaded on arguments""" import _cppyy @@ -286,14 +280,14 @@ assert e.overloadedAddDataToInt(4, 5) == 10 assert e.overloadedAddDataToInt(6, 7, 8) == 22 - def test12_typedefs(self): + def test11_typedefs(self): """Test access and use of typedefs""" import _cppyy assert _cppyy.gbl.example01 == _cppyy.gbl.example01_t - def test13_underscore_in_class_name(self): + def test12_underscore_in_class_name(self): """Test recognition of '_' as part of a valid class name""" import _cppyy @@ -305,7 +299,7 @@ assert hasattr(z, 'myint') assert z.gime_z_(z) - def test14_bound_unbound_calls(self): + def test13_bound_unbound_calls(self): """Test (un)bound method calls""" import _cppyy @@ -319,7 +313,7 @@ e = _cppyy.gbl.example01(2) assert 5 == meth(e, 3) - def test15_installable_function(self): + def test14_installable_function(self): """Test installing and calling global C++ function as python method""" import _cppyy @@ -330,7 +324,7 @@ assert 2 == e.fresh(1) assert 3 == e.fresh(2) - def test16_subclassing(self): + def test15_subclassing(self): """A sub-class on the python side should have that class as type""" import _cppyy @@ -376,8 +370,8 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_example01 = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_pythonizations(self): """Test addition of user-defined pythonizations""" diff --git a/pypy/module/_cppyy/test/test_stltypes.py b/pypy/module/_cppyy/test/test_stltypes.py --- a/pypy/module/_cppyy/test/test_stltypes.py +++ b/pypy/module/_cppyy/test/test_stltypes.py @@ -1,15 +1,12 @@ import py, os, sys +from .support import setup_make currpath = py.path.local(__file__).dirpath() test_dct = str(currpath.join("stltypesDict.so")) def setup_module(mod): - if sys.platform 
== 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make stltypesDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") + setup_make("stltypesDict.so") class AppTestSTLVECTOR: spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) @@ -18,8 +15,8 @@ cls.w_N = cls.space.newint(13) cls.w_test_dct = cls.space.newtext(test_dct) cls.w_stlvector = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_builtin_type_vector_types(self): """Test access to std::vector/std::vector""" @@ -205,8 +202,8 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_stlstring = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_string_argument_passing(self): """Test mapping of python strings and std::string""" @@ -284,8 +281,8 @@ cls.w_N = cls.space.newint(13) cls.w_test_dct = cls.space.newtext(test_dct) cls.w_stlstring = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_builtin_list_type(self): """Test access to a list""" @@ -340,8 +337,8 @@ cls.w_N = cls.space.newint(13) cls.w_test_dct = cls.space.newtext(test_dct) cls.w_stlstring = cls.space.appexec([], """(): - import _cppyy - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_builtin_map_type(self): """Test access to a map""" @@ -448,8 +445,8 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_stlstring = cls.space.appexec([], """(): - import _cppyy, sys - return _cppyy.load_reflection_info(%r)""" % 
(test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_builtin_vector_iterators(self): """Test iterator comparison with operator== reflected""" @@ -485,8 +482,8 @@ def setup_class(cls): cls.w_test_dct = cls.space.newtext(test_dct) cls.w_stlstring = cls.space.appexec([], """(): - import _cppyy, sys - return _cppyy.load_reflection_info(%r)""" % (test_dct, )) + import ctypes + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) def test01_explicit_templates(self): """Explicit use of Template class""" diff --git a/pypy/module/_cppyy/test/test_zjit.py b/pypy/module/_cppyy/test/test_zjit.py --- a/pypy/module/_cppyy/test/test_zjit.py +++ b/pypy/module/_cppyy/test/test_zjit.py @@ -51,11 +51,7 @@ test_dct = str(currpath.join("example01Dict.so")) def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make example01Dict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") + setup_make("example01Dict.so") class FakeBase(W_Root): @@ -263,7 +259,8 @@ space = FakeSpace() drv = jit.JitDriver(greens=[], reds=["i", "inst", "cppmethod"]) def f(): - lib = interp_cppyy.load_dictionary(space, "./example01Dict.so") + import ctypes + lib = ctypes.CDLL("./example01Dict.so", ctypes.RTLD_GLOBAL) cls = interp_cppyy.scope_byname(space, "example01") inst = cls.get_overload("example01").call(None, [FakeInt(0)]) cppmethod = cls.get_overload(method_name) From pypy.commits at gmail.com Thu Aug 3 20:16:33 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:33 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: make test_zjit.py work with the loadable_capi Message-ID: <5983bce1.d48bdf0a.2f446.52fb@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92064:270130f315a5 Date: 2017-08-03 16:43 -0700 http://bitbucket.org/pypy/pypy/changeset/270130f315a5/ Log: make test_zjit.py work with the loadable_capi diff 
--git a/pypy/module/_cppyy/test/test_zjit.py b/pypy/module/_cppyy/test/test_zjit.py --- a/pypy/module/_cppyy/test/test_zjit.py +++ b/pypy/module/_cppyy/test/test_zjit.py @@ -1,4 +1,6 @@ import py, os, sys +from .support import setup_make + from rpython.jit.metainterp.test.support import LLJitMixin from rpython.rlib.objectmodel import specialize, instantiate from rpython.rlib import rarithmetic, rbigint, jit @@ -8,9 +10,9 @@ from pypy.module._cppyy import interp_cppyy, capi, executor # These tests are for the backend that support the fast path only. -if capi.identify() == 'loadable_capi': - py.test.skip("can not currently use FakeSpace with _cffi_backend") -elif os.getenv("CPPYY_DISABLE_FASTPATH"): +#if capi.identify() == 'loadable_capi': +# py.test.skip("can not currently use FakeSpace with _cffi_backend") +if os.getenv("CPPYY_DISABLE_FASTPATH"): py.test.skip("fast path is disabled by CPPYY_DISABLE_FASTPATH envar") # load cpyext early, or its global vars are counted as leaks in the test @@ -57,14 +59,11 @@ class FakeBase(W_Root): typename = None -class FakeBool(FakeBase): - typename = "bool" - def __init__(self, val): - self.val = val class FakeInt(FakeBase): typename = "int" def __init__(self, val): self.val = val +FakeBool = FakeInt class FakeLong(FakeBase): typename = "long" def __init__(self, val): @@ -110,6 +109,13 @@ def __init__(self, space): self.slowcalls = 0 +class FakeFinalizerQueue(object): + def register_finalizer(self, obj): + pass + +class FakeConfig(object): + pass + class FakeSpace(object): fake = True @@ -119,10 +125,11 @@ w_float = FakeType("float") def __init__(self): + self.finalizer_queue = FakeFinalizerQueue() + self.fromcache = InternalSpaceCache(self).getorbuild self.user_del_action = FakeUserDelAction(self) - class dummy: pass - self.config = dummy() + self.config = FakeConfig() self.config.translating = False # kill calls to c_call_i (i.e. 
slow path) @@ -200,6 +207,10 @@ def is_w(self, w_one, w_two): return w_one is w_two + def bool_w(self, w_obj, allow_conversion=True): + assert isinstance(w_obj, FakeBool) + return w_obj.val + def int_w(self, w_obj, allow_conversion=True): assert isinstance(w_obj, FakeInt) return w_obj.val @@ -220,7 +231,12 @@ assert isinstance(obj, str) return obj + def len_w(self, obj): + assert isinstance(obj, str) + return (obj) + c_int_w = int_w + c_uint_w = uint_w r_longlong_w = int_w r_ulonglong_w = uint_w @@ -254,13 +270,16 @@ def _freeze_(self): return True + class TestFastPathJIT(LLJitMixin): + def setup_class(cls): + import ctypes + return ctypes.CDLL(test_dct, ctypes.RTLD_GLOBAL) + def _run_zjit(self, method_name): space = FakeSpace() drv = jit.JitDriver(greens=[], reds=["i", "inst", "cppmethod"]) def f(): - import ctypes - lib = ctypes.CDLL("./example01Dict.so", ctypes.RTLD_GLOBAL) cls = interp_cppyy.scope_byname(space, "example01") inst = cls.get_overload("example01").call(None, [FakeInt(0)]) cppmethod = cls.get_overload(method_name) @@ -277,16 +296,19 @@ self.check_jitcell_token_count(1) # same for fast and slow path?? 
# rely on replacement of capi calls to raise exception instead (see FakeSpace.__init__) + @py.test.mark.dont_track_allocations("cppmethod.cif_descr kept 'leaks'") def test01_simple(self): """Test fast path being taken for methods""" self._run_zjit("addDataToInt") + @py.test.mark.dont_track_allocations("cppmethod.cif_descr kept 'leaks'") def test02_overload(self): """Test fast path being taken for overloaded methods""" self._run_zjit("overloadedAddDataToInt") + @py.test.mark.dont_track_allocations("cppmethod.cif_descr kept 'leaks'") def test03_const_ref(self): """Test fast path being taken for methods with const ref arguments""" From pypy.commits at gmail.com Thu Aug 3 20:16:35 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:35 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove spurious references to CINT Message-ID: <5983bce3.97a0df0a.b1ca5.abe4@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92065:701628812c28 Date: 2017-08-03 16:43 -0700 http://bitbucket.org/pypy/pypy/changeset/701628812c28/ Log: remove spurious references to CINT diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -867,27 +867,3 @@ for c_type, alias in aliases: _converters[alias] = _converters[c_type] _add_aliased_converters() - -# ROOT-specific converters (TODO: this is a general use case and should grow -# an API; putting it here is done only to circumvent circular imports) -if capi.identify() == "CINT": - - class TStringConverter(InstanceConverter): - def __init__(self, space, extra): - from pypy.module._cppyy import interp_cppyy - cppclass = interp_cppyy.scope_byname(space, "TString") - InstanceConverter.__init__(self, space, cppclass) - - def _unwrap_object(self, space, w_obj): - from pypy.module._cppyy import interp_cppyy - if isinstance(w_obj, interp_cppyy.W_CPPInstance): - arg = InstanceConverter._unwrap_object(self, 
space, w_obj) - return capi.backend.c_TString2TString(space, arg) - else: - return capi.backend.c_charp2TString(space, space.text_w(w_obj)) - - def free_argument(self, space, arg, call_local): - capi.c_destruct(space, self.cppclass, rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, arg)[0])) - - _converters["TString"] = TStringConverter - _converters["const TString&"] = TStringConverter diff --git a/pypy/module/_cppyy/test/advancedcpp.h b/pypy/module/_cppyy/test/advancedcpp.h --- a/pypy/module/_cppyy/test/advancedcpp.h +++ b/pypy/module/_cppyy/test/advancedcpp.h @@ -246,9 +246,7 @@ int m_i; }; -#ifndef __CINT__ template class std::vector; -#endif //=========================================================================== diff --git a/pypy/module/_cppyy/test/datatypes.h b/pypy/module/_cppyy/test/datatypes.h --- a/pypy/module/_cppyy/test/datatypes.h +++ b/pypy/module/_cppyy/test/datatypes.h @@ -1,5 +1,5 @@ // copied from RtypesCore.h ... -#if defined(R__WIN32) && !defined(__CINT__) +#if defined(R__WIN32) typedef __int64 Long64_t; //Portable signed long integer 8 bytes typedef unsigned __int64 ULong64_t; //Portable unsigned long integer 8 bytes #else diff --git a/pypy/module/_cppyy/test/stltypes.h b/pypy/module/_cppyy/test/stltypes.h --- a/pypy/module/_cppyy/test/stltypes.h +++ b/pypy/module/_cppyy/test/stltypes.h @@ -74,9 +74,7 @@ struct _CppyyMapInstances { -#ifndef __CINT__ STLTYPE_INSTANTIATION2(map, int, int, 1); -#endif STLTYPE_INSTANTIATION2(map, std::string, int, 2); STLTYPE_INSTANTIATION2(map, std::string, unsigned int, 3); STLTYPE_INSTANTIATION2(map, std::string, unsigned long, 4); From pypy.commits at gmail.com Thu Aug 3 20:16:37 2017 From: pypy.commits at gmail.com (wlav) Date: Thu, 03 Aug 2017 17:16:37 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove capi.identify() as that's the backend's job Message-ID: <5983bce5.88aa1c0a.7f018.93e3@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92066:7e01e98a0155 Date: 
2017-08-03 16:44 -0700 http://bitbucket.org/pypy/pypy/changeset/7e01e98a0155/ Log: remove capi.identify() as that's the backend's job diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -17,9 +17,6 @@ reflection_library = 'libcppyy_backend.so' -def identify(): - return 'loadable_capi' - # this is not technically correct, but will do for now std_string_name = 'std::basic_string' diff --git a/pypy/module/_cppyy/test/test_zjit.py b/pypy/module/_cppyy/test/test_zjit.py --- a/pypy/module/_cppyy/test/test_zjit.py +++ b/pypy/module/_cppyy/test/test_zjit.py @@ -9,9 +9,7 @@ from pypy.interpreter.baseobjspace import InternalSpaceCache, W_Root from pypy.module._cppyy import interp_cppyy, capi, executor -# These tests are for the backend that support the fast path only. -#if capi.identify() == 'loadable_capi': -# py.test.skip("can not currently use FakeSpace with _cffi_backend") + if os.getenv("CPPYY_DISABLE_FASTPATH"): py.test.skip("fast path is disabled by CPPYY_DISABLE_FASTPATH envar") From pypy.commits at gmail.com Fri Aug 4 03:03:25 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 04 Aug 2017 00:03:25 -0700 (PDT) Subject: [pypy-commit] pypy default: fix fallout from celldict defaultification (this test is anyway much closer now to what we really care about) Message-ID: <59841c3d.5787df0a.babb5.c26a@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92067:1cf0fac8f2fe Date: 2017-08-04 09:00 +0200 http://bitbucket.org/pypy/pypy/changeset/1cf0fac8f2fe/ Log: fix fallout from celldict defaultification (this test is anyway much closer now to what we really care about) diff --git a/pypy/module/__builtin__/test/test_classobj.py b/pypy/module/__builtin__/test/test_classobj.py --- a/pypy/module/__builtin__/test/test_classobj.py +++ b/pypy/module/__builtin__/test/test_classobj.py @@ -1090,18 +1090,18 @@ def 
setup_class(cls): if cls.runappdirect: py.test.skip("can only be run on py.py") - def is_strdict(space, w_class): - from pypy.objspace.std.dictmultiobject import BytesDictStrategy + def is_moduledict(space, w_class): + from pypy.objspace.std.celldict import ModuleDictStrategy w_d = w_class.getdict(space) - return space.wrap(isinstance(w_d.get_strategy(), BytesDictStrategy)) + return space.wrap(isinstance(w_d.get_strategy(), ModuleDictStrategy)) - cls.w_is_strdict = cls.space.wrap(gateway.interp2app(is_strdict)) + cls.w_is_moduledict = cls.space.wrap(gateway.interp2app(is_moduledict)) - def test_strdict(self): + def test_moduledict(self): class A: a = 1 b = 2 - assert self.is_strdict(A) + assert self.is_moduledict(A) def test_attr_slots(self): class C: From pypy.commits at gmail.com Fri Aug 4 03:03:27 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 04 Aug 2017 00:03:27 -0700 (PDT) Subject: [pypy-commit] pypy default: unbreak translation (bad me, no cookie) Message-ID: <59841c3f.57b4df0a.475df.fb6a@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92068:7f9bc66e8749 Date: 2017-08-04 09:01 +0200 http://bitbucket.org/pypy/pypy/changeset/7f9bc66e8749/ Log: unbreak translation (bad me, no cookie) diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -119,7 +119,7 @@ return space.w_False index_count_jd = jit.JitDriver( - greens = ['count', 'tp_item', 'arrclass'], + greens = ['count', 'arrclass', 'tp_item'], reds = 'auto', name = 'array.index_or_count') def index_count_array(arr, w_val, count=False): From pypy.commits at gmail.com Fri Aug 4 05:26:19 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 04 Aug 2017 02:26:19 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: work on the blog post Message-ID: <59843dbb.0e951c0a.b4e60.97a6@mx.google.com> Author: fijal Branch: extradoc Changeset: r5824:155c9cbc30c7 Date: 2017-08-04 11:26 
+0200 http://bitbucket.org/pypy/extradoc/changeset/155c9cbc30c7/ Log: work on the blog post diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst --- a/blog/draft/remove-gil.rst +++ b/blog/draft/remove-gil.rst @@ -24,5 +24,35 @@ already have backing for about 1/3. If we can get a $100k contract, we would make it our priority to deliver before the end of the year. +People asked several questions, so I'll try to answer the technical parts +here. + +* What would the plan entail? + +We've already done the work on the Garbage Collector to allow running multi- +threaded programs in RPython. "All" that's left is adding locks on mutable +data structures everywhere in the PyPy codebase. Since it'll significantly complicate +our workflow, we need to see real interest in that topic, backed up by +commercial contracts; otherwise we're not going to do it. + +* Why did the STM effort not work out? + +STM was a research project that proved that the idea is possible. However, +the amount of user effort required to make programs run nicely in +parallel is significant, and we never managed to develop tools +that would help much. At present we're not sure whether more manpower +spent on tooling would improve the situation or the idea is doomed. The whole +approach also ended up imposing a significant overhead on single-threaded programs, +which means that it's very easy to make your programs slower. + +* Would subinterpreters not be a better idea? + +Python is a very mutable language - there is a lot of mutable state, and +basic objects that are compile-time constructs in other languages, like classes and functions, +are mutable at runtime. That means that sharing things between subinterpreters would +be restricted to basic immutable data structures, which defeats the point compared +to the multi-processing approach. We don't believe it's a viable approach without +seriously impacting the semantics of the language. 
+ Best regards, Maciej Fijalkowski From pypy.commits at gmail.com Fri Aug 4 05:29:20 2017 From: pypy.commits at gmail.com (Dodan) Date: Fri, 04 Aug 2017 02:29:20 -0700 (PDT) Subject: [pypy-commit] pypy py3.5-sendmsg-recvmsg: Fixed x86 32bit translation error and the multiprocessing_forkserver Message-ID: <59843e70.935c1c0a.4cb2b.fc98@mx.google.com> Author: Dodan Mihai Branch: py3.5-sendmsg-recvmsg Changeset: r92069:318fcd21d1de Date: 2017-08-04 12:27 +0300 http://bitbucket.org/pypy/pypy/changeset/318fcd21d1de/ Log: Fixed x86 32bit translation error and the multiprocessing_forkserver diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -996,22 +996,16 @@ retflag[0] = rffi.cast(rffi.SIGNED,0) # a mask for the SIGNEDP's that need to be cast to int. (long default) - #LONG_MASK = 2**32 - 1 reply = _c.recvmsg(self.fd, rffi.cast(lltype.Signed,message_size), rffi.cast(lltype.Signed,ancbufsize),rffi.cast(lltype.Signed,flags), addr_p, addrlen_p, len_of_msgs, messages, no_of_messages,size_of_anc, levels, types,file_descr,descr_per_anc,retflag) if reply >= 0: - msg_no = rffi.cast(rffi.SIGNED,no_of_messages[0]) anc_size = rffi.cast(rffi.SIGNED,size_of_anc[0]) returnflag = rffi.cast(rffi.SIGNED,retflag[0]) addrlen = rffi.cast(rffi.SIGNED,addrlen_p[0]) - retmsg = "" - for i in range(msg_no): - x = rffi.cast(rffi.SIGNED,len_of_msgs[0][i]) - #x &= LONG_MASK - retmsg = rffi.charp2strn(messages[0],x) + retmsg = rffi.charpsize2str(messages[0],reply) offset = 0 list_of_tuples = [] From pypy.commits at gmail.com Fri Aug 4 05:29:56 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:29:56 -0700 (PDT) Subject: [pypy-commit] stmgc c8-tcp-style-trx-length: Change default transaction length to ~40 KB Message-ID: <59843e94.4d86df0a.40611.5628@mx.google.com> Author: Tobias Weber Branch: c8-tcp-style-trx-length Changeset: r2142:cdbb6dade13a Date: 2017-08-02 17:10 +0200 
http://bitbucket.org/pypy/stmgc/changeset/cdbb6dade13a/ Log: Change default transaction length to ~40 KB diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -20,8 +20,8 @@ // corresponds to ~4 GB #define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L -// corresponds to ~4 MB nursery fill -#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001) +// corresponds to ~40 KB nursery fill +#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.00001) // corresponds to ~4 KB nursery fill #define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.000001) From pypy.commits at gmail.com Fri Aug 4 05:29:59 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:29:59 -0700 (PDT) Subject: [pypy-commit] stmgc c8-tcp-style-trx-length: Increase min trx len to ~400KB Message-ID: <59843e97.a1aedf0a.521b.daae@mx.google.com> Author: Tobias Weber Branch: c8-tcp-style-trx-length Changeset: r2144:2d68598c75fd Date: 2017-08-03 12:14 +0200 http://bitbucket.org/pypy/stmgc/changeset/2d68598c75fd/ Log: Increase min trx len to ~400KB diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -22,8 +22,8 @@ // corresponds to ~4 MB nursery fill #define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001) -// corresponds to ~4 KB nursery fill -#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.000001) +// corresponds to ~400 KB nursery fill +#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001) #define BACKOFF_COUNT (20) #define BACKOFF_MULTIPLIER (BACKOFF_COUNT / -log10(STM_MIN_RELATIVE_TRANSACTION_LENGTH)) From pypy.commits at gmail.com Fri Aug 4 05:29:58 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:29:58 -0700 (PDT) Subject: [pypy-commit] stmgc c8-tcp-style-trx-length: Backed out changeset cdbb6dade13a Message-ID: <59843e96.1db7df0a.41cce.aa22@mx.google.com> Author: Tobias Weber Branch: c8-tcp-style-trx-length Changeset: r2143:7ffcfbd0993a Date: 2017-08-03 12:13 +0200 
http://bitbucket.org/pypy/stmgc/changeset/7ffcfbd0993a/ Log: Backed out changeset cdbb6dade13a diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -20,8 +20,8 @@ // corresponds to ~4 GB #define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L -// corresponds to ~40 KB nursery fill -#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.00001) +// corresponds to ~4 MB nursery fill +#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001) // corresponds to ~4 KB nursery fill #define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.000001) From pypy.commits at gmail.com Fri Aug 4 05:43:51 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:43:51 -0700 (PDT) Subject: [pypy-commit] stmgc c8-binary-trx-length-per-thread: Merge instrumentation updates Message-ID: <598441d7.8baddf0a.9799c.9434@mx.google.com> Author: Tobias Weber Branch: c8-binary-trx-length-per-thread Changeset: r2146:bcf327fad4ce Date: 2017-08-04 11:42 +0200 http://bitbucket.org/pypy/stmgc/changeset/bcf327fad4ce/ Log: Merge instrumentation updates diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -469,8 +469,8 @@ #endif if (STM_PSEGMENT->last_commit_log_entry->next == INEV_RUNNING) { - pause_timer(); - wait_for_inevitable(); // TODO may abort!! 
timing event lost + stop_timer_and_publish(STM_DURATION_VALIDATION); + wait_for_inevitable(); continue_timer(); goto retry_from_start; /* redo _stm_validate() now */ } @@ -560,14 +560,13 @@ OPT_ASSERT(yes); release_modification_lock_wr(STM_SEGMENT->segment_num); + + stop_timer_and_publish(STM_DURATION_VALIDATION); } else { - pause_timer(); + stop_timer_and_publish(STM_DURATION_VALIDATION); _validate_and_attach(new); - continue_timer(); } - - stop_timer_and_publish(STM_DURATION_VALIDATION); } /* ############# STM ############# */ @@ -1328,7 +1327,9 @@ if there is an inevitable tx running) */ bool was_inev = STM_PSEGMENT->transaction_state == TS_INEVITABLE; - pause_timer(); + // publish here because the validation may abort + stop_timer_and_publish_for_thread( + thread_local_for_logging, STM_DURATION_COMMIT_EXCEPT_GC); _validate_and_add_to_commit_log(); continue_timer(); @@ -1653,9 +1654,9 @@ signal_commit_to_inevitable_transaction(); s_mutex_lock(); - if (any_soon_finished_or_inevitable_thread_segment() && - !safe_point_requested() && - num_waits <= NB_SEGMENTS) { + if (any_soon_finished_or_inevitable_thread_segment() + && !safe_point_requested() + && num_waits <= NB_SEGMENTS) { /* wait until C_SEGMENT_FREE_OR_SAFE_POINT_REQ is signalled */ EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -759,16 +759,21 @@ static void major_do_validation_and_minor_collections(void) { + start_timer(); + int original_num = STM_SEGMENT->segment_num; long i; assert(_has_mutex()); /* including the sharing seg0 */ - for (i = 0; i < NB_SEGMENTS; i++) { + for (i = 0; i < NB_SEGMENTS; i++) { // TODO why is this strictly smaller than? 
ensure_gs_register(i); + pause_timer(); bool ok = _stm_validate(); + continue_timer(); + assert(get_priv_segment(i)->last_commit_log_entry->next == NULL || get_priv_segment(i)->last_commit_log_entry->next == INEV_RUNNING); if (!ok) { @@ -803,7 +808,9 @@ Collecting might fail due to invalid state. */ if (!must_abort()) { + pause_timer(); _do_minor_collection(/*commit=*/ false); + continue_timer(); assert(MINOR_NOTHING_TO_DO(STM_PSEGMENT)); } else { @@ -813,6 +820,8 @@ } ensure_gs_register(original_num); + + stop_timer_and_publish(STM_DURATION_MAJOR_GC_FULL); } diff --git a/c8/stm/timing.h b/c8/stm/timing.h --- a/c8/stm/timing.h +++ b/c8/stm/timing.h @@ -8,6 +8,8 @@ #define start_timer() struct timespec start, stop; \ struct timespec duration = { .tv_sec = 0, .tv_nsec = 0 };\ uint32_t nanosec_diff, sec_diff; \ + stm_timing_event_payload_data_t stm_duration_data; \ + stm_timing_event_payload_t stm_duration_payload; \ continue_timer() /* Must use start_timer before using this macro. */ @@ -27,11 +29,12 @@ #define pause_timer() clock_gettime(CLOCK_MONOTONIC_RAW, &stop); \ get_duration() +#define reset_timer() duration.tv_sec = 0; duration.tv_nsec = 0; + #define stm_duration_payload(duration_data) \ - stm_timing_event_payload_data_t stm_duration_data = \ - { .duration = &(duration_data) }; \ - stm_timing_event_payload_t stm_duration_payload = \ - { STM_EVENT_PAYLOAD_DURATION, stm_duration_data }; + stm_duration_data.duration = &(duration_data); \ + stm_duration_payload.type = STM_EVENT_PAYLOAD_DURATION; \ + stm_duration_payload.data = stm_duration_data; #define publish_event(thread_local, event) \ (timing_enabled() ? 
\ @@ -42,7 +45,8 @@ pause_timer() \ stm_duration_payload(duration) \ assert((thread_local) != NULL); \ - publish_event((thread_local), (event)) + publish_event((thread_local), (event)) \ + reset_timer() #define stop_timer_and_publish(event) \ stop_timer_and_publish_for_thread(STM_SEGMENT->running_thread, (event)) diff --git a/c8/stmgc.h b/c8/stmgc.h --- a/c8/stmgc.h +++ b/c8/stmgc.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -581,6 +580,8 @@ STM_GC_MAJOR_DONE, /* execution duration profiling events */ + STM_WARMUP_COMPLETE, + STM_DURATION_START_TRX, STM_DURATION_WRITE_GC_ONLY, STM_DURATION_WRITE_SLOWPATH, @@ -611,6 +612,7 @@ "gc major start", \ "gc major done", \ /* names of duration events */ \ + "marks completion of benchmark warm up phase" \ "duration of transaction start", \ "duration of gc due to write", \ "duration of write slowpath", \ From pypy.commits at gmail.com Fri Aug 4 05:43:49 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:43:49 -0700 (PDT) Subject: [pypy-commit] stmgc c8-binary-trx-length-per-thread: Merge commit signalling Message-ID: <598441d5.c49edf0a.66de4.8769@mx.google.com> Author: Tobias Weber Branch: c8-binary-trx-length-per-thread Changeset: r2145:6cc14f08abf7 Date: 2017-08-04 11:35 +0200 http://bitbucket.org/pypy/stmgc/changeset/6cc14f08abf7/ Log: Merge commit signalling diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -380,6 +380,14 @@ static void readd_wb_executed_flags(void); static void check_all_write_barrier_flags(char *segbase, struct list_s *list); +static void signal_commit_to_inevitable_transaction(void) { + struct stm_priv_segment_info_s* inevitable_segement = get_inevitable_thread_segment(); + if (inevitable_segement != 0) { + // the inevitable thread is still running: set its "please commit" flag (is ignored by the inevitable thread if it is atomic) + inevitable_segement->commit_if_not_atomic = true; + } +} + static void 
wait_for_inevitable(void) { intptr_t detached = 0; @@ -396,6 +404,8 @@ try to detach an inevitable transaction regularly */ detached = fetch_detached_transaction(); if (detached == 0) { + // the inevitable trx was not detached or it was detached but is atomic + signal_commit_to_inevitable_transaction(); EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001)) goto wait_some_more; @@ -1169,6 +1179,7 @@ _do_start_transaction(tl); continue_timer(); + STM_PSEGMENT->commit_if_not_atomic = false; STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + stm_get_transaction_length(tl)); @@ -1623,7 +1634,7 @@ void _stm_become_inevitable(const char *msg) { - int num_waits = 0; + int num_waits = 1; timing_become_inevitable(); @@ -1634,50 +1645,48 @@ if (msg != MSG_INEV_DONT_SLEEP) { dprintf(("become_inevitable: %s\n", msg)); - if (any_soon_finished_or_inevitable_thread_segment() && - num_waits <= NB_SEGMENTS) { + if (any_soon_finished_or_inevitable_thread_segment()) { #if STM_TESTS /* for tests: another transaction */ stm_abort_transaction(); /* is already inevitable, abort */ #endif - bool timed_out = false; + signal_commit_to_inevitable_transaction(); s_mutex_lock(); if (any_soon_finished_or_inevitable_thread_segment() && - !safe_point_requested()) { + !safe_point_requested() && + num_waits <= NB_SEGMENTS) { /* wait until C_SEGMENT_FREE_OR_SAFE_POINT_REQ is signalled */ EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); - if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, - 0.000054321)) - timed_out = true; + if (cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001)) { + num_waits++; + } } s_mutex_unlock(); - - if (timed_out) { - /* try to detach another inevitable transaction, but - only after waiting a bit. This is necessary to avoid - deadlocks in some situations, which are hopefully - not too common. We don't want two threads constantly - detaching each other. 
*/ - intptr_t detached = fetch_detached_transaction(); - if (detached != 0) { - EMIT_WAIT_DONE(); - commit_fetched_detached_transaction(detached); - } - } - else { - num_waits++; + /* XXX try to detach another inevitable transaction, but + only after waiting a bit. This is necessary to avoid + deadlocks in some situations, which are hopefully + not too common. We don't want two threads constantly + detaching each other. */ + intptr_t detached = fetch_detached_transaction(); + if (detached != 0) { + EMIT_WAIT_DONE(); + commit_fetched_detached_transaction(detached); + EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); } goto retry_from_start; } - EMIT_WAIT_DONE(); - if (!_validate_and_turn_inevitable()) - goto retry_from_start; + else { + EMIT_WAIT_DONE(); + if (!_validate_and_turn_inevitable()) { + EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); + goto retry_from_start; + } + } } - else { - if (!_validate_and_turn_inevitable()) - return; + else if (!_validate_and_turn_inevitable()) { + return; } /* There may be a concurrent commit of a detached Tx going on. 
@@ -1689,6 +1698,7 @@ stm_spin_loop(); assert(_stm_detached_inevitable_from_thread == 0); + STM_PSEGMENT->commit_if_not_atomic = false; soon_finished_or_inevitable_thread_segment(); STM_PSEGMENT->transaction_state = TS_INEVITABLE; diff --git a/c8/stm/core.h b/c8/stm/core.h --- a/c8/stm/core.h +++ b/c8/stm/core.h @@ -169,6 +169,9 @@ /* For stm_enable_atomic() */ uintptr_t atomic_nesting_levels; + + // TODO signal flag that is checked in throw_away_nursery() for making immediate commit + bool commit_if_not_atomic; }; enum /* safe_point */ { diff --git a/c8/stm/detach.c b/c8/stm/detach.c --- a/c8/stm/detach.c +++ b/c8/stm/detach.c @@ -215,6 +215,7 @@ } } +// TODO write tests, verify is working, verify no overflows with adaptive mode uintptr_t stm_is_atomic(stm_thread_local_t *tl) { assert(STM_SEGMENT->running_thread == tl); @@ -228,14 +229,18 @@ return STM_PSEGMENT->atomic_nesting_levels; } +// max intptr_t value is 7FFFFFFFFFFFFFFF on 64-bit => larger than 2 * huge value #define HUGE_INTPTR_VALUE 0x3000000000000000L void stm_enable_atomic(stm_thread_local_t *tl) { if (!stm_is_atomic(tl)) { + // do for outermost atomic block only tl->self_or_0_if_atomic = 0; /* increment 'nursery_mark' by HUGE_INTPTR_VALUE, so that - stm_should_break_transaction() returns always false */ + stm_should_break_transaction() returns always false. 
+ preserves the previous nursery_mark, unless it is < 0 + or >= huge value */ intptr_t mark = (intptr_t)STM_SEGMENT->nursery_mark; if (mark < 0) mark = 0; @@ -255,6 +260,7 @@ STM_PSEGMENT->atomic_nesting_levels--; if (STM_PSEGMENT->atomic_nesting_levels == 0) { + // revert changes by stm_enable_atomic only if we left the outermost atomic block tl->self_or_0_if_atomic = (intptr_t)tl; /* decrement 'nursery_mark' by HUGE_INTPTR_VALUE, to cancel what was done in stm_enable_atomic() */ diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -527,6 +527,14 @@ pseg->pub.nursery_current = (stm_char *)_stm_nursery_start; pseg->pub.nursery_mark -= nursery_used; + assert((pseg->transaction_state == TS_INEVITABLE) || !pseg->commit_if_not_atomic); + if (pseg->commit_if_not_atomic + && pseg->transaction_state == TS_INEVITABLE + && pseg->pub.running_thread->self_or_0_if_atomic != 0) { + // transaction is inevitable, not atomic, and commit has been signalled by waiting thread: commit immediately + pseg->pub.nursery_mark = 0; + } + /* free any object left from 'young_outside_nursery' */ if (!tree_is_cleared(pseg->young_outside_nursery)) { wlog_t *item; diff --git a/c8/stm/sync.c b/c8/stm/sync.c --- a/c8/stm/sync.c +++ b/c8/stm/sync.c @@ -302,6 +302,19 @@ return false; } +static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void) +{ + struct stm_priv_segment_info_s* segment; + int num; + for (num = 1; num < NB_SEGMENTS; num++) { + segment = get_priv_segment(num); + if (segment->transaction_state == TS_INEVITABLE) { + return segment; + } + } + return 0; +} + __attribute__((unused)) static bool _seems_to_be_running_transaction(void) { diff --git a/c8/stm/sync.h b/c8/stm/sync.h --- a/c8/stm/sync.h +++ b/c8/stm/sync.h @@ -30,6 +30,7 @@ static void release_thread_segment(stm_thread_local_t *tl); static void soon_finished_or_inevitable_thread_segment(void); static bool any_soon_finished_or_inevitable_thread_segment(void); +static 
struct stm_priv_segment_info_s* get_inevitable_thread_segment(void); enum sync_type_e { STOP_OTHERS_UNTIL_MUTEX_UNLOCK, From pypy.commits at gmail.com Fri Aug 4 05:53:47 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:53:47 -0700 (PDT) Subject: [pypy-commit] stmgc c8-binary-trx-length-per-thread: Update transaction lengths with learnings from TCP style Message-ID: <5984442b.11331c0a.c5a14.c802@mx.google.com> Author: Tobias Weber Branch: c8-binary-trx-length-per-thread Changeset: r2147:cac4878ee56a Date: 2017-08-04 11:48 +0200 http://bitbucket.org/pypy/stmgc/changeset/cac4878ee56a/ Log: Update transaction lengths with learnings from TCP style diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -16,13 +16,17 @@ static uintptr_t _stm_nursery_start; #define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) -#define LARGE_FILL_MARK_NURSERY_BYTES 0x1000000000L + +// corresponds to ~4 GB +#define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L +// corresponds to ~400 KB nursery fill +#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001) static double get_new_transaction_length(stm_thread_local_t *tl, bool aborts) { double new = tl->relative_transaction_length; if (aborts) { - tl->transaction_length_backoff = 3; - new = 100.0 / LARGE_FILL_MARK_NURSERY_BYTES; + new = STM_MIN_RELATIVE_TRANSACTION_LENGTH; + tl->transaction_length_backoff = 20; } else if (tl->transaction_length_backoff == 0) { new = 1; } else { // not abort and backoff != 0 From pypy.commits at gmail.com Fri Aug 4 05:53:50 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:53:50 -0700 (PDT) Subject: [pypy-commit] stmgc c8-binary-trx-length-per-thread: Move transaction length update on abort Message-ID: <5984442e.99a3df0a.bfcf9.b962@mx.google.com> Author: Tobias Weber Branch: c8-binary-trx-length-per-thread Changeset: r2149:aa5b73c18b88 Date: 2017-07-14 12:49 +0200 http://bitbucket.org/pypy/stmgc/changeset/aa5b73c18b88/ 
Log: Move transaction length update on abort diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -1552,8 +1552,6 @@ did_abort = 1; #endif - stm_transaction_length_handle_validation(pseg->pub.running_thread, true); - list_clear(pseg->objects_pointing_to_nursery); list_clear(pseg->old_objects_with_cards_set); LIST_FOREACH_R(pseg->large_overflow_objects, uintptr_t /*item*/, @@ -1584,6 +1582,8 @@ tl->self_or_0_if_atomic = (intptr_t)tl; /* clear the 'atomic' flag */ STM_PSEGMENT->atomic_nesting_levels = 0; + stm_transaction_length_handle_validation(tl, true); + if (tl->mem_clear_on_abort) memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort); if (tl->mem_reset_on_abort) { From pypy.commits at gmail.com Fri Aug 4 05:53:48 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 02:53:48 -0700 (PDT) Subject: [pypy-commit] stmgc c8-binary-trx-length-per-thread: Update trx length on commit and abort only Message-ID: <5984442c.d47d1c0a.daced.73c9@mx.google.com> Author: Tobias Weber Branch: c8-binary-trx-length-per-thread Changeset: r2148:114803b15227 Date: 2017-07-10 16:55 +0200 http://bitbucket.org/pypy/stmgc/changeset/114803b15227/ Log: Update trx length on commit and abort only diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -347,7 +347,6 @@ } if (thread_local_for_logging != NULL) { - stm_transaction_length_handle_validation(thread_local_for_logging, needs_abort); stop_timer_and_publish_for_thread( thread_local_for_logging, STM_DURATION_VALIDATION); } @@ -1380,6 +1379,8 @@ s_mutex_unlock(); + stm_transaction_length_handle_validation(thread_local_for_logging, false); + stop_timer_and_publish_for_thread( thread_local_for_logging, STM_DURATION_COMMIT_EXCEPT_GC); @@ -1551,6 +1552,8 @@ did_abort = 1; #endif + stm_transaction_length_handle_validation(pseg->pub.running_thread, true); + list_clear(pseg->objects_pointing_to_nursery); list_clear(pseg->old_objects_with_cards_set); 
LIST_FOREACH_R(pseg->large_overflow_objects, uintptr_t /*item*/, From pypy.commits at gmail.com Fri Aug 4 05:58:06 2017 From: pypy.commits at gmail.com (antocuni) Date: Fri, 04 Aug 2017 02:58:06 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: some reviews Message-ID: <5984452e.a29adf0a.412a8.782b@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5825:103974b87e0f Date: 2017-08-04 11:57 +0200 http://bitbucket.org/pypy/extradoc/changeset/103974b87e0f/ Log: some reviews diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst --- a/blog/draft/remove-gil.rst +++ b/blog/draft/remove-gil.rst @@ -3,26 +3,30 @@ Hello everyone. -The topic of the infamous Global Interpreter Lock has been around for a while -in the Python community. There has been various attempts at removing it -(some successful ones, e.g. in Jython or IronPython with the help of the platform) -and some yet to bear fruit, like `gilectomy`_. Since February sprint in Leysin, +Discussions about the infamous Global Interpreter Lock have been around for a while +in the Python community. There have been various attempts at removing it: +some were successful, like e.g. in Jython or IronPython with the help of the platform, and some yet to bear fruit, like `gilectomy`_. Since our `February sprint`_ in Leysin, we've been on-and-off tackling the topic of GIL removal in the PyPy project. -As of Europython announcement, we're able to run (very simple) programs with GIL-less -PyPy that parallelizes nicely. The remaining 90% (and another 90%) of work -is with putting locks in strategic places so PyPy does not segfault -when you try to do a concurrent access to a data structure. +.. _`February sprint`: https://morepypy.blogspot.it/2017/03/leysin-winter-sprint-summary.html -Since such work would complicate the code base and our day to day work, -we would like to judge the interest on the community and the commercial -PyPy users.
+As we announced at EuroPython, what we have got so far is a GIL-less PyPy +which can run **very simple** multi-threaded programs which are nicely +parallelized. At the moment, non-simple programs most likely segfault: the +remaining 90% (and another 90%) of work is with putting locks in strategic +places so PyPy does not segfault when you try to do a concurrent access to a +data structure. -Since such work would complicate the code base and our day to day work, -we would like to judge the interest on the community and the commercial -PyPy users. +.. antocuni: I'd simply remove the following paragraph. It's redundant, IMHO +.. + Since such work would complicate the code base and our day to day work, + we would like to judge the interest on the community and the commercial + PyPy users. + +We are looking for commercial partners to make it happen (individual donations +did not work very well for us in the past). We estimate a total cost of $50k, +out of which we already have backing for about 1/3. If we can get a $100k +contract, we would make it our priority to deliver before the end of the year. People asked several questions, so I'll try to answer the technical parts here.
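[Editorial note on the blog draft above: the "locks in strategic places" it mentions can be illustrated with a small sketch. This is a purely hypothetical toy example — the class, names, and design are invented for illustration and are unrelated to PyPy's actual interpreter-level code.]

```python
import threading

class LockedDict:
    """Toy wrapper: one lock per shared structure, held around every access.

    Hypothetical illustration only -- not PyPy internals.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._data = {}

    def __setitem__(self, key, value):
        # Without a lock, two threads mutating the dict at once can see it in
        # an inconsistent intermediate state; in an interpreter written in C
        # (or RPython) that is exactly the kind of race that segfaults.
        with self._lock:
            self._data[key] = value

    def __len__(self):
        with self._lock:
            return len(self._data)

def worker(d, tid):
    for i in range(1000):
        d[(tid, i)] = i   # unique keys, so the final size is deterministic

d = LockedDict()
threads = [threading.Thread(target=worker, args=(d, tid)) for tid in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(len(d))  # prints 4000
```

In today's CPython and PyPy this particular race is masked by the GIL; the point of the sketch is only that, once the global lock is gone, each shared structure needs its own explicit synchronization.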
From pypy.commits at gmail.com Fri Aug 4 06:22:40 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 04 Aug 2017 03:22:40 -0700 (PDT) Subject: [pypy-commit] pypy default: fix error message to not contain the internal name 'bigint' Message-ID: <59844af0.4ad71c0a.ece00.492b@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92070:76adf2cfc536 Date: 2017-08-04 12:22 +0200 http://bitbucket.org/pypy/pypy/changeset/76adf2cfc536/ Log: fix error message to not contain the internal name 'bigint' diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -769,7 +769,9 @@ class TypeCode(object): def __init__(self, itemtype, unwrap, canoverflow=False, signed=False, - method='__int__'): + method='__int__', errorname=None): + if errorname is None: + errorname = unwrap[:-2] self.itemtype = itemtype self.bytes = rffi.sizeof(itemtype) self.arraytype = lltype.Array(itemtype, hints={'nolength': True}) @@ -779,6 +781,7 @@ self.canoverflow = canoverflow self.w_class = None self.method = method + self.errorname = errorname def _freeze_(self): # hint for the annotator: track individual constant instances @@ -802,8 +805,8 @@ 'i': TypeCode(rffi.INT, 'int_w', True, True), 'I': _UINTTypeCode, 'l': TypeCode(rffi.LONG, 'int_w', True, True), - 'L': TypeCode(rffi.ULONG, 'bigint_w'), # Overflow handled by - # rbigint.touint() which + 'L': TypeCode(rffi.ULONG, 'bigint_w', # Overflow handled by + errorname="integer"), # rbigint.touint() which # corresponds to the # C-type unsigned long 'f': TypeCode(lltype.SingleFloat, 'float_w', method='__float__'), @@ -881,7 +884,7 @@ item = unwrap(space.call_method(w_item, mytype.method)) except OperationError: raise oefmt(space.w_TypeError, - "array item must be " + mytype.unwrap[:-2]) + "array item must be " + mytype.errorname) else: raise if mytype.unwrap == 'bigint_w': diff --git a/pypy/module/array/test/test_array.py 
b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -162,6 +162,11 @@ raises(OverflowError, a.append, -1) raises(OverflowError, a.append, 2 ** (8 * b)) + def test_errormessage(self): + a = self.array("L", [1, 2, 3]) + excinfo = raises(TypeError, "a[0] = 'abc'") + assert str(excinfo.value) == "array item must be integer" + def test_fromstring(self): import sys From pypy.commits at gmail.com Fri Aug 4 06:40:29 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 04 Aug 2017 03:40:29 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-debug-type_dealloc: add debug hacks to find gc referrers in leak test Message-ID: <59844f1d.11331c0a.c5a14.d471@mx.google.com> Author: Matti Picus Branch: cpyext-debug-type_dealloc Changeset: r92071:c5b852756f17 Date: 2017-08-04 13:38 +0300 http://bitbucket.org/pypy/pypy/changeset/c5b852756f17/ Log: add debug hacks to find gc referrers in leak test diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -13,6 +13,7 @@ from rpython.tool import leakfinder from rpython.rlib import rawrefcount from rpython.tool.udir import udir +import gc only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" @@ -113,7 +114,8 @@ return is_interned_string(space, w_obj) def _get_w_obj(space, c_obj): - return from_ref(space, cts.cast('PyObject*', c_obj._as_ptr())) + py_obj = cts.cast('PyObject*', c_obj._as_ptr()) + return from_ref(space, py_obj), py_obj class CpyextLeak(leakfinder.MallocMismatch): def __str__(self): @@ -122,9 +124,21 @@ "These objects are attached to the following W_Root objects:") for c_obj in self.args[0]: try: - lines.append(" %s" % (_get_w_obj(self.args[1], c_obj),)) - except: - pass + w_obj, py_obj = _get_w_obj(self.args[1], c_obj) + if w_obj and py_obj.c_ob_refcnt == rawrefcount.REFCNT_FROM_PYPY: + 
referrers = gc.get_referrers(w_obj) + badboys = [] + for r in referrers: + # trap the tuples? + if isinstance(r, tuple): + badboys.append(r) + # look at badboys, why are there tuples of + # (type, function) ? + import pdb;pdb.set_trace() + lines.append(" %s" % (w_obj,)) + except Exception as e: + lines.append(str(e)) + lines.append(" (no W_Root object)") return '\n'.join(lines) diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -586,8 +586,14 @@ class E: pass # TODO C passes leak checker, D,E fails + # in debugging code in CpyextLeak, badboys has tuple/list + # (, ) + # (, ) + # (, ) + # (, ) + # ... + assert module.test_leak(D()) assert module.test_leak(C()) - assert module.test_leak(D()) assert module.test_leak(E()) def test_tp_getattro(self): diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -540,7 +540,8 @@ # w_obj is an instance of w_A or one of its subclasses. So climb up the # inheritance chain until base.c_tp_dealloc is exactly this_func, and then # continue on up until they differ. 
- print 'subtype_dealloc, start from', rffi.charp2str(base.c_tp_name) + name = rffi.charp2str(base.c_tp_name) + print 'subtype_dealloc, start from', name while base.c_tp_dealloc != this_func_ptr: base = base.c_tp_base assert base @@ -915,7 +916,6 @@ base = pto.c_tp_base base_pyo = rffi.cast(PyObject, pto.c_tp_base) if base and not base.c_tp_flags & Py_TPFLAGS_READY: - name = rffi.charp2str(cts.cast('char*', base.c_tp_name)) type_realize(space, base_pyo) if base and not pto.c_ob_type: # will be filled later pto.c_ob_type = base.c_ob_type From pypy.commits at gmail.com Fri Aug 4 06:40:31 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 04 Aug 2017 03:40:31 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-debug-type_dealloc: maybe holding references from the SLOTS dictionary? it seems not ... Message-ID: <59844f1f.cf97df0a.f8a92.bd74@mx.google.com> Author: Matti Picus Branch: cpyext-debug-type_dealloc Changeset: r92072:7bd0d7e8d934 Date: 2017-08-04 13:39 +0300 http://bitbucket.org/pypy/pypy/changeset/7bd0d7e8d934/ Log: maybe holding references from the SLOTS dictionary? it seems not ... diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -61,12 +61,31 @@ def _dealloc(space, obj): # This frees an object after its refcount dropped to zero, so we # assert that it is really zero here. 
+ from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY assert obj.c_ob_refcnt == 0 pto = obj.c_ob_type obj_voidp = rffi.cast(rffi.VOIDP, obj) generic_cpy_call(space, pto.c_tp_free, obj_voidp) if pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE: + from pypy.module.cpyext.slotdefs import SLOTS + w_obj = from_ref(space, pto) + if pto.c_ob_refcnt <= REFCNT_FROM_PYPY + 1: + # remove the to-be-deleted type's functions from slotdef.SLOTS + toremove = [] + typedef = w_obj.layout.typedef + for k in SLOTS.keys(): + if k[0] is typedef: + toremove.append(k) + for k in toremove: + SLOTS.pop(k) + print w_obj.name, k[1] + else: + print 'not releasing', w_obj.name, 'w/refcnt',pto.c_ob_refcnt - REFCNT_FROM_PYPY Py_DecRef(space, rffi.cast(PyObject, pto)) + else: + w_obj = from_ref(space, pto) + print 'no decref', w_obj.name, 'w/refcnt',pto.c_ob_refcnt - REFCNT_FROM_PYPY + @cpython_api([PyTypeObjectPtr], PyObject, result_is_ll=True) def _PyObject_GC_New(space, type): diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -291,7 +291,7 @@ if not slot_func_helper: if typedef is not None: - if slot_apifunc is None: + if slot_apifunc is None and slot_name in ('tp_new',): slot_apifunc = get_slot_tp_function(space, typedef, slot_name) if not slot_apifunc: if not we_are_translated(): From pypy.commits at gmail.com Fri Aug 4 07:44:30 2017 From: pypy.commits at gmail.com (tobweber) Date: Fri, 04 Aug 2017 04:44:30 -0700 (PDT) Subject: [pypy-commit] stmgc c8-binary-trx-length-per-thread: Fix missing type definitions for custom payload Message-ID: <59845e1e.d1161c0a.22f23.c136@mx.google.com> Author: Tobias Weber Branch: c8-binary-trx-length-per-thread Changeset: r2150:3394aed50b06 Date: 2017-07-14 20:27 +0200 http://bitbucket.org/pypy/stmgc/changeset/3394aed50b06/ Log: Fix missing type definitions for custom payload diff --git a/c8/stm/timing.h b/c8/stm/timing.h --- a/c8/stm/timing.h +++ 
b/c8/stm/timing.h @@ -1,5 +1,9 @@ #include +#define define_payload_types() \ + stm_timing_event_payload_data_t stm_duration_data; \ + stm_timing_event_payload_t stm_duration_payload; + #define continue_timer() clock_gettime(CLOCK_MONOTONIC_RAW, &start); /* Use raw monotonic time, i.e., solely based on local hardware (no NTP @@ -8,8 +12,7 @@ #define start_timer() struct timespec start, stop; \ struct timespec duration = { .tv_sec = 0, .tv_nsec = 0 };\ uint32_t nanosec_diff, sec_diff; \ - stm_timing_event_payload_data_t stm_duration_data; \ - stm_timing_event_payload_t stm_duration_payload; \ + define_payload_types() \ continue_timer() /* Must use start_timer before using this macro. */ @@ -59,5 +62,6 @@ #define publish_custom_value_event(double_value, event) \ set_payload((double_value)) \ + define_payload_types() \ stm_duration_payload(payload_value); \ publish_event(STM_SEGMENT->running_thread, (event)) From pypy.commits at gmail.com Fri Aug 4 08:51:44 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 04 Aug 2017 05:51:44 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge Message-ID: <59846de0.08811c0a.3cd95.84d3@mx.google.com> Author: Carl Friedrich Bolz Branch: py3.5 Changeset: r92074:28233e67ad80 Date: 2017-08-04 14:50 +0200 http://bitbucket.org/pypy/pypy/changeset/28233e67ad80/ Log: merge diff --git a/lib-python/3/stat.py b/lib-python/3/stat.py --- a/lib-python/3/stat.py +++ b/lib-python/3/stat.py @@ -139,13 +139,21 @@ def filemode(mode): """Convert a file's mode to a string of the form '-rwxrwxrwx'.""" perm = [] + + # The first group gets a question mark if none of the bits match the mode. + empty = "?" + for table in _filemode_table: for bit, char in table: if mode & bit == bit: perm.append(char) break else: - perm.append("-") + perm.append(empty) + + # All the rest of the positions get a - if the bits don't match. 
+ empty = "-" + return "".join(perm) diff --git a/lib-python/3/test/test_stat.py b/lib-python/3/test/test_stat.py --- a/lib-python/3/test/test_stat.py +++ b/lib-python/3/test/test_stat.py @@ -138,6 +138,10 @@ self.assertS_IS("REG", st_mode) self.assertEqual(modestr, '-r--r--r--') self.assertEqual(self.statmod.S_IMODE(st_mode), 0o444) + + # If there are only permission bits, no type bytes, a question + # mark is rendered in the type field. + self.assertEqual(self.statmod.filemode(0o420), '?r---w----') else: os.chmod(TESTFN, 0o700) st_mode, modestr = self.get_mode() From pypy.commits at gmail.com Fri Aug 4 08:51:42 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 04 Aug 2017 05:51:42 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge default Message-ID: <59846dde.cc361c0a.19209.3a0f@mx.google.com> Author: Carl Friedrich Bolz Branch: py3.5 Changeset: r92073:5d4625159d48 Date: 2017-08-04 14:50 +0200 http://bitbucket.org/pypy/pypy/changeset/5d4625159d48/ Log: merge default diff too long, truncating to 2000 out of 2201 lines diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ RUNINTERP = $(PYPY_EXECUTABLE) endif -.PHONY: cffi_imports +.PHONY: pypy-c cffi_imports pypy-c: @echo @@ -32,7 +32,7 @@ @echo "====================================================================" @echo @sleep 5 - $(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py + cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py # Note: the -jN option, or MAKEFLAGS=-jN, are not usable. They are # replaced with an opaque --jobserver option by the time this Makefile @@ -40,4 +40,4 @@ # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html cffi_imports: pypy-c - PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true + PYTHONPATH=. 
pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. See Python bug #445902: diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. 
""" - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -227,11 +227,6 @@ "use specialised tuples", default=False), - BoolOption("withcelldict", - "use dictionaries that are optimized for being used as module dicts", - default=False, - requires=[("objspace.honor__builtins__", False)]), - BoolOption("withliststrategies", "enable optimized ways to store lists of primitives ", default=True), @@ -291,7 +286,7 @@ # extra optimizations with the JIT if level == 'jit': - config.objspace.std.suggest(withcelldict=True) + pass # none at the moment def enable_allworkingmodules(config): diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -10,6 +10,18 @@ minutes on a fast machine -- and RAM-hungry. You will need **at least** 2 GB of memory on a 32-bit machine and 4GB on a 64-bit machine. +Before you start +---------------- + +Our normal development workflow avoids a full translation by using test-driven +development. You can read more about how to develop PyPy here_, and latest +translated (hopefully functional) binary packages are available on our +buildbot's `nightly builds`_ + +.. _here: getting-started-dev.html +.. _`nightly builds`: http://buildbot.pypy.org/nightly + +You will need the build dependencies below to run the tests. 
Clone the repository -------------------- @@ -140,22 +152,61 @@ Run the translation ------------------- +We usually translate in the ``pypy/goal`` directory, so all the following +commands assume your ``$pwd`` is there. + Translate with JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=jit Translate without JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=2 +Note this translates pypy via the ``targetpypystandalone.py`` file, so these +are shorthand for:: + + pypy ../../rpython/bin/rpython targetpypystandalone.py + +More help is available via ``--help`` at either option position, and more info +can be found in the :doc:`config/index` section. + (You can use ``python`` instead of ``pypy`` here, which will take longer but works too.) -If everything works correctly this will create an executable ``pypy-c`` in the -current directory. The executable behaves mostly like a normal Python -interpreter (see :doc:`cpython_differences`). +If everything works correctly this will: + +1. Run the rpython `translation chain`_, producing a database of the + entire pypy interpreter. This step is currently single threaded, and RAM + hungry. As part of this step, the chain creates a large number of C code + files and a Makefile to compile them in a + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. +2. Create an executable ``pypy-c`` by running the Makefile. This step can + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. + + +The resulting executable behaves mostly like a normal Python +interpreter (see :doc:`cpython_differences`), and is ready for testing, for +use as a base interpreter for a new virtualenv, or for packaging into a binary +suitable for installation on another machine running the same OS as the build +machine.
+ +Note that step 4 is merely done as a convenience, any of the steps may be rerun +without rerunning the previous steps. + +.. _`translation chain`: https://rpython.readthedocs.io/en/latest/translation.html + + +Making a debug build of PyPy +---------------------------- + +If the Makefile is rerun with the lldebug or lldebug0 target, appropriate +compilation flags are added to add debug info and reduce compiler optimizations +to ``-O0`` respectively. If you stop in a debugger, you will see the +very wordy machine-generated C code from the rpython translation step, which +takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ @@ -169,14 +220,6 @@ .. _`out-of-line API mode`: http://cffi.readthedocs.org/en/latest/overview.html#real-example-api-level-out-of-line -Translating with non-standard options -------------------------------------- - -It is possible to have non-standard features enabled for translation, -but they are not really tested any more. Look, for example, at the -:doc:`objspace proxies ` document. - - Packaging (preparing for installation) -------------------------------------- @@ -205,14 +248,16 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in `package.py`_. Users + commands at install time; the exact list is in + :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in `package.py`_). 
Users seeing a broken + see the exact list in :source:`pypy/tool/release/package.py `). + Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file called ``_gdbm_cffi.pypy-41.so`` locally, which is a C extension diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.std.withcelldict.txt +++ /dev/null @@ -1,2 +0,0 @@ -Enable cell-dicts. This optimization is not helpful without the JIT. In the -presence of the JIT, it greatly helps looking up globals. diff --git a/pypy/doc/configuration.rst b/pypy/doc/configuration.rst --- a/pypy/doc/configuration.rst +++ b/pypy/doc/configuration.rst @@ -188,4 +188,6 @@ can be found on the ``config`` attribute of all ``TranslationContext`` instances and are described in :source:`rpython/config/translationoption.py`. The interpreter options are attached to the object space, also under the name ``config`` and are -described in :source:`pypy/config/pypyoption.py`. +described in :source:`pypy/config/pypyoption.py`. Both set of options are +documented in the :doc:`config/index` section. 
+ diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst deleted file mode 100644 --- a/pypy/doc/cppyy_example.rst +++ /dev/null @@ -1,59 +0,0 @@ -File example.h -============== - -:: - - #include - #include - - class AbstractClass { - public: - virtual ~AbstractClass() {} - virtual void abstract_method() = 0; - }; - - class ConcreteClass : AbstractClass { - public: - ConcreteClass(int n=42) : m_int(n) {} - ~ConcreteClass() {} - - virtual void abstract_method() { - std::cout << "called concrete method" << std::endl; - } - - void array_method(int* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - void array_method(double* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - AbstractClass* show_autocast() { - return this; - } - - operator const char*() { - return "Hello operator const char*!"; - } - - public: - int m_int; - }; - - namespace Namespace { - - class ConcreteClass { - public: - class NestedClass { - public: - std::vector m_v; - }; - - }; - - } // namespace Namespace diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -12,7 +12,7 @@ * Write them in pure Python and use ctypes_. -* Write them in C++ and bind them through :doc:`cppyy ` using Cling. +* Write them in C++ and bind them through cppyy_ using Cling. * Write them as `RPython mixed modules`_. @@ -64,9 +64,9 @@ cppyy ----- -For C++, `cppyy`_ is an automated bindings generator available for both +For C++, _cppyy_ is an automated bindings generator available for both PyPy and CPython. -``cppyy`` relies on declarations from C++ header files to dynamically +_cppyy_ relies on declarations from C++ header files to dynamically construct Python equivalent classes, functions, variables, etc. It is designed for use by large scale programs and supports modern C++. 
With PyPy, it leverages the built-in ``_cppyy`` module, allowing the JIT to @@ -75,8 +75,7 @@ To install, run ``pip install cppyy``. Further details are available in the `full documentation`_. -.. _cppyy: http://cppyy.readthedocs.org/ -.. _`full documentation`: http://cppyy.readthedocs.org/ +.. _`full documentation`: https://cppyy.readthedocs.org/ RPython Mixed Modules diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst --- a/pypy/doc/getting-started-dev.rst +++ b/pypy/doc/getting-started-dev.rst @@ -35,8 +35,8 @@ * Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Run tests! (See - the rest of this page.) + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) * Commit regularly with ``hg commit``. A one-line commit message is fine. We love to have tons of commits; make one as soon as you have @@ -113,6 +113,10 @@ make sure you have the correct version installed which you can find out with the ``--version`` switch. +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter + Now on to running some tests. PyPy has many different test directories and you can use shell completion to point at directories or files:: @@ -141,7 +145,7 @@ .. _py.test testing tool: http://pytest.org .. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage - +.. 
_`build requirements`: build.html#install-build-time-dependencies Special Introspection Features of the Untranslated Python Interpreter --------------------------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -38,3 +38,25 @@ Renaming of ``cppyy`` to ``_cppyy``. The former is now an external package installable with ``pip install cppyy``. + +.. branch: Enable_PGO_for_clang + +.. branch: nopax + +At the end of translation, run ``attr -q -s pax.flags -V m`` on +PAX-enabled systems on the produced binary. This seems necessary +because PyPy uses a JIT. + +.. branch: pypy_bytearray + +Improve ``bytearray`` performance (backported from py3.5) + +.. branch: gc-del-limit-growth + +Fix the bounds in the GC when allocating a lot of objects with finalizers, +fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -2,6 +2,7 @@ Arguments objects. 
""" from rpython.rlib.debug import make_sure_not_resized +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import jit from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rstring import StringBuilder @@ -48,8 +49,8 @@ # behaviour but produces better error messages self.methodcall = methodcall + @not_rpython def __repr__(self): - """ NOT_RPYTHON """ name = self.__class__.__name__ if not self.keywords: return '%s(%s)' % (name, self.arguments_w,) diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -7,7 +7,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, specialize -from rpython.rlib.objectmodel import dont_inline +from rpython.rlib.objectmodel import dont_inline, not_rpython from rpython.rlib import rstack, rstackovf from rpython.rlib import rwin32 from rpython.rlib import runicode @@ -65,8 +65,9 @@ self.match(space, space.w_KeyboardInterrupt)) # note: an extra case is added in OpErrFmtNoArgs + @not_rpython def __str__(self): - "NOT_RPYTHON: Convenience for tracebacks." + "Convenience for tracebacks." s = self._w_value space = getattr(self.w_type, 'space', None) if space is not None: @@ -119,15 +120,16 @@ if RECORD_INTERPLEVEL_TRACEBACK: self.debug_excs.append(sys.exc_info()) + @not_rpython def print_application_traceback(self, space, file=None): - "NOT_RPYTHON: Dump a standard application-level traceback." + "Dump a standard application-level traceback." 
if file is None: file = sys.stderr self.print_app_tb_only(file) print >> file, self.errorstr(space) + @not_rpython def print_app_tb_only(self, file): - "NOT_RPYTHON" tb = self._application_traceback if tb: import linecache @@ -154,8 +156,9 @@ print >> file, l tb = tb.next + @not_rpython def print_detailed_traceback(self, space=None, file=None): - """NOT_RPYTHON: Dump a nice detailed interpreter- and + """Dump a nice detailed interpreter- and application-level traceback, useful to debug the interpreter.""" if file is None: file = sys.stderr diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -1,6 +1,7 @@ import sys from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib import jit, rgc, objectmodel TICK_COUNTER_STEP = 100 @@ -410,8 +411,9 @@ # to run at the next possible bytecode self.reset_ticker(-1) + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): - """NOT_RPYTHON: + """ Register the PeriodicAsyncAction action to be called whenever the tick counter becomes smaller than 0. If 'use_bytecode_counter' is True, make sure that we decrease the tick counter at every bytecode. 
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -75,8 +75,8 @@ def _freeze_(self): return True + @not_rpython def unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -399,8 +399,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): - """NOT_RPYTHON""" self.behavior = behavior def _run(self, space, scope_w): @@ -654,9 +654,9 @@ # When a BuiltinCode is stored in a Function object, # you get the functionality of CPython's built-in function type. + @not_rpython def __init__(self, func, unwrap_spec=None, self_type=None, descrmismatch=None, doc=None): - "NOT_RPYTHON" # 'implfunc' is the interpreter-level function. # Note that this uses a lot of (construction-time) introspection. 
Code.__init__(self, func.__name__) @@ -1004,10 +1004,10 @@ instancecache = {} + @not_rpython def __new__(cls, f, app_name=None, unwrap_spec=None, descrmismatch=None, as_classmethod=False, doc=None): - "NOT_RPYTHON" # f must be a function whose name does NOT start with 'app_' self_type = None if hasattr(f, 'im_func'): @@ -1047,8 +1047,8 @@ return self + @not_rpython def _getdefaults(self, space): - "NOT_RPYTHON" alldefs_w = {} assert len(self._code._argnames) == len(self._code._unwrap_spec) for name, spec in zip(self._code._argnames, self._code._unwrap_spec): @@ -1124,8 +1124,8 @@ class GatewayCache(SpaceCache): + @not_rpython def build(cache, gateway): - "NOT_RPYTHON" space = cache.space defs_w, kw_defs_w = gateway._getdefaults(space) code = gateway._code @@ -1196,8 +1196,8 @@ w_globals = self.getwdict(space) return space.getitem(w_globals, space.newtext(name)) + @not_rpython def interphook(self, name): - "NOT_RPYTHON" def appcaller(space, *args_w): if not isinstance(space, ObjSpace): raise TypeError("first argument must be a space instance.") @@ -1234,15 +1234,16 @@ """NOT_RPYTHON The cache mapping each applevel instance to its lazily built w_dict""" + @not_rpython def build(self, app): - "NOT_RPYTHON. Called indirectly by Applevel.getwdict()." + "Called indirectly by Applevel.getwdict()." 
return build_applevel_dict(app, self.space) # __________ pure applevel version __________ + at not_rpython def build_applevel_dict(self, space): - "NOT_RPYTHON" w_glob = space.newdict(module=True) space.setitem(w_glob, space.newtext('__name__'), space.newtext(self.modname)) space.exec_(self.source, w_glob, w_glob, @@ -1253,8 +1254,9 @@ # ____________________________________________________________ + at not_rpython def appdef(source, applevel=ApplevelClass, filename=None): - """ NOT_RPYTHON: build an app-level helper function, like for example: + """ build an app-level helper function, like for example: myfunc = appdef('''myfunc(x, y): return x+y ''') @@ -1300,6 +1302,6 @@ # app2interp_temp is used for testing mainly + at not_rpython def app2interp_temp(func, applevel_temp=applevel_temp, filename=None): - """ NOT_RPYTHON """ return appdef(func, applevel_temp, filename=filename) diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -3,6 +3,7 @@ """ from rpython.rlib.listsort import make_timsort_class +from rpython.rlib.objectmodel import not_rpython class ThreadLocals: @@ -41,9 +42,8 @@ # but in some corner cases it is not... 
unsure why self._value = None - + at not_rpython def make_weak_value_dictionary(space, keytype, valuetype): - "NOT_RPYTHON" if space.config.translation.rweakref: from rpython.rlib.rweakref import RWeakValueDictionary return RWeakValueDictionary(keytype, valuetype) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -3,6 +3,9 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.baseobjspace import W_Root + +from rpython.rlib.objectmodel import not_rpython + import sys class MixedModule(Module): @@ -15,8 +18,8 @@ lazy = False submodule_name = None + @not_rpython def __init__(self, space, w_name): - """ NOT_RPYTHON """ Module.__init__(self, space, w_name) init_extra_module_attrs(space, self) self.lazy = True @@ -25,8 +28,9 @@ self.loaders = self.loaders.copy() # copy from the class to the inst self.submodules_w = [] + @not_rpython def install(self): - """NOT_RPYTHON: install this module, and it's submodules into + """install this module, and it's submodules into space.builtin_modules""" Module.install(self) if hasattr(self, "submodules"): @@ -66,8 +70,8 @@ self.w_initialdict = self.space.call_method(w_dict, 'copy') @classmethod + @not_rpython def get_applevel_name(cls): - """ NOT_RPYTHON """ if cls.applevel_name is not None: return cls.applevel_name else: @@ -163,8 +167,8 @@ self._frozen = True @classmethod + @not_rpython def buildloaders(cls): - """ NOT_RPYTHON """ if not hasattr(cls, 'loaders'): # build a constant dictionary out of # applevel/interplevel definitions @@ -194,8 +198,8 @@ return space.newtext_or_none(cls.__doc__) + at not_rpython def getinterpevalloader(pkgroot, spec): - """ NOT_RPYTHON """ def ifileloader(space): d = {'space': space} # EVIL HACK (but it works, and this is not RPython :-) @@ -235,8 +239,8 @@ return ifileloader applevelcache = {} + at not_rpython def 
getappfileloader(pkgroot, appname, spec): - """ NOT_RPYTHON """ # hum, it's a bit more involved, because we usually # want the import at applevel modname, attrname = spec.split('.') diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -4,7 +4,7 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython class Module(W_Root): @@ -35,8 +35,9 @@ except OperationError: pass + @not_rpython def install(self): - """NOT_RPYTHON: installs this module into space.builtin_modules""" + """installs this module into space.builtin_modules""" modulename = self.space.text0_w(self.w_name) if modulename in self.space.builtin_modules: raise ValueError( @@ -44,8 +45,9 @@ "app-level module %r" % (modulename,)) self.space.builtin_modules[modulename] = self + @not_rpython def setup_after_space_initialization(self): - """NOT_RPYTHON: to allow built-in modules to do some more setup + """to allow built-in modules to do some more setup after the space is fully initialized.""" def init(self, space): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -7,6 +7,7 @@ from rpython.rlib.debug import ll_assert_not_none from rpython.rlib.jit import hint from rpython.rlib.objectmodel import instantiate, specialize, we_are_translated +from rpython.rlib.objectmodel import not_rpython from rpython.rlib.rarithmetic import intmask, r_uint from rpython.tool.pairtype import extendabletype @@ -146,8 +147,9 @@ return None return d.w_locals + @not_rpython def __repr__(self): - # NOT_RPYTHON: useful in tracebacks + # useful in tracebacks return "<%s.%s executing %s at line %s" % ( self.__class__.__module__, self.__class__.__name__, self.pycode, self.get_last_lineno()) 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -7,7 +7,7 @@ from rpython.rlib import jit, rstackovf, rstring from rpython.rlib.debug import check_nonneg from rpython.rlib.objectmodel import ( - we_are_translated, always_inline, dont_inline) + we_are_translated, always_inline, dont_inline, not_rpython) from rpython.rlib.rarithmetic import r_uint, intmask from rpython.tool.sourcetools import func_with_new_name @@ -23,8 +23,8 @@ CANNOT_CATCH_MSG = ("catching classes that don't inherit from BaseException " "is not allowed in 3.x") + at not_rpython def unaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_1 = self.popvalue() @@ -34,8 +34,8 @@ return func_with_new_name(opimpl, "opcode_impl_for_%s" % operationname) + at not_rpython def binaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_2 = self.popvalue() diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -8,15 +8,16 @@ from rpython.rlib.jit import promote from rpython.rlib.objectmodel import compute_identity_hash, specialize -from rpython.rlib.objectmodel import instantiate +from rpython.rlib.objectmodel import instantiate, not_rpython from rpython.tool.sourcetools import compile2, func_with_new_name class TypeDef(object): + @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, __confirm_applevel_del__=False, variable_sized=False, **rawdict): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" self.name = __name if __base is None: bases = [] @@ -116,8 +117,9 @@ # register_finalizer() or not. 
@specialize.memo() + at not_rpython def get_unique_interplevel_subclass(space, cls): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert cls.typedef.acceptable_as_base_class try: return _unique_subclass_cache[cls] @@ -367,15 +369,17 @@ return self + at not_rpython def interp_attrproperty(name, cls, doc=None, wrapfn=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert wrapfn is not None def fget(space, obj): return getattr(space, wrapfn)(getattr(obj, name)) return GetSetProperty(fget, cls=cls, doc=doc) + at not_rpython def interp_attrproperty_w(name, cls, doc=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" def fget(space, obj): w_value = getattr(obj, name) if w_value is None: diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -433,17 +433,22 @@ def _sizeof(self): return self.ctype.size - def with_gc(self, w_destructor): + def with_gc(self, w_destructor, size=0): space = self.space if space.is_none(w_destructor): if isinstance(self, W_CDataGCP): self.detach_destructor() - return space.w_None - raise oefmt(space.w_TypeError, - "Can remove destructor only on a object " - "previously returned by ffi.gc()") - with self as ptr: - return W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + w_res = space.w_None + else: + raise oefmt(space.w_TypeError, + "Can remove destructor only on a object " + "previously returned by ffi.gc()") + else: + with self as ptr: + w_res = W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + if size != 0: + rgc.add_memory_pressure(size) + return w_res def unpack(self, length): from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -351,14 
+351,14 @@ return handle.from_handle(self.space, w_arg) - @unwrap_spec(w_cdata=W_CData) - def descr_gc(self, w_cdata, w_destructor): + @unwrap_spec(w_cdata=W_CData, size=int) + def descr_gc(self, w_cdata, w_destructor, size=0): """\ Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called.""" # - return w_cdata.with_gc(w_destructor) + return w_cdata.with_gc(w_destructor, size) @unwrap_spec(replace_with='text') diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -257,6 +257,6 @@ # ____________________________________________________________ - at unwrap_spec(w_cdata=cdataobj.W_CData) -def gcp(space, w_cdata, w_destructor): - return w_cdata.with_gc(w_destructor) + at unwrap_spec(w_cdata=cdataobj.W_CData, size=int) +def gcp(space, w_cdata, w_destructor, size=0): + return w_cdata.with_gc(w_destructor, size) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -377,7 +377,7 @@ raises(TypeError, ffi.gc, p, None) seen = [] q1 = ffi.gc(p, lambda p: seen.append(1)) - q2 = ffi.gc(q1, lambda p: seen.append(2)) + q2 = ffi.gc(q1, lambda p: seen.append(2), size=123) import gc; gc.collect() assert seen == [] assert ffi.gc(q1, None) is None diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py --- a/pypy/module/_codecs/__init__.py +++ b/pypy/module/_codecs/__init__.py @@ -1,5 +1,6 @@ from pypy.interpreter.mixedmodule import MixedModule from rpython.rlib import runicode +from rpython.rlib.objectmodel import not_rpython from pypy.module._codecs import interp_codecs class Module(MixedModule): @@ -85,9 +86,8 @@ 'unicode_internal_encode' : 'interp_codecs.unicode_internal_encode', } + 
@not_rpython def __init__(self, space, *args): - "NOT_RPYTHON" - # mbcs codec is Windows specific, and based on rffi. if (hasattr(runicode, 'str_decode_mbcs')): self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode' diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,6 +1,6 @@ import sys from rpython.rlib import jit -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder, StringBuilder from rpython.rlib.runicode import ( code_to_unichr, MAXUNICODE, @@ -477,8 +477,8 @@ raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) + at not_rpython def register_builtin_error_handlers(space): - "NOT_RPYTHON" state = space.fromcache(CodecState) for error in ("strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace", "surrogateescape", "surrogatepass", diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -7,6 +8,8 @@ def setup_class(cls): cls.w_tmpfilename = cls.space.wrap(str(udir.join('test__vmprof.1'))) cls.w_tmpfilename2 = cls.space.wrap(str(udir.join('test__vmprof.2'))) + cls.w_plain = cls.space.wrap(not cls.runappdirect and + '__pypy__' not in sys.builtin_module_names) def test_import_vmprof(self): tmpfile = open(self.tmpfilename, 'wb') @@ -120,6 +123,8 @@ assert _vmprof.get_profile_path() is None def test_stop_sampling(self): + if not self.plain: + skip("unreliable test except on CPython without -A") import os import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/array/interp_array.py 
b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -138,6 +138,29 @@ return space.w_True return space.w_False +index_count_jd = jit.JitDriver( + greens = ['count', 'arrclass', 'tp_item'], + reds = 'auto', name = 'array.index_or_count') + +def index_count_array(arr, w_val, count=False): + space = arr.space + tp_item = space.type(w_val) + arrclass = arr.__class__ + cnt = 0 + for i in range(arr.len): + index_count_jd.jit_merge_point( + tp_item=tp_item, count=count, + arrclass=arrclass) + w_item = arr.w_getitem(space, i) + if space.eq_w(w_item, w_val): + if count: + cnt += 1 + else: + return i + if count: + return cnt + return -1 + UNICODE_ARRAY = lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True})) @@ -274,17 +297,12 @@ """ self.extend(w_x) - def descr_count(self, space, w_val): + def descr_count(self, space, w_x): """ count(x) Return number of occurrences of x in the array. """ - cnt = 0 - for i in range(self.len): - # XXX jitdriver - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_val): - cnt += 1 + cnt = index_count_array(self, w_x, count=True) return space.newint(cnt) def descr_index(self, space, w_x): @@ -292,10 +310,9 @@ Return index of first occurrence of x in the array. 
""" - for i in range(self.len): - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_x): - return space.newint(i) + res = index_count_array(self, w_x, count=False) + if res >= 0: + return space.newint(res) raise oefmt(space.w_ValueError, "array.index(x): x not in list") def descr_reverse(self, space): @@ -801,7 +818,9 @@ class TypeCode(object): def __init__(self, itemtype, unwrap, canoverflow=False, signed=False, - method='__int__'): + method='__int__', errorname=None): + if errorname is None: + errorname = unwrap[:-2] self.itemtype = itemtype self.bytes = rffi.sizeof(itemtype) self.arraytype = lltype.Array(itemtype, hints={'nolength': True}) @@ -812,6 +831,7 @@ self.canoverflow = canoverflow self.w_class = None self.method = method + self.errorname = errorname def _freeze_(self): # hint for the annotator: track individual constant instances @@ -838,9 +858,9 @@ 'i': TypeCode(rffi.INT, 'int_w', True, True), 'I': _UINTTypeCode, 'l': TypeCode(rffi.LONG, 'int_w', True, True), - 'L': TypeCode(rffi.ULONG, 'bigint_w.touint'), - 'q': TypeCode(rffi.LONGLONG, 'bigint_w.tolonglong', True, True), - 'Q': TypeCode(rffi.ULONGLONG, 'bigint_w.toulonglong', True), + 'L': TypeCode(rffi.ULONG, 'bigint_w.touint', errorname="integer"), + 'q': TypeCode(rffi.LONGLONG, 'bigint_w.tolonglong', True, True, errorname="integer"), + 'Q': TypeCode(rffi.ULONGLONG, 'bigint_w.toulonglong', True, errorname="integer"), 'f': TypeCode(lltype.SingleFloat, 'float_w', method='__float__'), 'd': TypeCode(lltype.Float, 'float_w', method='__float__'), } @@ -970,7 +990,7 @@ except OperationError as e: if e.async(space): raise - msg = "array item must be " + mytype.unwrap[:-2] + msg = "array item must be " + mytype.errorname raise OperationError(space.w_TypeError, space.newtext(msg)) else: diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -144,6 +144,11 @@ raises(OverflowError, 
a.append, -1) raises(OverflowError, a.append, 2 ** (8 * b)) + def test_errormessage(self): + a = self.array("L", [1, 2, 3]) + excinfo = raises(TypeError, "a[0] = 'abc'") + assert str(excinfo.value) == "array item must be integer" + def test_fromstring(self): a = self.array('b') a.fromstring('Hi!') diff --git a/pypy/module/cppyy/test/test_cint.py b/pypy/module/cppyy/test/test_cint.py deleted file mode 100644 --- a/pypy/module/cppyy/test/test_cint.py +++ /dev/null @@ -1,710 +0,0 @@ -import py, os, sys - -# These tests are for the CINT backend only (they exercise ROOT features -# and classes that are not loaded/available with the Reflex backend). At -# some point, these tests are likely covered by the CLang/LLVM backend. -from pypy.module.cppyy import capi -if capi.identify() != 'CINT': - py.test.skip("backend-specific: CINT-only tests") - -# load _cffi_backend early, or its global vars are counted as leaks in the -# test (note that the module is not otherwise used in the test itself) -from pypy.module._cffi_backend import newtype - -currpath = py.path.local(__file__).dirpath() -iotypes_dct = str(currpath.join("iotypesDict.so")) - -def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make CINT=t iotypesDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") - -class AppTestCINT: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def test01_globals(self): - """Test the availability of ROOT globals""" - - import cppyy - - assert cppyy.gbl.gROOT - assert cppyy.gbl.gApplication - assert cppyy.gbl.gSystem - assert cppyy.gbl.TInterpreter.Instance() # compiled - assert cppyy.gbl.TInterpreter # interpreted - assert cppyy.gbl.TDirectory.CurrentDirectory() # compiled - assert cppyy.gbl.TDirectory # interpreted - - def test02_write_access_to_globals(self): - """Test overwritability of ROOT globals""" - - import cppyy - - oldval = cppyy.gbl.gDebug - assert oldval != 
3 - - proxy = cppyy.gbl.__class__.__dict__['gDebug'] - cppyy.gbl.gDebug = 3 - assert proxy.__get__(proxy, None) == 3 - - # this is where this test differs from test03_write_access_to_globals - # in test_pythonify.py - cppyy.gbl.gROOT.ProcessLine('int gDebugCopy = gDebug;') - assert cppyy.gbl.gDebugCopy == 3 - - cppyy.gbl.gDebug = oldval - - def test03_create_access_to_globals(self): - """Test creation and access of new ROOT globals""" - - import cppyy - - cppyy.gbl.gROOT.ProcessLine('double gMyOwnGlobal = 3.1415') - assert cppyy.gbl.gMyOwnGlobal == 3.1415 - - proxy = cppyy.gbl.__class__.__dict__['gMyOwnGlobal'] - assert proxy.__get__(proxy, None) == 3.1415 - - def test04_auto_loading(self): - """Test auto-loading by retrieving a non-preloaded class""" - - import cppyy - - l = cppyy.gbl.TLorentzVector() - assert isinstance(l, cppyy.gbl.TLorentzVector) - - def test05_macro_loading(self): - """Test accessibility to macro classes""" - - import cppyy - - loadres = cppyy.gbl.gROOT.LoadMacro('simple_class.C') - assert loadres == 0 - - base = cppyy.gbl.MySimpleBase - simple = cppyy.gbl.MySimpleDerived - simple_t = cppyy.gbl.MySimpleDerived_t - - assert issubclass(simple, base) - assert simple is simple_t - - c = simple() - assert isinstance(c, simple) - assert c.m_data == c.get_data() - - c.set_data(13) - assert c.m_data == 13 - assert c.get_data() == 13 - - -class AppTestCINTPYTHONIZATIONS: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def test01_strings(self): - """Test TString/TObjString compatibility""" - - import cppyy - - pyteststr = "aap noot mies" - def test_string(s1, s2): - assert len(s1) == len(s2) - assert s1 == s1 - assert s1 == s2 - assert s1 == str(s1) - assert s1 == pyteststr - assert s1 != "aap" - assert s1 != "" - assert s1 < "noot" - assert repr(s1) == repr(s2) - - s1 = cppyy.gbl.TString(pyteststr) - test_string(s1, pyteststr) - - s3 = cppyy.gbl.TObjString(pyteststr) - test_string(s3, pyteststr) - - def test03_TVector(self): - 
-        """Test TVector2/3/T behavior"""
-
-        import cppyy, math
-
-        N = 51
-
-        # TVectorF is a typedef of floats
-        v = cppyy.gbl.TVectorF(N)
-        for i in range(N):
-            v[i] = i*i
-
-        assert len(v) == N
-        for j in v:
-            assert round(v[int(math.sqrt(j)+0.5)]-j, 5) == 0.
-
-    def test04_TStringTObjString(self):
-        """Test string/TString interchangebility"""
-
-        import cppyy
-
-        test = "aap noot mies"
-
-        s1 = cppyy.gbl.TString(test )
-        s2 = str(s1)
-
-        assert s1 == test
-        assert test == s2
-        assert s1 == s2
-
-        s3 = cppyy.gbl.TObjString(s2)
-        assert s3 == test
-        assert s2 == s3
-
-        # force use of: TNamed(const TString &name, const TString &title)
-        n = cppyy.gbl.TNamed(test, cppyy.gbl.TString("title"))
-        assert n.GetTitle() == "title"
-        assert n.GetName() == test
-
-
-class AppTestCINTTTREE:
-    spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools'])
-
-    def setup_class(cls):
-        cls.w_N = cls.space.newint(5)
-        cls.w_M = cls.space.newint(10)
-        cls.w_fname = cls.space.newtext("test.root")
-        cls.w_tname = cls.space.newtext("test")
-        cls.w_title = cls.space.newtext("test tree")
-        cls.w_iotypes = cls.space.appexec([], """():
-            import cppyy
-            return cppyy.load_reflection_info(%r)""" % (iotypes_dct,))
-
-    def test01_write_stdvector(self):
-        """Test writing of a single branched TTree with an std::vector"""
-
-        from cppyy import gbl               # bootstraps, only needed for tests
-        from cppyy.gbl import TFile, TTree
-        from cppyy.gbl.std import vector
-
-        f = TFile(self.fname, "RECREATE")
-        mytree = TTree(self.tname, self.title)
-        mytree._python_owns = False
-
-        v = vector("double")()
-        raises(TypeError, TTree.Branch, None, "mydata", v.__class__.__name__, v)
-        raises(TypeError, TTree.Branch, v, "mydata", v.__class__.__name__, v)
-
-        mytree.Branch("mydata", v.__class__.__name__, v)
-
-        for i in range(self.N):
-            for j in range(self.M):
-                v.push_back(i*self.M+j)
-            mytree.Fill()
-            v.clear()
-        f.Write()
-        f.Close()
-
-    def test02_file_open(self):
-
-        from cppyy import gbl
-
-        f = gbl.TFile.Open(self.fname)
-        s = str(f)    # should not raise
-        r = repr(f)
-
-        f.Close()
-
-    def test03_read_stdvector(self):
-        """Test reading of a single branched TTree with an std::vector"""
-
-        from cppyy import gbl
-        from cppyy.gbl import TFile
-
-        f = TFile(self.fname)
-        mytree = f.Get(self.tname)
-
-        i = 0
-        for event in mytree:
-            assert len(event.mydata) == self.M
-            for entry in event.mydata:
-                assert i == int(entry)
-                i += 1
-        assert i == self.N * self.M
-
-        f.Close()
-
-    def test04_write_some_data_object(self):
-        """Test writing of a complex data object"""
-
-        from cppyy import gbl
-        from cppyy.gbl import TFile, TTree, IO
-        from cppyy.gbl.IO import SomeDataObject
-
-        f = TFile(self.fname, "RECREATE")
-        mytree = TTree(self.tname, self.title)
-
-        d = SomeDataObject()
-        b = mytree.Branch("data", d)
-        mytree._python_owns = False
-        assert b
-
-        for i in range(self.N):
-            for j in range(self.M):
-                d.add_float(i*self.M+j)
-            d.add_tuple(d.get_floats())
-
-            mytree.Fill()
-
-        f.Write()
-        f.Close()
-
-    def test05_read_some_data_object(self):
-        """Test reading of a complex data object"""
-
-        from cppyy import gbl
-        from cppyy.gbl import TFile
-
-        f = TFile(self.fname)
-        mytree = f.Get(self.tname)
-
-        j = 1
-        for event in mytree:
-            i = 0
-            assert len(event.data.get_floats()) == j*self.M
-            for entry in event.data.get_floats():
-                assert i == int(entry)
-                i += 1
-
-            k = 1
-            assert len(event.data.get_tuples()) == j
-            for mytuple in event.data.get_tuples():
-                i = 0
-                assert len(mytuple) == k*self.M
-                for entry in mytuple:
-                    assert i == int(entry)
-                    i += 1
-                k += 1
-            j += 1
-        assert j-1 == self.N
-        #
-        f.Close()
-
-    def test06_branch_activation(self):
-        """Test of automatic branch activation"""
-
-        from cppyy import gbl
-        from cppyy.gbl import TFile, TTree
-        from cppyy.gbl.std import vector
-
-        L = 5
-
-        # writing
-        f = TFile(self.fname, "RECREATE")
-        mytree = TTree(self.tname, self.title)
-        mytree._python_owns = False
-
-        for i in range(L):
-            v = vector("double")()
-            mytree.Branch("mydata_%d"%i, v.__class__.__name__, v)
-            mytree.__dict__["v_%d"%i] = v
-
-        for i in range(self.N):
-            for k in range(L):
-                v = mytree.__dict__["v_%d"%k]
-                for j in range(self.M):
-                    mytree.__dict__["v_%d"%k].push_back(i*self.M+j*L+k)
-            mytree.Fill()
-            for k in range(L):
-                v = mytree.__dict__["v_%d"%k]
-                v.clear()
-        f.Write()
-        f.Close()
-
-        del mytree, f
-        import gc
-        gc.collect()
-
-        # reading
-        f = TFile(self.fname)
-        mytree = f.Get(self.tname)
-
-        # force (initial) disabling of all branches
-        mytree.SetBranchStatus("*",0);
-
-        i = 0
-        for event in mytree:
-            for k in range(L):
-                j = 0
-                data = getattr(mytree, "mydata_%d"%k)
-                assert len(data) == self.M
-                for entry in data:
-                    assert entry == i*self.M+j*L+k
-                    j += 1
-                assert j == self.M
-            i += 1
-        assert i == self.N
-
-        f.Close()
-
-    def test07_write_builtin(self):
-        """Test writing of builtins"""
-
-        from cppyy import gbl               # bootstraps, only needed for tests
-        from cppyy.gbl import TFile, TTree
-        from cppyy.gbl.std import vector
-
-        f = TFile(self.fname, "RECREATE")
-        mytree = TTree(self.tname, self.title)
-        mytree._python_owns = False
-
-        import array
-        mytree.ba = array.array('c', [chr(0)])
-        mytree.ia = array.array('i', [0])
-        mytree.da = array.array('d', [0.])
-
-        mytree.Branch("my_bool", mytree.ba, "my_bool/O")
-        mytree.Branch("my_int", mytree.ia, "my_int/I")
-        mytree.Branch("my_int2", mytree.ia, "my_int2/I")
-        mytree.Branch("my_double", mytree.da, "my_double/D")
-
-        for i in range(self.N):
-            # make sure value is different from default (0)
-            mytree.ba[0] = i%2 and chr(0) or chr(1)
-            mytree.ia[0] = i+1
-            mytree.da[0] = (i+1)/2.
-            mytree.Fill()
-        f.Write()
-        f.Close()
-
-    def test08_read_builtin(self):
-        """Test reading of builtins"""
-
-        from cppyy import gbl
-        from cppyy.gbl import TFile
-
-        f = TFile(self.fname)
-        mytree = f.Get(self.tname)
-
-        raises(AttributeError, getattr, mytree, "does_not_exist")
-
-        i = 1
-        for event in mytree:
-            assert event.my_bool == (i-1)%2 and 0 or 1
-            assert event.my_int == i
-            assert event.my_double == i/2.
-            i += 1
-        assert (i-1) == self.N
-
-        f.Close()
-
-    def test09_user_read_builtin(self):
-        """Test user-directed reading of builtins"""
-
-        from cppyy import gbl
-        from cppyy.gbl import TFile
-
-        f = TFile(self.fname)
-        mytree = f.Get(self.tname)
-
-        # note, this is an old, annoted tree from test08
-        for i in range(3, mytree.GetEntriesFast()):
-            mytree.GetEntry(i)
-            assert mytree.my_int == i+1
-            assert mytree.my_int2 == i+1
-
-        f.Close()
-
-class AppTestCINTREGRESSION:
-    spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools'])
-
-    # these are tests that at some point in the past resulted in failures on
-    # PyROOT; kept here to confirm no regression from PyROOT
-
-    def test01_regression(self):
-        """TPaveText::AddText() used to result in KeyError"""
-
-        # This is where the original problem was discovered, and the test is
-        # left in. However, the detailed underlying problem, as well as the
-        # solution to it, is tested in test_fragile.py
-
-        from cppyy import gbl
-        from cppyy.gbl import TPaveText
-
-        hello = TPaveText( .1, .8, .9, .97 )
-        hello.AddText( 'Hello, World!' )
-
-
-class AppTestCINTFUNCTION:
-    spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools'])
-    _pypytest_leaks = None   # TODO: figure out the false positives
-
-    # test the function callbacks; this does not work with Reflex, as it can
-    # not generate functions on the fly (it might with cffi?)
-
-    @py.test.mark.dont_track_allocations("TODO: understand; initialization left-over?")
-    def test01_global_function_callback(self):
-        """Test callback of a python global function"""
-
-        import cppyy, gc
-        TF1 = cppyy.gbl.TF1
-
-        def identity(x):
-            return x[0]
-
-        f = TF1("pyf1", identity, -1., 1., 0)
-
-        assert f.Eval(0.5) == 0.5
-        assert f.Eval(-10.) == -10.
-        assert f.Eval(1.0) == 1.0
-
-        # check proper propagation of default value
-        f = TF1("pyf1d", identity, -1., 1.)
-
-        assert f.Eval(0.5) == 0.5
-
-        del f      # force here, to prevent leak-check complaints
-        gc.collect()
-
-    def test02_callable_object_callback(self):
-        """Test callback of a python callable object"""
-
-        import cppyy, gc
-        TF1 = cppyy.gbl.TF1
-
-        class Linear:
-            def __call__(self, x, par):
-                return par[0] + x[0]*par[1]
-
-        f = TF1("pyf2", Linear(), -1., 1., 2)
-        f.SetParameters(5., 2.)
-
-        assert f.Eval(-0.1) == 4.8
-        assert f.Eval(1.3) == 7.6
-
-        del f      # force here, to prevent leak-check complaints
-        gc.collect()
-
-    def test03_fit_with_python_gaussian(self):
-        """Test fitting with a python global function"""
-
-        # note: this function is dread-fully slow when running testing un-translated
-
-        import cppyy, gc, math
-        TF1, TH1F = cppyy.gbl.TF1, cppyy.gbl.TH1F
-
-        def pygaus(x, par):
-            arg1 = 0
-            scale1 = 0
-            ddx = 0.01
-
-            if (par[2] != 0.0):
-                arg1 = (x[0]-par[1])/par[2]
-                scale1 = (ddx*0.39894228)/par[2]
-                h1 = par[0]/(1+par[3])
-
-                gauss = h1*scale1*math.exp(-0.5*arg1*arg1)
-            else:
-                gauss = 0.
-            return gauss
-
-        f = TF1("pygaus", pygaus, -4, 4, 4)
-        f.SetParameters(600, 0.43, 0.35, 600)
-
-        h = TH1F("h", "test", 100, -4, 4)
-        h.FillRandom("gaus", 200000)
-        h.Fit(f, "0Q")
-
-        assert f.GetNDF() == 96
-        result = f.GetParameters()
-        assert round(result[1] - 0., 1) == 0  # mean
-        assert round(result[2] - 1., 1) == 0  # s.d.
-
-        del f      # force here, to prevent leak-check complaints
-        gc.collect()
-
-
-class AppTestSURPLUS:
-    spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools'])
-
-    # these are tests that were historically exercised on ROOT classes and
-    # have twins on custom classes; kept here just in case differences crop
-    # up between the ROOT classes and the custom ones
-
-    def test01_class_enum(self):
-        """Test class enum access and values"""
-
-        import cppyy
-        TObject = cppyy.gbl.TObject
-        gROOT = cppyy.gbl.gROOT
-
-        assert TObject.kBitMask == gROOT.ProcessLine("return TObject::kBitMask;")
-        assert TObject.kIsOnHeap == gROOT.ProcessLine("return TObject::kIsOnHeap;")
-        assert TObject.kNotDeleted == gROOT.ProcessLine("return TObject::kNotDeleted;")
-        assert TObject.kZombie == gROOT.ProcessLine("return TObject::kZombie;")
-
-        t = TObject()
-
-        assert TObject.kBitMask == t.kBitMask
-        assert TObject.kIsOnHeap == t.kIsOnHeap
-        assert TObject.kNotDeleted == t.kNotDeleted
-        assert TObject.kZombie == t.kZombie
-
-    def test02_global_enum(self):
-        """Test global enums access and values"""
-
-        import cppyy
-        from cppyy import gbl
-
-        assert gbl.kRed == gbl.gROOT.ProcessLine("return kRed;")
-        assert gbl.kGreen == gbl.gROOT.ProcessLine("return kGreen;")
-        assert gbl.kBlue == gbl.gROOT.ProcessLine("return kBlue;")
-
-    def test03_copy_contructor(self):
-        """Test copy constructor"""
-
-        import cppyy
-        TLorentzVector = cppyy.gbl.TLorentzVector
-
-        t1 = TLorentzVector(1., 2., 3., -4.)
-        t2 = TLorentzVector(0., 0., 0., 0.)
-        t3 = TLorentzVector(t1)
-
-        assert t1 == t3
-        assert t1 != t2
-
-        for i in range(4):
-            assert t1[i] == t3[i]
-
-    def test04_object_validity(self):
-        """Test object validity checking"""
-
-        import cppyy
-
-        t1 = cppyy.gbl.TObject()
-
-        assert t1
-        assert not not t1
-
-        t2 = cppyy.gbl.gROOT.FindObject("Nah, I don't exist")
-
-        assert not t2
-
-    def test05_element_access(self):
-        """Test access to elements in matrix and array objects."""
-
-        from cppyy import gbl
-
-        N = 3
-        v = gbl.TVectorF(N)
-        m = gbl.TMatrixD(N, N)
-
-        for i in range(N):
-            assert v[i] == 0.0
-
-            for j in range(N):
-                assert m[i][j] == 0.0
-
-    def test06_static_function_call( self ):
-        """Test call to static function."""
-
-        import cppyy
-        TROOT, gROOT = cppyy.gbl.TROOT, cppyy.gbl.gROOT
-
-        c1 = TROOT.Class()
-        assert not not c1
-
-        c2 = gROOT.Class()
-
-        assert c1 == c2
-
-        old = gROOT.GetDirLevel()
-        TROOT.SetDirLevel(2)
-        assert 2 == gROOT.GetDirLevel()
-        gROOT.SetDirLevel(old)
-
-        old = TROOT.GetDirLevel()
-        gROOT.SetDirLevel(3)
-        assert 3 == TROOT.GetDirLevel()
-        TROOT.SetDirLevel(old)
-
-    def test07_macro(self):
-        """Test access to cpp macro's"""
-
-        from cppyy import gbl
-
-        assert gbl.NULL == 0
-
-        gbl.gROOT.ProcessLine('#define aap "aap"')
-        gbl.gROOT.ProcessLine('#define noot 1')
-        gbl.gROOT.ProcessLine('#define mies 2.0')
-
-        # TODO: macro's assumed to always be of long type ...
-        #assert gbl.aap == "aap"
-        assert gbl.noot == 1
-        #assert gbl.mies == 2.0
-
-    def test08_opaque_pointer_passing(self):
-        """Test passing around of opaque pointers"""
-
-        import cppyy
-
-        # TODO: figure out CObject (see also test_advanced.py)
-
-        s = cppyy.gbl.TString("Hello World!")
-        #cobj = cppyy.as_cobject(s)
-        addr = cppyy.addressof(s)
-
-        #assert s == cppyy.bind_object(cobj, s.__class__)
-        #assert s == cppyy.bind_object(cobj, "TString")
-        assert s == cppyy.bind_object(addr, s.__class__)
-        assert s == cppyy.bind_object(addr, "TString")
-
-    def test09_object_and_pointer_comparisons(self):
-        """Verify object and pointer comparisons"""
-
-        import cppyy
-        gbl = cppyy.gbl
-
-        c1 = cppyy.bind_object(0, gbl.TCanvas)
-        assert c1 == None
-        assert None == c1
-
-        c2 = cppyy.bind_object(0, gbl.TCanvas)
-        assert c1 == c2
-        assert c2 == c1
-
-        # TLorentzVector overrides operator==
-        l1 = cppyy.bind_object(0, gbl.TLorentzVector)
-        assert l1 == None
-        assert None == l1
-
-        assert c1 != l1
-        assert l1 != c1
-
-        l2 = cppyy.bind_object(0, gbl.TLorentzVector)
-        assert l1 == l2
-        assert l2 == l1
-
-        l3 = gbl.TLorentzVector(1, 2, 3, 4)
-        l4 = gbl.TLorentzVector(1, 2, 3, 4)
-        l5 = gbl.TLorentzVector(4, 3, 2, 1)
-        assert l3 == l4
-        assert l4 == l3
-
-        assert l3 != None     # like this to ensure __ne__ is called
-        assert None != l3     # id.
-        assert l3 != l5
-        assert l5 != l3
-
-    def test10_recursive_remove(self):
-        """Verify that objects are recursively removed when destroyed"""
-
-        import cppyy
-
-        c = cppyy.gbl.TClass.GetClass("TObject")
-
-        o = cppyy.gbl.TObject()
-        assert o
-
-        o.SetBit(cppyy.gbl.TObject.kMustCleanup)
-        c.Destructor(o)
-        assert not o
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -10,7 +10,7 @@
 from rpython.rlib import rposix, rposix_stat, rfile
 from rpython.rlib import objectmodel, rurandom
-from rpython.rlib.objectmodel import specialize
+from rpython.rlib.objectmodel import specialize, not_rpython
 from rpython.rlib.rarithmetic import r_longlong, intmask, r_uint, r_int
 from rpython.rlib.unroll import unrolling_iterable
 from rpython.rtyper.lltypesystem import lltype
@@ -1318,8 +1318,8 @@
     else:
         assert False, "Unknown fork hook"
+@not_rpython
 def add_fork_hook(where, hook):
-    "NOT_RPYTHON"
     get_fork_hooks(where).append(hook)
 add_fork_hook('child', ExecutionContext._mark_thread_disappeared)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
@@ -2455,3 +2455,61 @@
     assert (pt.x, pt.y) == (-9*500*999, 9*500*999)
     pt = lib.call2(lib.cb2)
     assert (pt.x, pt.y) == (99*500*999, -99*500*999)
+
+def test_ffi_gc_size_arg():
+    # with PyPy's GC, these calls to ffi.gc() would rapidly consume
+    # 40 GB of RAM without the third argument
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    for i in range(2000):
+        p = lib.malloc(20*1024*1024)     # 20 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 20*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 20*1024*1024)
+        del p
+
+def test_ffi_gc_size_arg_2():
+    # a variant of the above: this "attack" works on cpython's cyclic gc too
+    # and I found no obvious way to prevent that.  So for now, this test
+    # is skipped on CPython, where it eats all the memory.
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    class X(object):
+        pass
+    for i in range(2000):
+        p = lib.malloc(50*1024*1024)     # 50 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 50*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 50*1024*1024)
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x
+
+def test_ffi_new_with_cycles():
+    # still another variant, with ffi.new()
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("")
+    lib = ffi.verify("")
+    class X(object):
+        pass
+    for i in range(2000):
+        p = ffi.new("char[]", 50*1024*1024)    # 50 MB
+        for j in xrange(0, 50*1024*1024, 4096):
+            p[j] = '!'
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
@@ -2291,3 +2291,61 @@
     expected = "unsigned int"
     assert ffi.typeof("UINT_PTR") is ffi.typeof(expected)
     assert ffi.typeof("PTSTR") is ffi.typeof("wchar_t *")
+
+def test_gc_pypy_size_arg():
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    for i in range(2000):
+        p = lib.malloc(20*1024*1024)     # 20 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 20*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 20*1024*1024)
+        del p
+    # with PyPy's GC, the above would rapidly consume 40 GB of RAM
+    # without the third argument to ffi.gc()
+
+def test_ffi_gc_size_arg_2():
+    # a variant of the above: this "attack" works on cpython's cyclic gc too
+    # and I found no obvious way to prevent that.  So for now, this test
+    # is skipped on CPython, where it eats all the memory.
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    class X(object):
+        pass
+    for i in range(2000):
+        p = lib.malloc(50*1024*1024)     # 50 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 50*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 50*1024*1024)
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x
+
+def test_ffi_new_with_cycles():
+    # still another variant, with ffi.new()
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("")
+    lib = ffi.verify("")
+    class X(object):
+        pass
+    for i in range(2000):
+        p = ffi.new("char[]", 50*1024*1024)    # 50 MB
+        for j in xrange(0, 50*1024*1024, 4096):

From pypy.commits at gmail.com  Fri Aug  4 11:26:12 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Fri, 04 Aug 2017 08:26:12 -0700 (PDT)
Subject: [pypy-commit] pypy getarrayitem-into-bridges: close to-be-merged branch
Message-ID: <59849214.830f1c0a.b428c.f948@mx.google.com>

Author: Carl Friedrich Bolz
Branch: getarrayitem-into-bridges
Changeset: r92075:251f979f4bcc
Date: 2017-08-04 16:51 +0200
http://bitbucket.org/pypy/pypy/changeset/251f979f4bcc/

Log:	close to-be-merged branch

From pypy.commits at gmail.com  Fri Aug  4 11:26:14 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Fri, 04 Aug 2017 08:26:14 -0700 (PDT)
Subject: [pypy-commit] pypy default: merge getarrayitem-into-bridges:
Message-ID: <59849216.10e51c0a.32c6.e05e@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92076:43ff4a9015e3
Date: 2017-08-04 16:54 +0200
http://bitbucket.org/pypy/pypy/changeset/43ff4a9015e3/

Log:	merge getarrayitem-into-bridges:

	improvement on what information is retained into a bridge: in
	particular, knowledge about the content of arrays (at fixed
	indices) is stored in guards (and thus available at the beginning
	of bridges). also, some better handling of constants.

diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
--- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
@@ -18,6 +18,10 @@
 # ( ) length times, if getfield(box1, descr) == box2
 #     both boxes should be in the liveboxes
 #
+
+# ( ) length times, if getarrayitem_gc(box1, index, descr) == box2
+#     both boxes should be in the liveboxes
+#
 # ----
@@ -82,18 +86,26 @@
     # structs
     # XXX could be extended to arrays
     if optimizer.optheap:
-        triples = optimizer.optheap.serialize_optheap(available_boxes)
+        triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes)
         # can only encode descrs that have a known index into
        # metainterp_sd.all_descrs
-        triples = [triple for triple in triples if triple[1].descr_index != -1]
-        numb_state.append_int(len(triples))
-        for box1, descr, box2 in triples:
-            index = descr.descr_index
+        triples_struct = [triple for triple in triples_struct if triple[1].descr_index != -1]
+        numb_state.append_int(len(triples_struct))
+        for box1, descr, box2 in triples_struct:
+            descr_index = descr.descr_index
+            numb_state.append_short(tag_box(box1, liveboxes_from_env, memo))
+            numb_state.append_int(descr_index)
+            numb_state.append_short(tag_box(box2, liveboxes_from_env, memo))
+        numb_state.append_int(len(triples_array))
+        for box1, index, descr, box2 in triples_array:
+            descr_index = descr.descr_index
             numb_state.append_short(tag_box(box1, liveboxes_from_env, memo))
-            numb_state.append_int(index)
+            numb_state.append_int(descr_index)
             numb_state.append_short(tag_box(box2, liveboxes_from_env, memo))
     else:
         numb_state.append_int(0)
+        numb_state.append_int(0)

 def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes):
     reader = resumecode.Reader(resumestorage.rd_numb)
@@ -123,13 +135,24 @@
     if not optimizer.optheap:
         return
     length = reader.next_item()
-    result = []
+    result_struct = []
+    for i in range(length):
+        tagged = reader.next_item()
+        box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
+        descr_index = reader.next_item()
+        descr = metainterp_sd.all_descrs[descr_index]
+        tagged = reader.next_item()
+        box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
+        result_struct.append((box1, descr, box2))
+    length = reader.next_item()
+    result_array = []
     for i in range(length):
         tagged = reader.next_item()
         box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
         index = reader.next_item()
-        descr = metainterp_sd.all_descrs[index]
+        descr_index = reader.next_item()
+        descr = metainterp_sd.all_descrs[descr_index]
         tagged = reader.next_item()
         box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
-        result.append((box1, descr, box2))
-    optimizer.optheap.deserialize_optheap(result)
+        result_array.append((box1, index, descr, box2))
+    optimizer.optheap.deserialize_optheap(result_struct, result_array)
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py
--- a/rpython/jit/metainterp/optimizeopt/heap.py
+++ b/rpython/jit/metainterp/optimizeopt/heap.py
@@ -698,7 +698,7 @@
         return self.emit(op)

     def serialize_optheap(self, available_boxes):
-        result = []
+        result_getfield = []
         for descr, cf in self.cached_fields.iteritems():
             if cf._lazy_set:
                 continue  # XXX safe default for now
@@ -706,27 +706,62 @@
             if not parent_descr.is_object():
                 continue  # XXX could be extended to non-instance objects
             for i, box1 in enumerate(cf.cached_structs):
-                if box1 not in available_boxes:
+                if not box1.is_constant() and box1 not in available_boxes:
                     continue
                 structinfo = cf.cached_infos[i]
-                box2 = structinfo.getfield(descr).get_box_replacement()
-                if isinstance(box2, Const) or box2 in available_boxes:
-                    result.append((box1, descr, box2))
-        return result
+                box2 = structinfo.getfield(descr)
+                if box2 is None:
+                    # XXX this should not happen, as it is an invariant
+                    # violation! yet it does if box1 is a constant
+                    continue
+                box2 = box2.get_box_replacement()
+                if box2.is_constant() or box2 in available_boxes:
+                    result_getfield.append((box1, descr, box2))
+        result_array = []
+        for descr, indexdict in self.cached_arrayitems.iteritems():
+            for index, cf in indexdict.iteritems():
+                if cf._lazy_set:
+                    continue  # XXX safe default for now
+                for i, box1 in enumerate(cf.cached_structs):
+                    if not box1.is_constant() and box1 not in available_boxes:
+                        continue
+                    arrayinfo = cf.cached_infos[i]
+                    box2 = arrayinfo.getitem(descr, index)
+                    if box2 is None:
+                        # XXX this should not happen, as it is an invariant
+                        # violation! yet it does if box1 is a constant
+                        continue
+                    box2 = box2.get_box_replacement()
+                    if box2.is_constant() or box2 in available_boxes:
+                        result_array.append((box1, index, descr, box2))
+        return result_getfield, result_array

-    def deserialize_optheap(self, triples):
-        for box1, descr, box2 in triples:
+    def deserialize_optheap(self, triples_struct, triples_array):
+        for box1, descr, box2 in triples_struct:
             parent_descr = descr.get_parent_descr()
             assert parent_descr.is_object()
-            structinfo = box1.get_forwarded()
-            if not isinstance(structinfo, info.AbstractVirtualPtrInfo):
-                structinfo = info.InstancePtrInfo(parent_descr)
-                structinfo.init_fields(parent_descr, descr.get_index())
-                box1.set_forwarded(structinfo)
-
+            if box1.is_constant():
+                structinfo = info.ConstPtrInfo(box1)
+            else:
+                structinfo = box1.get_forwarded()
+                if not isinstance(structinfo, info.AbstractVirtualPtrInfo):
+                    structinfo = info.InstancePtrInfo(parent_descr)
+                    structinfo.init_fields(parent_descr, descr.get_index())
+                    box1.set_forwarded(structinfo)
             cf = self.field_cache(descr)
             structinfo.setfield(descr, box1, box2, optheap=self, cf=cf)
+        for box1, index, descr, box2 in triples_array:
+            if box1.is_constant():
+                arrayinfo = info.ConstPtrInfo(box1)
+            else:
+                arrayinfo = box1.get_forwarded()
+                if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo):
+                    arrayinfo = info.ArrayPtrInfo(descr)
+                    box1.set_forwarded(arrayinfo)
+            cf = self.arrayitem_cache(descr, index)
+            arrayinfo.setitem(descr, index, box1, box2, optheap=self, cf=cf)
+

 dispatch_opt = make_dispatcher_method(OptHeap, 'optimize_', default=OptHeap.emit)
diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py
--- a/rpython/jit/metainterp/test/test_bridgeopt.py
+++ b/rpython/jit/metainterp/test/test_bridgeopt.py
@@ -61,7 +61,7 @@
     serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)

-    assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0]
+    assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0]

     rbox1 = InputArgRef()
     rbox2 = InputArgRef()
@@ -97,7 +97,7 @@
     serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)

-    assert len(numb_state.create_numbering().code) == 2 + math.ceil(len(refboxes) / 6.0)
+    assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0)

     dct = {box: cls
           for box, known_class in boxes_known_classes
@@ -143,11 +143,7 @@
     def test_bridge_field_read(self):
         myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a'])
         class A(object):
-            def f(self):
-                return 1
-        class B(A):
-            def f(self):
-                return 2
+            pass
         class M(object):
             _immutable_fields_ = ['x']
             def __init__(self, x):
@@ -156,14 +152,12 @@
         m1 = M(1)
         m2 = M(2)
         def f(x, y, n):
+            a = A()
+            a.n = n
             if x:
-                a = A()
                 a.m = m1
-                a.n = n
             else:
-                a = B()
                 a.m = m2
-                a.n = n
             a.x = 0
             res = 0
             while y > 0:
@@ -186,3 +180,105 @@
         self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n
         self.check_resops(getfield_gc_r=1) # in main loop

+    def test_bridge_field_read_constants(self):
+        myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n'])
+        class A(object):
+            pass
+        class M(object):
+            _immutable_fields_ = ['x']
+            def __init__(self, x):
+                self.x = x
+
+        m1 = M(1)
+        m2 = M(2)
+        a = A()
+        a.m = m1
+        a.n = 0
+        def f(x, y, n):
+            if x:
+                a.m = m1
+                a.n = n
+            else:
+                a.m = m2
+                a.n = n
+            a.x = 0
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point(y=y, n=n, res=res)
+                n1 = a.n
+                m = jit.promote(a.m)
+                res += m.x
+                a.x += 1
+                if y > n:
+                    res += 1
+                    m = jit.promote(a.m)
+                    res += m.x
+                    res += n1 + a.n
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        assert res == f(6, 32, 16)
+        self.check_trace_count(3)
+        self.check_resops(guard_value=1)
+        self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n
+        self.check_resops(getfield_gc_r=1) # in main loop
+
+    def test_bridge_array_read(self):
+        myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a'])
+        def f(x, y, n):
+            if x:
+                a = [1, n, 0]
+            else:
+                a = [2, n, 0]
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point(y=y, n=n, res=res, a=a)
+                n1 = a[1]
+                m = jit.promote(a[0])
+                res += m
+                a[2] += 1
+                if y > n:
+                    res += 1
+                    m = jit.promote(a[0])
+                    res += m
+                    res += n1 + a[1]
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        assert res == f(6, 32, 16)
+        self.check_trace_count(3)
+        self.check_resops(guard_value=1)
+        self.check_resops(getarrayitem_gc_i=4)
+
+    def test_bridge_array_read_constant(self):
+        myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n'])
+        class A(object):
+            pass
+        a = A()
+        a.l = [1, -65, 0]
+        def f(x, y, n):
+            if x:
+                a.l[0] = 1
+            else:
+                a.l[0] = 2
+            a.l[1] = n
+            a.l[2] = 0
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point(y=y, n=n, res=res)
+                n1 = a.l[1]
+                m = jit.promote(a.l[0])
+                res += m
+                a.l[2] += 1
+                if y > n:
+                    res += 1
+                    m = jit.promote(a.l[0])
+                    res += m
+                    res += n1 + a.l[1]
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        assert res == f(6, 32, 16)
+        self.check_trace_count(3)
+        self.check_resops(guard_value=1)
+        self.check_resops(getarrayitem_gc_i=5)

From pypy.commits at gmail.com  Fri Aug  4 11:26:16 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Fri, 04 Aug 2017 08:26:16 -0700 (PDT)
Subject: [pypy-commit] pypy default: merge
Message-ID: <59849218.11331c0a.c5a14.18e6@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92077:8c68d973f65b
Date: 2017-08-04 17:25 +0200
http://bitbucket.org/pypy/pypy/changeset/8c68d973f65b/

Log:	merge

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@
 RUNINTERP = $(PYPY_EXECUTABLE)
 endif

-.PHONY: cffi_imports
+.PHONY: pypy-c cffi_imports

 pypy-c:
 	@echo
@@ -32,7 +32,7 @@
 	@echo "===================================================================="
 	@echo
 	@sleep 5
-	$(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py
+	cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py

 # Note: the -jN option, or MAKEFLAGS=-jN, are not usable.  They are
 # replaced with an opaque --jobserver option by the time this Makefile
@@ -40,4 +40,4 @@
 # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html

 cffi_imports: pypy-c
-	PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true
+	PYTHONPATH=. pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -224,11 +224,6 @@
                    "use specialised tuples", default=False),

-        BoolOption("withcelldict",
-                   "use dictionaries that are optimized for being used as module dicts",
-                   default=False,
-                   requires=[("objspace.honor__builtins__", False)]),
-
         BoolOption("withliststrategies",
                    "enable optimized ways to store lists of primitives ",
                    default=True),
@@ -288,7 +283,7 @@

     # extra optimizations with the JIT
     if level == 'jit':
-        config.objspace.std.suggest(withcelldict=True)
+        pass # none at the moment

 def enable_allworkingmodules(config):
diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt
deleted file mode 100644
--- a/pypy/doc/config/objspace.std.withcelldict.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Enable cell-dicts. This optimization is not helpful without the JIT. In the
-presence of the JIT, it greatly helps looking up globals.
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -2,6 +2,7 @@
 Arguments objects.
 """
 from rpython.rlib.debug import make_sure_not_resized
+from rpython.rlib.objectmodel import not_rpython
 from rpython.rlib import jit
 from pypy.interpreter.error import OperationError, oefmt
@@ -46,8 +47,8 @@
         # behaviour but produces better error messages
         self.methodcall = methodcall

+    @not_rpython
     def __repr__(self):
-        """ NOT_RPYTHON """
         name = self.__class__.__name__
         if not self.keywords:
             return '%s(%s)' % (name, self.arguments_w,)
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -7,6 +7,7 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import we_are_translated, specialize
+from rpython.rlib.objectmodel import not_rpython
 from rpython.rlib import rstack, rstackovf
 from pypy.interpreter import debug
@@ -57,8 +58,9 @@
                 self.match(space, space.w_KeyboardInterrupt))

     # note: an extra case is added in OpErrFmtNoArgs
+    @not_rpython
     def __str__(self):
-        "NOT_RPYTHON: Convenience for tracebacks."
+        "Convenience for tracebacks."
         s = self._w_value
         space = getattr(self.w_type, 'space', None)
         if space is not None:
@@ -107,15 +109,16 @@
         if RECORD_INTERPLEVEL_TRACEBACK:
             self.debug_excs.append(sys.exc_info())

+    @not_rpython
     def print_application_traceback(self, space, file=None):
-        "NOT_RPYTHON: Dump a standard application-level traceback."
+        "Dump a standard application-level traceback."
         if file is None:
             file = sys.stderr
         self.print_app_tb_only(file)
         print >> file, self.errorstr(space)

+    @not_rpython
     def print_app_tb_only(self, file):
-        "NOT_RPYTHON"
         tb = self._application_traceback
         if tb:
             import linecache
@@ -142,8 +145,9 @@
                 print >> file, l
                 tb = tb.next

+    @not_rpython
     def print_detailed_traceback(self, space=None, file=None):
-        """NOT_RPYTHON: Dump a nice detailed interpreter- and
+        """Dump a nice detailed interpreter- and
         application-level traceback, useful to debug the interpreter."""
         if file is None:
             file = sys.stderr
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -1,7 +1,7 @@
 import sys
 from pypy.interpreter.error import OperationError, get_cleared_operation_error
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.objectmodel import specialize
+from rpython.rlib.objectmodel import specialize, not_rpython
 from rpython.rlib import jit, rgc, objectmodel

 TICK_COUNTER_STEP = 100
@@ -423,8 +423,9 @@
         # to run at the next possible bytecode
         self.reset_ticker(-1)

+    @not_rpython
     def register_periodic_action(self, action, use_bytecode_counter):
-        """NOT_RPYTHON:
+        """
         Register the PeriodicAsyncAction action to be called whenever the
         tick counter becomes smaller than 0.  If 'use_bytecode_counter' is
         True, make sure that we decrease the tick counter at every bytecode.
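[Editorial note: the hunks in this commit repeatedly replace the old "NOT_RPYTHON" docstring convention with the @not_rpython decorator from rpython.rlib.objectmodel. A minimal sketch of how such a marker decorator can work is shown below; the helper here is illustrative only, and the real RPython decorator and its translation-time enforcement differ from this simplification.]

```python
# Hypothetical minimal version of a "not_rpython" marker decorator.
# It tags the function object; a translation toolchain could inspect
# this attribute and refuse to translate code that calls the function,
# while plain CPython execution is unaffected.

def not_rpython(func):
    func._not_rpython_ = True
    return func

@not_rpython
def debug_helper(obj):
    # Runs normally on top of plain CPython.
    return repr(obj)

print(debug_helper(42))                        # prints 42
print(debug_helper._not_rpython_)              # prints True
```

Compared to a magic "NOT_RPYTHON" docstring, the decorator leaves the docstring free for actual documentation, which is exactly what the hunks above are doing.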
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -64,8 +64,8 @@ def _freeze_(self): return True + @not_rpython def unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -380,8 +380,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): - """NOT_RPYTHON""" self.behavior = behavior def _run(self, space, scope_w): @@ -621,9 +621,9 @@ # When a BuiltinCode is stored in a Function object, # you get the functionality of CPython's built-in function type. + @not_rpython def __init__(self, func, unwrap_spec=None, self_type=None, descrmismatch=None, doc=None): - "NOT_RPYTHON" # 'implfunc' is the interpreter-level function. # Note that this uses a lot of (construction-time) introspection. 
Code.__init__(self, func.__name__) @@ -969,10 +969,10 @@ instancecache = {} + @not_rpython def __new__(cls, f, app_name=None, unwrap_spec=None, descrmismatch=None, as_classmethod=False, doc=None): - "NOT_RPYTHON" # f must be a function whose name does NOT start with 'app_' self_type = None if hasattr(f, 'im_func'): @@ -1013,8 +1013,8 @@ self._staticdefs = zip(argnames[-len(defaults):], defaults) return self + @not_rpython def _getdefaults(self, space): - "NOT_RPYTHON" defs_w = [] for name, defaultval in self._staticdefs: if name.startswith('w_'): @@ -1070,8 +1070,8 @@ class GatewayCache(SpaceCache): + @not_rpython def build(cache, gateway): - "NOT_RPYTHON" space = cache.space defs = gateway._getdefaults(space) # needs to be implemented by subclass code = gateway._code @@ -1141,8 +1141,8 @@ w_globals = self.getwdict(space) return space.getitem(w_globals, space.newtext(name)) + @not_rpython def interphook(self, name): - "NOT_RPYTHON" def appcaller(space, *args_w): if not isinstance(space, ObjSpace): raise TypeError("first argument must be a space instance.") @@ -1179,15 +1179,16 @@ """NOT_RPYTHON The cache mapping each applevel instance to its lazily built w_dict""" + @not_rpython def build(self, app): - "NOT_RPYTHON. Called indirectly by Applevel.getwdict()." + "Called indirectly by Applevel.getwdict()." 
return build_applevel_dict(app, self.space) # __________ pure applevel version __________ + at not_rpython def build_applevel_dict(self, space): - "NOT_RPYTHON" w_glob = space.newdict(module=True) space.setitem(w_glob, space.newtext('__name__'), space.newtext(self.modname)) space.exec_(self.source, w_glob, w_glob, @@ -1198,8 +1199,9 @@ # ____________________________________________________________ + at not_rpython def appdef(source, applevel=ApplevelClass, filename=None): - """ NOT_RPYTHON: build an app-level helper function, like for example: + """ build an app-level helper function, like for example: myfunc = appdef('''myfunc(x, y): return x+y ''') @@ -1245,6 +1247,6 @@ # app2interp_temp is used for testing mainly + at not_rpython def app2interp_temp(func, applevel_temp=applevel_temp, filename=None): - """ NOT_RPYTHON """ return appdef(func, applevel_temp, filename=filename) diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -3,6 +3,7 @@ """ from rpython.rlib.listsort import make_timsort_class +from rpython.rlib.objectmodel import not_rpython class ThreadLocals: @@ -41,9 +42,8 @@ # but in some corner cases it is not... 
unsure why self._value = None - + at not_rpython def make_weak_value_dictionary(space, keytype, valuetype): - "NOT_RPYTHON" if space.config.translation.rweakref: from rpython.rlib.rweakref import RWeakValueDictionary return RWeakValueDictionary(keytype, valuetype) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -3,6 +3,9 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.baseobjspace import W_Root + +from rpython.rlib.objectmodel import not_rpython + import sys class MixedModule(Module): @@ -15,16 +18,17 @@ lazy = False submodule_name = None + @not_rpython def __init__(self, space, w_name): - """ NOT_RPYTHON """ Module.__init__(self, space, w_name) self.lazy = True self.__class__.buildloaders() self.loaders = self.loaders.copy() # copy from the class to the inst self.submodules_w = [] + @not_rpython def install(self): - """NOT_RPYTHON: install this module, and it's submodules into + """install this module, and it's submodules into space.builtin_modules""" Module.install(self) if hasattr(self, "submodules"): @@ -61,8 +65,8 @@ self.w_initialdict = self.space.call_method(self.w_dict, 'items') @classmethod + @not_rpython def get_applevel_name(cls): - """ NOT_RPYTHON """ if cls.applevel_name is not None: return cls.applevel_name else: @@ -130,8 +134,8 @@ self._frozen = True @classmethod + @not_rpython def buildloaders(cls): - """ NOT_RPYTHON """ if not hasattr(cls, 'loaders'): # build a constant dictionary out of # applevel/interplevel definitions @@ -161,8 +165,8 @@ return space.newtext_or_none(cls.__doc__) + at not_rpython def getinterpevalloader(pkgroot, spec): - """ NOT_RPYTHON """ def ifileloader(space): d = {'space':space} # EVIL HACK (but it works, and this is not RPython :-) @@ -202,8 +206,8 @@ return ifileloader applevelcache = {} + at not_rpython def getappfileloader(pkgroot, appname, spec): 
- """ NOT_RPYTHON """ # hum, it's a bit more involved, because we usually # want the import at applevel modname, attrname = spec.split('.') diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -4,7 +4,7 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython class Module(W_Root): @@ -40,13 +40,15 @@ except OperationError: pass + @not_rpython def install(self): - """NOT_RPYTHON: installs this module into space.builtin_modules""" + """installs this module into space.builtin_modules""" modulename = self.space.text0_w(self.w_name) self.space.builtin_modules[modulename] = self + @not_rpython def setup_after_space_initialization(self): - """NOT_RPYTHON: to allow built-in modules to do some more setup + """to allow built-in modules to do some more setup after the space is fully initialized.""" def init(self, space): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -7,6 +7,7 @@ from rpython.rlib.debug import ll_assert_not_none from rpython.rlib.jit import hint from rpython.rlib.objectmodel import instantiate, specialize, we_are_translated +from rpython.rlib.objectmodel import not_rpython from rpython.rlib.rarithmetic import intmask, r_uint from rpython.tool.pairtype import extendabletype @@ -144,8 +145,9 @@ return None return d.w_locals + @not_rpython def __repr__(self): - # NOT_RPYTHON: useful in tracebacks + # useful in tracebacks return "<%s.%s executing %s at line %s" % ( self.__class__.__module__, self.__class__.__name__, self.pycode, self.get_last_lineno()) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -7,7 +7,7 @@ from 
rpython.rlib import jit, rstackovf from rpython.rlib.debug import check_nonneg from rpython.rlib.objectmodel import (we_are_translated, always_inline, - dont_inline) + dont_inline, not_rpython) from rpython.rlib.rarithmetic import r_uint, intmask from rpython.tool.sourcetools import func_with_new_name @@ -20,8 +20,8 @@ from pypy.interpreter.pycode import PyCode, BytecodeCorruption from pypy.tool.stdlib_opcode import bytecode_spec + at not_rpython def unaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_1 = self.popvalue() @@ -31,8 +31,8 @@ return func_with_new_name(opimpl, "opcode_impl_for_%s" % operationname) + at not_rpython def binaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_2 = self.popvalue() diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -8,14 +8,15 @@ from rpython.rlib.jit import promote from rpython.rlib.objectmodel import compute_identity_hash, specialize -from rpython.rlib.objectmodel import instantiate +from rpython.rlib.objectmodel import instantiate, not_rpython from rpython.tool.sourcetools import compile2, func_with_new_name class TypeDef(object): + @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, **rawdict): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" self.name = __name if __base is None: bases = [] @@ -113,8 +114,9 @@ # register_finalizer() or not. 
@specialize.memo() + at not_rpython def get_unique_interplevel_subclass(space, cls): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert cls.typedef.acceptable_as_base_class try: return _unique_subclass_cache[cls] @@ -349,15 +351,17 @@ return self + at not_rpython def interp_attrproperty(name, cls, doc=None, wrapfn=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert wrapfn is not None def fget(space, obj): return getattr(space, wrapfn)(getattr(obj, name)) return GetSetProperty(fget, cls=cls, doc=doc) + at not_rpython def interp_attrproperty_w(name, cls, doc=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" def fget(space, obj): w_value = getattr(obj, name) if w_value is None: diff --git a/pypy/module/__builtin__/test/test_classobj.py b/pypy/module/__builtin__/test/test_classobj.py --- a/pypy/module/__builtin__/test/test_classobj.py +++ b/pypy/module/__builtin__/test/test_classobj.py @@ -1090,18 +1090,18 @@ def setup_class(cls): if cls.runappdirect: py.test.skip("can only be run on py.py") - def is_strdict(space, w_class): - from pypy.objspace.std.dictmultiobject import BytesDictStrategy + def is_moduledict(space, w_class): + from pypy.objspace.std.celldict import ModuleDictStrategy w_d = w_class.getdict(space) - return space.wrap(isinstance(w_d.get_strategy(), BytesDictStrategy)) + return space.wrap(isinstance(w_d.get_strategy(), ModuleDictStrategy)) - cls.w_is_strdict = cls.space.wrap(gateway.interp2app(is_strdict)) + cls.w_is_moduledict = cls.space.wrap(gateway.interp2app(is_moduledict)) - def test_strdict(self): + def test_moduledict(self): class A: a = 1 b = 2 - assert self.is_strdict(A) + assert self.is_moduledict(A) def test_attr_slots(self): class C: diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py --- a/pypy/module/_codecs/__init__.py +++ b/pypy/module/_codecs/__init__.py @@ -1,5 +1,6 @@ from pypy.interpreter.mixedmodule import MixedModule 
from rpython.rlib import runicode +from rpython.rlib.objectmodel import not_rpython from pypy.module._codecs import interp_codecs class Module(MixedModule): @@ -86,9 +87,8 @@ 'unicode_internal_encode' : 'interp_codecs.unicode_internal_encode', } + @not_rpython def __init__(self, space, *args): - "NOT_RPYTHON" - # mbcs codec is Windows specific, and based on rffi. if (hasattr(runicode, 'str_decode_mbcs')): self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode' diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,5 +1,5 @@ from rpython.rlib import jit -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder from rpython.rlib.runicode import code_to_unichr, MAXUNICODE @@ -268,8 +268,8 @@ raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) + at not_rpython def register_builtin_error_handlers(space): - "NOT_RPYTHON" state = space.fromcache(CodecState) for error in ("strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"): diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -118,6 +118,29 @@ return space.w_True return space.w_False +index_count_jd = jit.JitDriver( + greens = ['count', 'arrclass', 'tp_item'], + reds = 'auto', name = 'array.index_or_count') + +def index_count_array(arr, w_val, count=False): + space = arr.space + tp_item = space.type(w_val) + arrclass = arr.__class__ + cnt = 0 + for i in range(arr.len): + index_count_jd.jit_merge_point( + tp_item=tp_item, count=count, + arrclass=arrclass) + w_item = arr.w_getitem(space, i) + if space.eq_w(w_item, w_val): + if count: + cnt += 1 + else: + return i + if count: + return cnt + return -1 + 
UNICODE_ARRAY = lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True})) @@ -257,17 +280,12 @@ """ self.extend(w_x) - def descr_count(self, space, w_val): + def descr_count(self, space, w_x): """ count(x) Return number of occurrences of x in the array. """ - cnt = 0 - for i in range(self.len): - # XXX jitdriver - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_val): - cnt += 1 + cnt = index_count_array(self, w_x, count=True) return space.newint(cnt) def descr_index(self, space, w_x): @@ -275,10 +293,9 @@ Return index of first occurrence of x in the array. """ - for i in range(self.len): - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_x): - return space.newint(i) + res = index_count_array(self, w_x, count=False) + if res >= 0: + return space.newint(res) raise oefmt(space.w_ValueError, "array.index(x): x not in list") def descr_reverse(self, space): @@ -752,7 +769,9 @@ class TypeCode(object): def __init__(self, itemtype, unwrap, canoverflow=False, signed=False, - method='__int__'): + method='__int__', errorname=None): + if errorname is None: + errorname = unwrap[:-2] self.itemtype = itemtype self.bytes = rffi.sizeof(itemtype) self.arraytype = lltype.Array(itemtype, hints={'nolength': True}) @@ -762,6 +781,7 @@ self.canoverflow = canoverflow self.w_class = None self.method = method + self.errorname = errorname def _freeze_(self): # hint for the annotator: track individual constant instances @@ -785,8 +805,8 @@ 'i': TypeCode(rffi.INT, 'int_w', True, True), 'I': _UINTTypeCode, 'l': TypeCode(rffi.LONG, 'int_w', True, True), - 'L': TypeCode(rffi.ULONG, 'bigint_w'), # Overflow handled by - # rbigint.touint() which + 'L': TypeCode(rffi.ULONG, 'bigint_w', # Overflow handled by + errorname="integer"), # rbigint.touint() which # corresponds to the # C-type unsigned long 'f': TypeCode(lltype.SingleFloat, 'float_w', method='__float__'), @@ -864,7 +884,7 @@ item = unwrap(space.call_method(w_item, mytype.method)) except OperationError: raise 
oefmt(space.w_TypeError, - "array item must be " + mytype.unwrap[:-2]) + "array item must be " + mytype.errorname) else: raise if mytype.unwrap == 'bigint_w': diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -162,6 +162,11 @@ raises(OverflowError, a.append, -1) raises(OverflowError, a.append, 2 ** (8 * b)) + def test_errormessage(self): + a = self.array("L", [1, 2, 3]) + excinfo = raises(TypeError, "a[0] = 'abc'") + assert str(excinfo.value) == "array item must be integer" + def test_fromstring(self): import sys diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -3,7 +3,7 @@ from rpython.rlib import rposix, rposix_stat from rpython.rlib import objectmodel, rurandom -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib.rarithmetic import r_longlong, intmask, r_uint from rpython.rlib.unroll import unrolling_iterable @@ -731,8 +731,8 @@ else: assert False, "Unknown fork hook" + at not_rpython def add_fork_hook(where, hook): - "NOT_RPYTHON" get_fork_hooks(where).append(hook) add_fork_hook('child', ExecutionContext._mark_thread_disappeared) diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -56,7 +56,7 @@ def allocate_and_init_instance(space, w_type=None, module=False, instance=False, strdict=False, kwargs=False): - if space.config.objspace.std.withcelldict and module: + if module: from pypy.objspace.std.celldict import ModuleDictStrategy assert w_type is None # every module needs its own strategy, because the strategy stores diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- 
a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -181,8 +181,8 @@ return self._wrap_not_rpython(x) + @not_rpython def _wrap_not_rpython(self, x): - "NOT_RPYTHON" # _____ this code is here to support testing only _____ # wrap() of a container works on CPython, but the code is diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -58,7 +58,6 @@ assert v2 is v3 class AppTestModuleDict(object): - spaceconfig = {"objspace.std.withcelldict": True} def setup_class(cls): cls.w_runappdirect = cls.space.wrap(cls.runappdirect) @@ -116,7 +115,6 @@ class AppTestCellDict(object): - spaceconfig = {"objspace.std.withcelldict": True} def setup_class(cls): if cls.runappdirect: diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1261,7 +1261,6 @@ class Config: class objspace: class std: - withcelldict = False methodcachesizeexp = 11 withmethodcachecounter = False @@ -1467,6 +1466,7 @@ def test_module_uses_strdict(): + from pypy.objspace.std.celldict import ModuleDictStrategy fakespace = FakeSpace() d = fakespace.newdict(module=True) - assert type(d.get_strategy()) is BytesDictStrategy + assert type(d.get_strategy()) is ModuleDictStrategy diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -4,7 +4,6 @@ class Config: class objspace: class std: - withcelldict = False methodcachesizeexp = 11 withmethodcachecounter = False diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -11,7 +11,7 @@ from rpython.rlib.jit import (promote, 
elidable_promote, we_are_jitted, elidable, dont_look_inside, unroll_safe) from rpython.rlib.objectmodel import current_object_addr_as_int, compute_hash -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import intmask, r_uint class MutableCell(W_Root): @@ -212,8 +212,8 @@ else: self.terminator = NoDictTerminator(space, self) + @not_rpython def __repr__(self): - "NOT_RPYTHON" return '' % (self.name, id(self)) def mutated(self, key): @@ -492,8 +492,9 @@ self, w_subtype, w_subtype) return w_subtype + @not_rpython def _cleanup_(self): - "NOT_RPYTHON. Forces the lazy attributes to be computed." + "Forces the lazy attributes to be computed." if 'lazyloaders' in self.__dict__: for attr in self.lazyloaders.keys(): self.getdictvalue(self.space, attr) @@ -1317,8 +1318,9 @@ class TypeCache(SpaceCache): + @not_rpython def build(self, typedef): - "NOT_RPYTHON: initialization-time only." + "initialization-time only." 
from pypy.objspace.std.objectobject import W_ObjectObject from pypy.interpreter.typedef import GetSetProperty from rpython.rlib.objectmodel import instantiate From pypy.commits at gmail.com Fri Aug 4 18:51:04 2017 From: pypy.commits at gmail.com (wlav) Date: Fri, 04 Aug 2017 15:51:04 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: remove (now unnecessary) explicit instantiations Message-ID: <5984fa58.4b6b1c0a.2fc54.611f@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92078:1b6feb53dfb7 Date: 2017-08-04 09:58 -0700 http://bitbucket.org/pypy/pypy/changeset/1b6feb53dfb7/ Log: remove (now unnecessary) explicit instantiations diff --git a/pypy/module/_cppyy/test/stltypes.cxx b/pypy/module/_cppyy/test/stltypes.cxx --- a/pypy/module/_cppyy/test/stltypes.cxx +++ b/pypy/module/_cppyy/test/stltypes.cxx @@ -1,15 +1,5 @@ #include "stltypes.h" -#define STLTYPES_EXPLICIT_INSTANTIATION_WITH_COMPS(STLTYPE, TTYPE) \ -namespace __gnu_cxx { \ -template bool operator==(const std::STLTYPE< TTYPE >::iterator&, \ - const std::STLTYPE< TTYPE >::iterator&); \ -template bool operator!=(const std::STLTYPE< TTYPE >::iterator&, \ - const std::STLTYPE< TTYPE >::iterator&); \ -} - -//- explicit instantiations of used comparisons -STLTYPES_EXPLICIT_INSTANTIATION_WITH_COMPS(vector, int) //- class with lots of std::string handling stringy_class::stringy_class(const char* s) : m_string(s) {} diff --git a/pypy/module/_cppyy/test/stltypes.h b/pypy/module/_cppyy/test/stltypes.h --- a/pypy/module/_cppyy/test/stltypes.h +++ b/pypy/module/_cppyy/test/stltypes.h @@ -37,62 +37,3 @@ std::string operator[](double) { return "double"; } std::string operator[](const std::string&) { return "string"; } }; - - -#define STLTYPE_INSTANTIATION(STLTYPE, TTYPE, N) \ - std::STLTYPE STLTYPE##_##N; \ - std::STLTYPE::iterator STLTYPE##_##N##_i; \ - std::STLTYPE::const_iterator STLTYPE##_##N##_ci - -#define STLTYPE_INSTANTIATION2(STLTYPE, TTYPE1, TTYPE2, N) \ - std::STLTYPE STLTYPE##_##N; \ 
- std::pair STLTYPE##_##N##_p; \ - std::pair STLTYPE##_##N##_cp; \ - std::STLTYPE::iterator STLTYPE##_##N##_i; \ - std::STLTYPE::const_iterator STLTYPE##_##N##_ci - - -//- instantiations of used STL types -namespace { - - struct _CppyyVectorInstances { - - STLTYPE_INSTANTIATION(vector, int, 1); - STLTYPE_INSTANTIATION(vector, float, 2); - STLTYPE_INSTANTIATION(vector, double, 3); - STLTYPE_INSTANTIATION(vector, just_a_class, 4); - - }; - - struct _CppyyListInstances { - - STLTYPE_INSTANTIATION(list, int, 1); - STLTYPE_INSTANTIATION(list, float, 2); - STLTYPE_INSTANTIATION(list, double, 3); - - }; - - struct _CppyyMapInstances { - - STLTYPE_INSTANTIATION2(map, int, int, 1); - STLTYPE_INSTANTIATION2(map, std::string, int, 2); - STLTYPE_INSTANTIATION2(map, std::string, unsigned int, 3); - STLTYPE_INSTANTIATION2(map, std::string, unsigned long, 4); - - }; - - stl_like_class stlc_1; - -} // unnamed namespace - -#define STLTYPES_EXPLICIT_INSTANTIATION_DECL_COMPS(STLTYPE, TTYPE) \ -namespace __gnu_cxx { \ -extern template bool operator==(const std::STLTYPE< TTYPE >::iterator&, \ - const std::STLTYPE< TTYPE >::iterator&); \ -extern template bool operator!=(const std::STLTYPE< TTYPE >::iterator&, \ - const std::STLTYPE< TTYPE >::iterator&); \ -} - -// comps for int only to allow testing: normal use of vector is looping over a -// range-checked version of __getitem__ -STLTYPES_EXPLICIT_INSTANTIATION_DECL_COMPS(vector, int) diff --git a/pypy/module/_cppyy/test/stltypes.xml b/pypy/module/_cppyy/test/stltypes.xml --- a/pypy/module/_cppyy/test/stltypes.xml +++ b/pypy/module/_cppyy/test/stltypes.xml @@ -2,30 +2,9 @@ - - - - - - - - - - - - - - - - - From pypy.commits at gmail.com Fri Aug 4 18:51:06 2017 From: pypy.commits at gmail.com (wlav) Date: Fri, 04 Aug 2017 15:51:06 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: add 'cppyy.gbl.namespace' to sys.modules, instead of '_cppyy.gbl.namespace' (to move to frontend, later) Message-ID: 
<5984fa5a.84abdf0a.e3703.79ea@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92079:65f1498d698a Date: 2017-08-04 15:19 -0700 http://bitbucket.org/pypy/pypy/changeset/65f1498d698a/ Log: add 'cppyy.gbl.namespace' to sys.modules, instead of '_cppyy.gbl.namespace' (to move to frontend, later) diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -126,7 +126,7 @@ setattr(metans, dm_name, cppdm) modname = pycppns.__name__.replace('::', '.') - sys.modules['_cppyy.gbl.'+modname] = pycppns + sys.modules['cppyy.gbl.'+modname] = pycppns # note naming (cppyy) return pycppns def _drop_cycles(bases): @@ -444,9 +444,9 @@ # install for user access _cppyy.gbl = gbl - # install as modules to allow importing from - sys.modules['_cppyy.gbl'] = gbl - sys.modules['_cppyy.gbl.std'] = gbl.std + # install as modules to allow importing from (note naming: cppyy) + sys.modules['cppyy.gbl'] = gbl + sys.modules['cppyy.gbl.std'] = gbl.std # user-defined pythonizations interface _pythonizations = {} diff --git a/pypy/module/_cppyy/test/test_fragile.py b/pypy/module/_cppyy/test/test_fragile.py --- a/pypy/module/_cppyy/test/test_fragile.py +++ b/pypy/module/_cppyy/test/test_fragile.py @@ -216,13 +216,18 @@ # TODO: namespaces aren't loaded (and thus not added to sys.modules) # with just the from ... 
import statement; actual use is needed - from _cppyy.gbl import fragile + + # TODO: this is really front-end testing, but the code is still in + # _cppyy, so we test it here until pythonify.py can be moved + import sys + sys.modules['cppyy'] = sys.modules['_cppyy'] + from cppyy.gbl import fragile def fail_import(): - from _cppyy.gbl import does_not_exist + from cppyy.gbl import does_not_exist raises(ImportError, fail_import) - from _cppyy.gbl.fragile import A, B, C, D + from cppyy.gbl.fragile import A, B, C, D assert _cppyy.gbl.fragile.A is A assert _cppyy.gbl.fragile.B is B assert _cppyy.gbl.fragile.C is C @@ -230,18 +235,18 @@ # according to warnings, can't test "import *" ... - from _cppyy.gbl.fragile import nested1 + from cppyy.gbl.fragile import nested1 assert _cppyy.gbl.fragile.nested1 is nested1 - from _cppyy.gbl.fragile.nested1 import A, nested2 + from cppyy.gbl.fragile.nested1 import A, nested2 assert _cppyy.gbl.fragile.nested1.A is A assert _cppyy.gbl.fragile.nested1.nested2 is nested2 - from _cppyy.gbl.fragile.nested1.nested2 import A, nested3 + from cppyy.gbl.fragile.nested1.nested2 import A, nested3 assert _cppyy.gbl.fragile.nested1.nested2.A is A assert _cppyy.gbl.fragile.nested1.nested2.nested3 is nested3 - from _cppyy.gbl.fragile.nested1.nested2.nested3 import A + from cppyy.gbl.fragile.nested1.nested2.nested3 import A assert _cppyy.gbl.fragile.nested1.nested2.nested3.A is nested3.A def test12_missing_casts(self): diff --git a/pypy/module/_cppyy/test/test_stltypes.py b/pypy/module/_cppyy/test/test_stltypes.py --- a/pypy/module/_cppyy/test/test_stltypes.py +++ b/pypy/module/_cppyy/test/test_stltypes.py @@ -288,7 +288,7 @@ """Test access to a list""" import _cppyy - from _cppyy.gbl import std + std = _cppyy.gbl.std type_info = ( ("int", int), @@ -323,7 +323,7 @@ """Test behavior of empty list""" import _cppyy - from _cppyy.gbl import std + std = _cppyy.gbl.std a = std.list(int)() for arg in a: @@ -452,7 +452,7 @@ """Test iterator comparison with 
operator== reflected""" import _cppyy - from _cppyy.gbl import std + std = _cppyy.gbl.std v = std.vector(int)() v.resize(1) From pypy.commits at gmail.com Fri Aug 4 18:51:08 2017 From: pypy.commits at gmail.com (wlav) Date: Fri, 04 Aug 2017 15:51:08 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: merge default into branch Message-ID: <5984fa5c.db6f1c0a.3caf2.8033@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92080:3a13ca0c3f09 Date: 2017-08-04 15:37 -0700 http://bitbucket.org/pypy/pypy/changeset/3a13ca0c3f09/ Log: merge default into branch diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ RUNINTERP = $(PYPY_EXECUTABLE) endif -.PHONY: cffi_imports +.PHONY: pypy-c cffi_imports pypy-c: @echo @@ -32,7 +32,7 @@ @echo "====================================================================" @echo @sleep 5 - $(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py + cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py # Note: the -jN option, or MAKEFLAGS=-jN, are not usable. They are # replaced with an opaque --jobserver option by the time this Makefile @@ -40,4 +40,4 @@ # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html cffi_imports: pypy-c - PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true + PYTHONPATH=. pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 
'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. See Python bug #445902: diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. 
""" - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -224,11 +224,6 @@ "use specialised tuples", default=False), - BoolOption("withcelldict", - "use dictionaries that are optimized for being used as module dicts", - default=False, - requires=[("objspace.honor__builtins__", False)]), - BoolOption("withliststrategies", "enable optimized ways to store lists of primitives ", default=True), @@ -288,7 +283,7 @@ # extra optimizations with the JIT if level == 'jit': - config.objspace.std.suggest(withcelldict=True) + pass # none at the moment def enable_allworkingmodules(config): diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -152,22 +152,61 @@ Run the translation ------------------- +We usually translate in the ``pypy/goal`` directory, so all the following +commands assume your ``$pwd`` is there. + Translate with JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=jit Translate without JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=2 +Note this translates pypy via the ``targetpypystandalone.py`` file, so these +are shorthand for:: + + pypy ../../rpython/bin/rpython targetpypystandalone.py + +More help is availabe via ``--help`` at either option position, and more info +can be found in the :doc:`config/index` section. 
+ (You can use ``python`` instead of ``pypy`` here, which will take longer but works too.) -If everything works correctly this will create an executable ``pypy-c`` in the -current directory. The executable behaves mostly like a normal Python -interpreter (see :doc:`cpython_differences`). +If everything works correctly this will: + +1. Run the rpython `translation chain`_, producing a database of the + entire pypy interpreter. This step is currently single threaded, and RAM + hungry. As part of this step, the chain creates a large number of C code + files and a Makefile to compile them in a + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. +2. Create an executable ``pypy-c`` by running the Makefile. This step can + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. + + +The resulting executable behaves mostly like a normal Python +interpreter (see :doc:`cpython_differences`), and is ready for testing, for +use as a base interpreter for a new virtualenv, or for packaging into a binary +suitable for installation on another machine running the same OS as the build +machine. + +Note that step 4 is merely done as a convenience; any of the steps may be rerun +without rerunning the previous steps. + +.. _`translation chain`: https://rpython.readthedocs.io/en/latest/translation.html + + +Making a debug build of PyPy +---------------------------- + +If the Makefile is rerun with the lldebug or lldebug0 target, appropriate +compilation flags are added to add debug info and reduce compiler optimizations +to ``-O0`` respectively. If you stop in a debugger, you will see the +very wordy machine-generated C code from the rpython translation step, which +takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ @@ -181,14 +220,6 @@ ..
_`out-of-line API mode`: http://cffi.readthedocs.org/en/latest/overview.html#real-example-api-level-out-of-line -Translating with non-standard options -------------------------------------- - -It is possible to have non-standard features enabled for translation, -but they are not really tested any more. Look, for example, at the -:doc:`objspace proxies ` document. - - Packaging (preparing for installation) -------------------------------------- @@ -217,14 +248,16 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in `package.py`_. Users + commands at install time; the exact list is in + :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in `package.py`_). Users seeing a broken + see the exact list in :source:`pypy/tool/release/package.py `). + Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file called ``_gdbm_cffi.pypy-41.so`` locally, which is a C extension diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.std.withcelldict.txt +++ /dev/null @@ -1,2 +0,0 @@ -Enable cell-dicts. This optimization is not helpful without the JIT. In the -presence of the JIT, it greatly helps looking up globals. 
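[Editorial aside: the ``withcelldict`` option retired in the hunks above controlled PyPy's cell-based module dictionaries, which after this commit are always enabled. The sketch below is a minimal pure-Python illustration of the idea behind ``ModuleDictStrategy``; the class and method names are illustrative, not PyPy's actual internals.]

```python
# Cell-dict sketch: every value is boxed in a Cell, so a caller (or a JIT)
# can look the cell up once, cache it, and still observe later updates
# without repeating the dictionary lookup.

class Cell(object):
    def __init__(self, value):
        self.value = value

class CellDict(object):
    def __init__(self):
        self._cells = {}

    def __setitem__(self, key, value):
        cell = self._cells.get(key)
        if cell is None:
            self._cells[key] = Cell(value)
        else:
            cell.value = value  # mutate in place; cached cells stay valid

    def getcell(self, key):
        # a JIT can promote the returned cell to a constant
        return self._cells[key]

    def __getitem__(self, key):
        return self._cells[key].value

d = CellDict()
d['x'] = 1
cell = d.getcell('x')   # cached once
d['x'] = 2
assert cell.value == 2  # the cached cell sees the update
```

The extra indirection is what makes global lookups cheap under a tracing JIT: once the cell is cached as a constant, a repeated global read becomes a single attribute load rather than a fresh hash lookup.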
diff --git a/pypy/doc/configuration.rst b/pypy/doc/configuration.rst --- a/pypy/doc/configuration.rst +++ b/pypy/doc/configuration.rst @@ -188,4 +188,6 @@ can be found on the ``config`` attribute of all ``TranslationContext`` instances and are described in :source:`rpython/config/translationoption.py`. The interpreter options are attached to the object space, also under the name ``config`` and are -described in :source:`pypy/config/pypyoption.py`. +described in :source:`pypy/config/pypyoption.py`. Both sets of options are +documented in the :doc:`config/index` section. + diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst deleted file mode 100644 --- a/pypy/doc/cppyy_example.rst +++ /dev/null @@ -1,59 +0,0 @@ -File example.h -============== - -:: - - #include - #include - - class AbstractClass { - public: - virtual ~AbstractClass() {} - virtual void abstract_method() = 0; - }; - - class ConcreteClass : AbstractClass { - public: - ConcreteClass(int n=42) : m_int(n) {} - ~ConcreteClass() {} - - virtual void abstract_method() { - std::cout << "called concrete method" << std::endl; - } - - void array_method(int* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - void array_method(double* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - AbstractClass* show_autocast() { - return this; - } - - operator const char*() { - return "Hello operator const char*!"; - } - - public: - int m_int; - }; - - namespace Namespace { - - class ConcreteClass { - public: - class NestedClass { - public: - std::vector m_v; - }; - - }; - - } // namespace Namespace diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -12,7 +12,7 @@ * Write them in pure Python and use ctypes_. -* Write them in C++ and bind them through :doc:`cppyy ` using Cling.
+* Write them in C++ and bind them through cppyy_ using Cling. * Write them as `RPython mixed modules`_. @@ -64,9 +64,9 @@ cppyy ----- -For C++, `cppyy`_ is an automated bindings generator available for both +For C++, _cppyy_ is an automated bindings generator available for both PyPy and CPython. -``cppyy`` relies on declarations from C++ header files to dynamically +_cppyy_ relies on declarations from C++ header files to dynamically construct Python equivalent classes, functions, variables, etc. It is designed for use by large scale programs and supports modern C++. With PyPy, it leverages the built-in ``_cppyy`` module, allowing the JIT to @@ -75,8 +75,7 @@ To install, run ``pip install cppyy``. Further details are available in the `full documentation`_. -.. _cppyy: http://cppyy.readthedocs.org/ -.. _`full documentation`: http://cppyy.readthedocs.org/ +.. _`full documentation`: https://cppyy.readthedocs.org/ RPython Mixed Modules diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -55,3 +55,8 @@ Fix the bounds in the GC when allocating a lot of objects with finalizers, fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -2,6 +2,7 @@ Arguments objects. 
""" from rpython.rlib.debug import make_sure_not_resized +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import jit from pypy.interpreter.error import OperationError, oefmt @@ -46,8 +47,8 @@ # behaviour but produces better error messages self.methodcall = methodcall + @not_rpython def __repr__(self): - """ NOT_RPYTHON """ name = self.__class__.__name__ if not self.keywords: return '%s(%s)' % (name, self.arguments_w,) diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -7,6 +7,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, specialize +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import rstack, rstackovf from pypy.interpreter import debug @@ -57,8 +58,9 @@ self.match(space, space.w_KeyboardInterrupt)) # note: an extra case is added in OpErrFmtNoArgs + @not_rpython def __str__(self): - "NOT_RPYTHON: Convenience for tracebacks." + "Convenience for tracebacks." s = self._w_value space = getattr(self.w_type, 'space', None) if space is not None: @@ -107,15 +109,16 @@ if RECORD_INTERPLEVEL_TRACEBACK: self.debug_excs.append(sys.exc_info()) + @not_rpython def print_application_traceback(self, space, file=None): - "NOT_RPYTHON: Dump a standard application-level traceback." + "Dump a standard application-level traceback." 
if file is None: file = sys.stderr self.print_app_tb_only(file) print >> file, self.errorstr(space) + @not_rpython def print_app_tb_only(self, file): - "NOT_RPYTHON" tb = self._application_traceback if tb: import linecache @@ -142,8 +145,9 @@ print >> file, l tb = tb.next + @not_rpython def print_detailed_traceback(self, space=None, file=None): - """NOT_RPYTHON: Dump a nice detailed interpreter- and + """Dump a nice detailed interpreter- and application-level traceback, useful to debug the interpreter.""" if file is None: file = sys.stderr diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -1,7 +1,7 @@ import sys from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll import unrolling_iterable -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib import jit, rgc, objectmodel TICK_COUNTER_STEP = 100 @@ -423,8 +423,9 @@ # to run at the next possible bytecode self.reset_ticker(-1) + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): - """NOT_RPYTHON: + """ Register the PeriodicAsyncAction action to be called whenever the tick counter becomes smaller than 0. If 'use_bytecode_counter' is True, make sure that we decrease the tick counter at every bytecode. 
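[Editorial aside: the recurring edit in these hunks replaces the old ``"NOT_RPYTHON"`` docstring convention with the ``@not_rpython`` decorator from ``rpython.rlib.objectmodel``. The sketch below shows what such a marker decorator amounts to; the real one additionally makes the RPython annotator refuse to translate tagged functions, and the attribute name here is illustrative.]

```python
# A marker decorator in the spirit of rpython.rlib.objectmodel.not_rpython:
# it only tags the function object; translation-time tooling is what would
# actually reject tagged functions.

def not_rpython(func):
    func._not_rpython_ = True  # illustrative attribute name
    return func

@not_rpython
def pretty_repr(obj):
    "Convenience for tracebacks; never meant to be translated."
    return "<%s at 0x%x>" % (type(obj).__name__, id(obj))

# The function still runs normally on top of CPython/PyPy:
assert getattr(pretty_repr, '_not_rpython_', False)
assert pretty_repr([]).startswith('<list at 0x')
```

Compared to the docstring convention, a decorator survives docstring stripping (``python -OO``) and is machine-checkable rather than a comment, which is presumably why the codebase is migrating.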
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -64,8 +64,8 @@ def _freeze_(self): return True + @not_rpython def unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -380,8 +380,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): - """NOT_RPYTHON""" self.behavior = behavior def _run(self, space, scope_w): @@ -621,9 +621,9 @@ # When a BuiltinCode is stored in a Function object, # you get the functionality of CPython's built-in function type. + @not_rpython def __init__(self, func, unwrap_spec=None, self_type=None, descrmismatch=None, doc=None): - "NOT_RPYTHON" # 'implfunc' is the interpreter-level function. # Note that this uses a lot of (construction-time) introspection. 
Code.__init__(self, func.__name__) @@ -969,10 +969,10 @@ instancecache = {} + @not_rpython def __new__(cls, f, app_name=None, unwrap_spec=None, descrmismatch=None, as_classmethod=False, doc=None): - "NOT_RPYTHON" # f must be a function whose name does NOT start with 'app_' self_type = None if hasattr(f, 'im_func'): @@ -1013,8 +1013,8 @@ self._staticdefs = zip(argnames[-len(defaults):], defaults) return self + @not_rpython def _getdefaults(self, space): - "NOT_RPYTHON" defs_w = [] for name, defaultval in self._staticdefs: if name.startswith('w_'): @@ -1070,8 +1070,8 @@ class GatewayCache(SpaceCache): + @not_rpython def build(cache, gateway): - "NOT_RPYTHON" space = cache.space defs = gateway._getdefaults(space) # needs to be implemented by subclass code = gateway._code @@ -1141,8 +1141,8 @@ w_globals = self.getwdict(space) return space.getitem(w_globals, space.newtext(name)) + @not_rpython def interphook(self, name): - "NOT_RPYTHON" def appcaller(space, *args_w): if not isinstance(space, ObjSpace): raise TypeError("first argument must be a space instance.") @@ -1179,15 +1179,16 @@ """NOT_RPYTHON The cache mapping each applevel instance to its lazily built w_dict""" + @not_rpython def build(self, app): - "NOT_RPYTHON. Called indirectly by Applevel.getwdict()." + "Called indirectly by Applevel.getwdict()." 
return build_applevel_dict(app, self.space) # __________ pure applevel version __________ + at not_rpython def build_applevel_dict(self, space): - "NOT_RPYTHON" w_glob = space.newdict(module=True) space.setitem(w_glob, space.newtext('__name__'), space.newtext(self.modname)) space.exec_(self.source, w_glob, w_glob, @@ -1198,8 +1199,9 @@ # ____________________________________________________________ + at not_rpython def appdef(source, applevel=ApplevelClass, filename=None): - """ NOT_RPYTHON: build an app-level helper function, like for example: + """ build an app-level helper function, like for example: myfunc = appdef('''myfunc(x, y): return x+y ''') @@ -1245,6 +1247,6 @@ # app2interp_temp is used for testing mainly + at not_rpython def app2interp_temp(func, applevel_temp=applevel_temp, filename=None): - """ NOT_RPYTHON """ return appdef(func, applevel_temp, filename=filename) diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -3,6 +3,7 @@ """ from rpython.rlib.listsort import make_timsort_class +from rpython.rlib.objectmodel import not_rpython class ThreadLocals: @@ -41,9 +42,8 @@ # but in some corner cases it is not... 
unsure why self._value = None - + at not_rpython def make_weak_value_dictionary(space, keytype, valuetype): - "NOT_RPYTHON" if space.config.translation.rweakref: from rpython.rlib.rweakref import RWeakValueDictionary return RWeakValueDictionary(keytype, valuetype) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -3,6 +3,9 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.baseobjspace import W_Root + +from rpython.rlib.objectmodel import not_rpython + import sys class MixedModule(Module): @@ -15,16 +18,17 @@ lazy = False submodule_name = None + @not_rpython def __init__(self, space, w_name): - """ NOT_RPYTHON """ Module.__init__(self, space, w_name) self.lazy = True self.__class__.buildloaders() self.loaders = self.loaders.copy() # copy from the class to the inst self.submodules_w = [] + @not_rpython def install(self): - """NOT_RPYTHON: install this module, and it's submodules into + """install this module, and it's submodules into space.builtin_modules""" Module.install(self) if hasattr(self, "submodules"): @@ -61,8 +65,8 @@ self.w_initialdict = self.space.call_method(self.w_dict, 'items') @classmethod + @not_rpython def get_applevel_name(cls): - """ NOT_RPYTHON """ if cls.applevel_name is not None: return cls.applevel_name else: @@ -130,8 +134,8 @@ self._frozen = True @classmethod + @not_rpython def buildloaders(cls): - """ NOT_RPYTHON """ if not hasattr(cls, 'loaders'): # build a constant dictionary out of # applevel/interplevel definitions @@ -161,8 +165,8 @@ return space.newtext_or_none(cls.__doc__) + at not_rpython def getinterpevalloader(pkgroot, spec): - """ NOT_RPYTHON """ def ifileloader(space): d = {'space':space} # EVIL HACK (but it works, and this is not RPython :-) @@ -202,8 +206,8 @@ return ifileloader applevelcache = {} + at not_rpython def getappfileloader(pkgroot, appname, spec): 
- """ NOT_RPYTHON """ # hum, it's a bit more involved, because we usually # want the import at applevel modname, attrname = spec.split('.') diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -4,7 +4,7 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython class Module(W_Root): @@ -40,13 +40,15 @@ except OperationError: pass + @not_rpython def install(self): - """NOT_RPYTHON: installs this module into space.builtin_modules""" + """installs this module into space.builtin_modules""" modulename = self.space.text0_w(self.w_name) self.space.builtin_modules[modulename] = self + @not_rpython def setup_after_space_initialization(self): - """NOT_RPYTHON: to allow built-in modules to do some more setup + """to allow built-in modules to do some more setup after the space is fully initialized.""" def init(self, space): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -7,6 +7,7 @@ from rpython.rlib.debug import ll_assert_not_none from rpython.rlib.jit import hint from rpython.rlib.objectmodel import instantiate, specialize, we_are_translated +from rpython.rlib.objectmodel import not_rpython from rpython.rlib.rarithmetic import intmask, r_uint from rpython.tool.pairtype import extendabletype @@ -144,8 +145,9 @@ return None return d.w_locals + @not_rpython def __repr__(self): - # NOT_RPYTHON: useful in tracebacks + # useful in tracebacks return "<%s.%s executing %s at line %s" % ( self.__class__.__module__, self.__class__.__name__, self.pycode, self.get_last_lineno()) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -7,7 +7,7 @@ from 
rpython.rlib import jit, rstackovf from rpython.rlib.debug import check_nonneg from rpython.rlib.objectmodel import (we_are_translated, always_inline, - dont_inline) + dont_inline, not_rpython) from rpython.rlib.rarithmetic import r_uint, intmask from rpython.tool.sourcetools import func_with_new_name @@ -20,8 +20,8 @@ from pypy.interpreter.pycode import PyCode, BytecodeCorruption from pypy.tool.stdlib_opcode import bytecode_spec + at not_rpython def unaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_1 = self.popvalue() @@ -31,8 +31,8 @@ return func_with_new_name(opimpl, "opcode_impl_for_%s" % operationname) + at not_rpython def binaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_2 = self.popvalue() diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -8,14 +8,15 @@ from rpython.rlib.jit import promote from rpython.rlib.objectmodel import compute_identity_hash, specialize -from rpython.rlib.objectmodel import instantiate +from rpython.rlib.objectmodel import instantiate, not_rpython from rpython.tool.sourcetools import compile2, func_with_new_name class TypeDef(object): + @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, **rawdict): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" self.name = __name if __base is None: bases = [] @@ -113,8 +114,9 @@ # register_finalizer() or not. 
@specialize.memo() + at not_rpython def get_unique_interplevel_subclass(space, cls): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert cls.typedef.acceptable_as_base_class try: return _unique_subclass_cache[cls] @@ -349,15 +351,17 @@ return self + at not_rpython def interp_attrproperty(name, cls, doc=None, wrapfn=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert wrapfn is not None def fget(space, obj): return getattr(space, wrapfn)(getattr(obj, name)) return GetSetProperty(fget, cls=cls, doc=doc) + at not_rpython def interp_attrproperty_w(name, cls, doc=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" def fget(space, obj): w_value = getattr(obj, name) if w_value is None: diff --git a/pypy/module/__builtin__/test/test_classobj.py b/pypy/module/__builtin__/test/test_classobj.py --- a/pypy/module/__builtin__/test/test_classobj.py +++ b/pypy/module/__builtin__/test/test_classobj.py @@ -1090,18 +1090,18 @@ def setup_class(cls): if cls.runappdirect: py.test.skip("can only be run on py.py") - def is_strdict(space, w_class): - from pypy.objspace.std.dictmultiobject import BytesDictStrategy + def is_moduledict(space, w_class): + from pypy.objspace.std.celldict import ModuleDictStrategy w_d = w_class.getdict(space) - return space.wrap(isinstance(w_d.get_strategy(), BytesDictStrategy)) + return space.wrap(isinstance(w_d.get_strategy(), ModuleDictStrategy)) - cls.w_is_strdict = cls.space.wrap(gateway.interp2app(is_strdict)) + cls.w_is_moduledict = cls.space.wrap(gateway.interp2app(is_moduledict)) - def test_strdict(self): + def test_moduledict(self): class A: a = 1 b = 2 - assert self.is_strdict(A) + assert self.is_moduledict(A) def test_attr_slots(self): class C: diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -433,17 +433,22 @@ def _sizeof(self): return 
self.ctype.size - def with_gc(self, w_destructor): + def with_gc(self, w_destructor, size=0): space = self.space if space.is_none(w_destructor): if isinstance(self, W_CDataGCP): self.detach_destructor() - return space.w_None - raise oefmt(space.w_TypeError, - "Can remove destructor only on a object " - "previously returned by ffi.gc()") - with self as ptr: - return W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + w_res = space.w_None + else: + raise oefmt(space.w_TypeError, + "Can remove destructor only on an object " + "previously returned by ffi.gc()") + else: + with self as ptr: + w_res = W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + if size != 0: + rgc.add_memory_pressure(size) + return w_res def unpack(self, length): from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -351,14 +351,14 @@ return handle.from_handle(self.space, w_arg) - @unwrap_spec(w_cdata=W_CData) - def descr_gc(self, w_cdata, w_destructor): + @unwrap_spec(w_cdata=W_CData, size=int) + def descr_gc(self, w_cdata, w_destructor, size=0): """\ Return a new cdata object that points to the same data.
Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called.""" # - return w_cdata.with_gc(w_destructor) + return w_cdata.with_gc(w_destructor, size) @unwrap_spec(replace_with='text') diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -257,6 +257,6 @@ # ____________________________________________________________ - at unwrap_spec(w_cdata=cdataobj.W_CData) -def gcp(space, w_cdata, w_destructor): - return w_cdata.with_gc(w_destructor) + at unwrap_spec(w_cdata=cdataobj.W_CData, size=int) +def gcp(space, w_cdata, w_destructor, size=0): + return w_cdata.with_gc(w_destructor, size) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -377,7 +377,7 @@ raises(TypeError, ffi.gc, p, None) seen = [] q1 = ffi.gc(p, lambda p: seen.append(1)) - q2 = ffi.gc(q1, lambda p: seen.append(2)) + q2 = ffi.gc(q1, lambda p: seen.append(2), size=123) import gc; gc.collect() assert seen == [] assert ffi.gc(q1, None) is None diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py --- a/pypy/module/_codecs/__init__.py +++ b/pypy/module/_codecs/__init__.py @@ -1,5 +1,6 @@ from pypy.interpreter.mixedmodule import MixedModule from rpython.rlib import runicode +from rpython.rlib.objectmodel import not_rpython from pypy.module._codecs import interp_codecs class Module(MixedModule): @@ -86,9 +87,8 @@ 'unicode_internal_encode' : 'interp_codecs.unicode_internal_encode', } + @not_rpython def __init__(self, space, *args): - "NOT_RPYTHON" - # mbcs codec is Windows specific, and based on rffi. 
if (hasattr(runicode, 'str_decode_mbcs')): self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode' diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,5 +1,5 @@ from rpython.rlib import jit -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder from rpython.rlib.runicode import code_to_unichr, MAXUNICODE @@ -268,8 +268,8 @@ raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) + at not_rpython def register_builtin_error_handlers(space): - "NOT_RPYTHON" state = space.fromcache(CodecState) for error in ("strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"): diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -118,6 +118,29 @@ return space.w_True return space.w_False +index_count_jd = jit.JitDriver( + greens = ['count', 'arrclass', 'tp_item'], + reds = 'auto', name = 'array.index_or_count') + +def index_count_array(arr, w_val, count=False): + space = arr.space + tp_item = space.type(w_val) + arrclass = arr.__class__ + cnt = 0 + for i in range(arr.len): + index_count_jd.jit_merge_point( + tp_item=tp_item, count=count, + arrclass=arrclass) + w_item = arr.w_getitem(space, i) + if space.eq_w(w_item, w_val): + if count: + cnt += 1 + else: + return i + if count: + return cnt + return -1 + UNICODE_ARRAY = lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True})) @@ -257,17 +280,12 @@ """ self.extend(w_x) - def descr_count(self, space, w_val): + def descr_count(self, space, w_x): """ count(x) Return number of occurrences of x in the array. 
""" - cnt = 0 - for i in range(self.len): - # XXX jitdriver - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_val): - cnt += 1 + cnt = index_count_array(self, w_x, count=True) return space.newint(cnt) def descr_index(self, space, w_x): @@ -275,10 +293,9 @@ Return index of first occurrence of x in the array. """ - for i in range(self.len): - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_x): - return space.newint(i) + res = index_count_array(self, w_x, count=False) + if res >= 0: + return space.newint(res) raise oefmt(space.w_ValueError, "array.index(x): x not in list") def descr_reverse(self, space): @@ -752,7 +769,9 @@ class TypeCode(object): def __init__(self, itemtype, unwrap, canoverflow=False, signed=False, - method='__int__'): + method='__int__', errorname=None): + if errorname is None: + errorname = unwrap[:-2] self.itemtype = itemtype self.bytes = rffi.sizeof(itemtype) self.arraytype = lltype.Array(itemtype, hints={'nolength': True}) @@ -762,6 +781,7 @@ self.canoverflow = canoverflow self.w_class = None self.method = method + self.errorname = errorname def _freeze_(self): # hint for the annotator: track individual constant instances @@ -785,8 +805,8 @@ 'i': TypeCode(rffi.INT, 'int_w', True, True), 'I': _UINTTypeCode, 'l': TypeCode(rffi.LONG, 'int_w', True, True), - 'L': TypeCode(rffi.ULONG, 'bigint_w'), # Overflow handled by - # rbigint.touint() which + 'L': TypeCode(rffi.ULONG, 'bigint_w', # Overflow handled by + errorname="integer"), # rbigint.touint() which # corresponds to the # C-type unsigned long 'f': TypeCode(lltype.SingleFloat, 'float_w', method='__float__'), @@ -864,7 +884,7 @@ item = unwrap(space.call_method(w_item, mytype.method)) except OperationError: raise oefmt(space.w_TypeError, - "array item must be " + mytype.unwrap[:-2]) + "array item must be " + mytype.errorname) else: raise if mytype.unwrap == 'bigint_w': diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- 
a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -162,6 +162,11 @@ raises(OverflowError, a.append, -1) raises(OverflowError, a.append, 2 ** (8 * b)) + def test_errormessage(self): + a = self.array("L", [1, 2, 3]) + excinfo = raises(TypeError, "a[0] = 'abc'") + assert str(excinfo.value) == "array item must be integer" + def test_fromstring(self): import sys diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -3,7 +3,7 @@ from rpython.rlib import rposix, rposix_stat from rpython.rlib import objectmodel, rurandom -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib.rarithmetic import r_longlong, intmask, r_uint from rpython.rlib.unroll import unrolling_iterable @@ -731,8 +731,8 @@ else: assert False, "Unknown fork hook" + at not_rpython def add_fork_hook(where, hook): - "NOT_RPYTHON" get_fork_hooks(where).append(hook) add_fork_hook('child', ExecutionContext._mark_thread_disappeared) diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py @@ -2455,3 +2455,61 @@ assert (pt.x, pt.y) == (-9*500*999, 9*500*999) pt = lib.call2(lib.cb2) assert (pt.x, pt.y) == (99*500*999, -99*500*999) + +def test_ffi_gc_size_arg(): + # with PyPy's GC, these calls to ffi.gc() would rapidly consume + # 40 GB of RAM without the third argument + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' 
+ p = ffi.gc(p, lib.free, 20*1024*1024) + del p + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' + x = X() + x.p = p + x.cyclic = x + del p, x diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py @@ -2291,3 +2291,61 @@ expected = "unsigned int" assert ffi.typeof("UINT_PTR") is ffi.typeof(expected) assert ffi.typeof("PTSTR") is ffi.typeof("wchar_t *") + +def test_gc_pypy_size_arg(): + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + for i in range(2000): + p = lib.malloc(20*1024*1024) # 20 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 20*1024*1024, 4096): + p1[j] = '!' 
+ p = ffi.gc(p, lib.free, 20*1024*1024) + del p + # with PyPy's GC, the above would rapidly consume 40 GB of RAM + # without the third argument to ffi.gc() + +def test_ffi_gc_size_arg_2(): + # a variant of the above: this "attack" works on cpython's cyclic gc too + # and I found no obvious way to prevent that. So for now, this test + # is skipped on CPython, where it eats all the memory. + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("void *malloc(size_t); void free(void *);") + lib = ffi.verify(r""" + #include + """) + class X(object): + pass + for i in range(2000): + p = lib.malloc(50*1024*1024) # 50 MB + p1 = ffi.cast("char *", p) + for j in xrange(0, 50*1024*1024, 4096): + p1[j] = '!' + p = ffi.gc(p, lib.free, 50*1024*1024) + x = X() + x.p = p + x.cyclic = x + del p, x + +def test_ffi_new_with_cycles(): + # still another variant, with ffi.new() + if '__pypy__' not in sys.builtin_module_names: + py.test.skip("find a way to tweak the cyclic GC of CPython") + ffi = FFI() + ffi.cdef("") + lib = ffi.verify("") + class X(object): + pass + for i in range(2000): + p = ffi.new("char[]", 50*1024*1024) # 50 MB + for j in xrange(0, 50*1024*1024, 4096): + p[j] = '!' 
+ x = X() + x.p = p + x.cyclic = x + del p, x diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -56,7 +56,7 @@ def allocate_and_init_instance(space, w_type=None, module=False, instance=False, strdict=False, kwargs=False): - if space.config.objspace.std.withcelldict and module: + if module: from pypy.objspace.std.celldict import ModuleDictStrategy assert w_type is None # every module needs its own strategy, because the strategy stores diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -181,8 +181,8 @@ return self._wrap_not_rpython(x) + @not_rpython def _wrap_not_rpython(self, x): - "NOT_RPYTHON" # _____ this code is here to support testing only _____ # wrap() of a container works on CPython, but the code is diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -58,7 +58,6 @@ assert v2 is v3 class AppTestModuleDict(object): - spaceconfig = {"objspace.std.withcelldict": True} def setup_class(cls): cls.w_runappdirect = cls.space.wrap(cls.runappdirect) @@ -116,7 +115,6 @@ class AppTestCellDict(object): - spaceconfig = {"objspace.std.withcelldict": True} def setup_class(cls): if cls.runappdirect: diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1261,7 +1261,6 @@ class Config: class objspace: class std: - withcelldict = False methodcachesizeexp = 11 withmethodcachecounter = False @@ -1467,6 +1466,7 @@ def test_module_uses_strdict(): + from pypy.objspace.std.celldict import ModuleDictStrategy fakespace = FakeSpace() d = fakespace.newdict(module=True) 
- assert type(d.get_strategy()) is BytesDictStrategy + assert type(d.get_strategy()) is ModuleDictStrategy diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -4,7 +4,6 @@ class Config: class objspace: class std: - withcelldict = False methodcachesizeexp = 11 withmethodcachecounter = False diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -11,7 +11,7 @@ from rpython.rlib.jit import (promote, elidable_promote, we_are_jitted, elidable, dont_look_inside, unroll_safe) from rpython.rlib.objectmodel import current_object_addr_as_int, compute_hash -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import intmask, r_uint class MutableCell(W_Root): @@ -212,8 +212,8 @@ else: self.terminator = NoDictTerminator(space, self) + @not_rpython def __repr__(self): - "NOT_RPYTHON" return '' % (self.name, id(self)) def mutated(self, key): @@ -492,8 +492,9 @@ self, w_subtype, w_subtype) return w_subtype + @not_rpython def _cleanup_(self): - "NOT_RPYTHON. Forces the lazy attributes to be computed." + "Forces the lazy attributes to be computed." if 'lazyloaders' in self.__dict__: for attr in self.lazyloaders.keys(): self.getdictvalue(self.space, attr) @@ -1317,8 +1318,9 @@ class TypeCache(SpaceCache): + @not_rpython def build(self, typedef): - "NOT_RPYTHON: initialization-time only." + "initialization-time only." 
from pypy.objspace.std.objectobject import W_ObjectObject from pypy.interpreter.typedef import GetSetProperty from rpython.rlib.objectmodel import instantiate diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -18,6 +18,10 @@ # ( ) length times, if getfield(box1, descr) == box2 # both boxes should be in the liveboxes # +# +# ( ) length times, if getarrayitem_gc(box1, index, descr) == box2 +# both boxes should be in the liveboxes +# # ---- @@ -82,18 +86,26 @@ # structs # XXX could be extended to arrays if optimizer.optheap: - triples = optimizer.optheap.serialize_optheap(available_boxes) + triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into # metainterp_sd.all_descrs - triples = [triple for triple in triples if triple[1].descr_index != -1] - numb_state.append_int(len(triples)) - for box1, descr, box2 in triples: - index = descr.descr_index + triples_struct = [triple for triple in triples_struct if triple[1].descr_index != -1] + numb_state.append_int(len(triples_struct)) + for box1, descr, box2 in triples_struct: + descr_index = descr.descr_index + numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_int(descr_index) + numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_int(len(triples_array)) + for box1, index, descr, box2 in triples_array: + descr_index = descr.descr_index numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) numb_state.append_int(index) + numb_state.append_int(descr_index) numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) else: numb_state.append_int(0) + numb_state.append_int(0) def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes): reader = 
resumecode.Reader(resumestorage.rd_numb) @@ -123,13 +135,24 @@ if not optimizer.optheap: return length = reader.next_item() - result = [] + result_struct = [] + for i in range(length): + tagged = reader.next_item() + box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] + tagged = reader.next_item() + box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + result_struct.append((box1, descr, box2)) + length = reader.next_item() + result_array = [] for i in range(length): tagged = reader.next_item() box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) index = reader.next_item() - descr = metainterp_sd.all_descrs[index] + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] tagged = reader.next_item() box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) - result.append((box1, descr, box2)) - optimizer.optheap.deserialize_optheap(result) + result_array.append((box1, index, descr, box2)) + optimizer.optheap.deserialize_optheap(result_struct, result_array) diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -223,7 +223,10 @@ def invalidate(self, descr): for opinfo in self.cached_infos: assert isinstance(opinfo, info.ArrayPtrInfo) - opinfo._items = None + # only invalidate those at self.index + if self.index < len(opinfo._items): + opinfo._items[self.index] = None + #opinfo._items = None #[self.index] = None self.cached_infos = [] self.cached_structs = [] @@ -695,7 +698,7 @@ return self.emit(op) def serialize_optheap(self, available_boxes): - result = [] + result_getfield = [] for descr, cf in self.cached_fields.iteritems(): if cf._lazy_set: continue # XXX safe default for now @@ -703,27 +706,62 @@ if not parent_descr.is_object(): continue # XXX 
could be extended to non-instance objects for i, box1 in enumerate(cf.cached_structs): - if box1 not in available_boxes: + if not box1.is_constant() and box1 not in available_boxes: continue structinfo = cf.cached_infos[i] - box2 = structinfo.getfield(descr).get_box_replacement() - if isinstance(box2, Const) or box2 in available_boxes: - result.append((box1, descr, box2)) - return result + box2 = structinfo.getfield(descr) + if box2 is None: + # XXX this should not happen, as it is an invariant + # violation! yet it does if box1 is a constant + continue + box2 = box2.get_box_replacement() + if box2.is_constant() or box2 in available_boxes: + result_getfield.append((box1, descr, box2)) + result_array = [] + for descr, indexdict in self.cached_arrayitems.iteritems(): + for index, cf in indexdict.iteritems(): + if cf._lazy_set: + continue # XXX safe default for now + for i, box1 in enumerate(cf.cached_structs): + if not box1.is_constant() and box1 not in available_boxes: + continue + arrayinfo = cf.cached_infos[i] + box2 = arrayinfo.getitem(descr, index) + if box2 is None: + # XXX this should not happen, as it is an invariant + # violation! 
yet it does if box1 is a constant + continue + box2 = box2.get_box_replacement() + if box2.is_constant() or box2 in available_boxes: + result_array.append((box1, index, descr, box2)) + return result_getfield, result_array - def deserialize_optheap(self, triples): - for box1, descr, box2 in triples: + def deserialize_optheap(self, triples_struct, triples_array): + for box1, descr, box2 in triples_struct: parent_descr = descr.get_parent_descr() assert parent_descr.is_object() - structinfo = box1.get_forwarded() - if not isinstance(structinfo, info.AbstractVirtualPtrInfo): - structinfo = info.InstancePtrInfo(parent_descr) - structinfo.init_fields(parent_descr, descr.get_index()) - box1.set_forwarded(structinfo) - + if box1.is_constant(): + structinfo = info.ConstPtrInfo(box1) + else: + structinfo = box1.get_forwarded() + if not isinstance(structinfo, info.AbstractVirtualPtrInfo): + structinfo = info.InstancePtrInfo(parent_descr) + structinfo.init_fields(parent_descr, descr.get_index()) + box1.set_forwarded(structinfo) cf = self.field_cache(descr) structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) + for box1, index, descr, box2 in triples_array: + if box1.is_constant(): + arrayinfo = info.ConstPtrInfo(box1) + else: + arrayinfo = box1.get_forwarded() + if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo): + arrayinfo = info.ArrayPtrInfo(descr) + box1.set_forwarded(arrayinfo) + cf = self.arrayitem_cache(descr, index) + arrayinfo.setitem(descr, index, box1, box2, optheap=self, cf=cf) + dispatch_opt = make_dispatcher_method(OptHeap, 'optimize_', default=OptHeap.emit) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -1537,6 +1537,46 @@ """ self.optimize_loop(ops, expected) + def 
test_duplicate_getarrayitem_after_setarrayitem_and_guard(self): + ops = """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p7 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p9 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + p10 = getarrayitem_gc_r(p1, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p7) + escape_n(p8) + escape_n(p9) + escape_n(p10) + jump(p0, p1, p2, p3, i1) + """ + expected = """ + [p0, p1, p2, p3, i1] + p4 = getarrayitem_gc_r(p0, 0, descr=arraydescr2) + p5 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + p6 = getarrayitem_gc_r(p1, 0, descr=arraydescr2) + setarrayitem_gc(p1, 1, p3, descr=arraydescr2) + guard_true(i1) [i1] + p8 = getarrayitem_gc_r(p0, 1, descr=arraydescr2) + escape_n(p4) + escape_n(p5) + escape_n(p6) + escape_n(p4) + escape_n(p8) + escape_n(p6) + escape_n(p3) + jump(p0, p1, p2, p3, 1) + """ + self.optimize_loop(ops, expected) + def test_getarrayitem_pure_does_not_invalidate(self): ops = """ [p1, p2] diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -61,7 +61,7 @@ serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) - assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0] + assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0] rbox1 = InputArgRef() rbox2 = InputArgRef() @@ -97,7 +97,7 @@ serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) - assert len(numb_state.create_numbering().code) == 2 + math.ceil(len(refboxes) / 6.0) + assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0) 
dct = {box: cls for box, known_class in boxes_known_classes @@ -143,11 +143,7 @@ def test_bridge_field_read(self): myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) class A(object): - def f(self): - return 1 - class B(A): - def f(self): - return 2 + pass class M(object): _immutable_fields_ = ['x'] def __init__(self, x): @@ -156,14 +152,12 @@ m1 = M(1) m2 = M(2) def f(x, y, n): + a = A() + a.n = n if x: - a = A() a.m = m1 - a.n = n else: - a = B() a.m = m2 - a.n = n a.x = 0 res = 0 while y > 0: @@ -186,3 +180,105 @@ self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n self.check_resops(getfield_gc_r=1) # in main loop + def test_bridge_field_read_constants(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n']) + class A(object): + pass + class M(object): + _immutable_fields_ = ['x'] + def __init__(self, x): + self.x = x + + m1 = M(1) + m2 = M(2) + a = A() + a.m = m1 + a.n = 0 + def f(x, y, n): + if x: + a.m = m1 + a.n = n + else: + a.m = m2 + a.n = n + a.x = 0 + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res) + n1 = a.n + m = jit.promote(a.m) + res += m.x + a.x += 1 + if y > n: + res += 1 + m = jit.promote(a.m) + res += m.x + res += n1 + a.n + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n + self.check_resops(getfield_gc_r=1) # in main loop + + def test_bridge_array_read(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) + def f(x, y, n): + if x: + a = [1, n, 0] + else: + a = [2, n, 0] + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res, a=a) + n1 = a[1] + m = jit.promote(a[0]) + res += m + a[2] += 1 + if y > n: + res += 1 + m = jit.promote(a[0]) + res += m + res += n1 + a[1] + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + 
self.check_resops(guard_value=1) + self.check_resops(getarrayitem_gc_i=4) + + def test_bridge_array_read_constant(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n']) + class A(object): + pass + a = A() + a.l = [1, -65, 0] + def f(x, y, n): + if x: + a.l[0] = 1 + else: + a.l[0] = 2 + a.l[1] = n + a.l[2] = 0 + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res) + n1 = a.l[1] + m = jit.promote(a.l[0]) + res += m + a.l[2] += 1 + if y > n: + res += 1 + m = jit.promote(a.l[0]) + res += m + res += n1 + a.l[1] + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getarrayitem_gc_i=5) From pypy.commits at gmail.com Fri Aug 4 23:27:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 04 Aug 2017 20:27:59 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Remove temporary changes and disable leakfinder checks, so that the tests pass Message-ID: <59853b3f.935c1c0a.4cb2b.d0db@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92082:d66ca989779b Date: 2017-08-05 04:27 +0100 http://bitbucket.org/pypy/pypy/changeset/d66ca989779b/ Log: Remove temporary changes and disable leakfinder checks, so that the tests pass diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -1,3 +1,4 @@ +import pytest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from pypy.conftest import option @@ -111,6 +112,7 @@ res = [1, 2, 3] * arr assert res == [2, 4, 6] + @pytest.mark.xfail def test_subclass_dealloc(self): module = self.import_module(name='array') class Sub(module.array): diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ 
b/pypy/module/cpyext/test/test_cpyext.py @@ -154,7 +154,8 @@ rawrefcount._collect() self.space.user_del_action._run_finalizers() try: - leakfinder.stop_tracking_allocations(check=True) + # set check=True to actually enable leakfinder + leakfinder.stop_tracking_allocations(check=False) except leakfinder.MallocMismatch as e: result = e.args[0] filtered_result = {} diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -676,9 +676,9 @@ obj_pto = rffi.cast(PyTypeObjectPtr, obj) base_pyo = rffi.cast(PyObject, obj_pto.c_tp_base) Py_DecRef(space, obj_pto.c_tp_bases) - #Py_DecRef(space, obj_pto.c_tp_mro) + Py_DecRef(space, obj_pto.c_tp_mro) Py_DecRef(space, obj_pto.c_tp_cache) # let's do it like cpython - #Py_DecRef(space, obj_pto.c_tp_dict) + Py_DecRef(space, obj_pto.c_tp_dict) if obj_pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE: heaptype = rffi.cast(PyHeapTypeObject, obj) Py_DecRef(space, heaptype.c_ht_name) @@ -932,7 +932,7 @@ """ Sets up other attributes, when the interpreter type has been created. 
""" - #pto.c_tp_mro = make_ref(space, space.newtuple(w_obj.mro_w)) + pto.c_tp_mro = make_ref(space, space.newtuple(w_obj.mro_w)) base = pto.c_tp_base if base: inherit_special(space, pto, base) @@ -952,10 +952,10 @@ if w_obj.is_cpytype(): Py_DecRef(space, pto.c_tp_dict) - #w_dict = w_obj.getdict(space) + w_dict = w_obj.getdict(space) # pass in the w_obj to convert any values that are # unbound GetSetProperty into bound PyGetSetDescrObject - #pto.c_tp_dict = make_ref(space, w_dict, w_obj) + pto.c_tp_dict = make_ref(space, w_dict, w_obj) @cpython_api([PyTypeObjectPtr, PyTypeObjectPtr], rffi.INT_real, error=CANNOT_FAIL) def PyType_IsSubtype(space, a, b): From pypy.commits at gmail.com Fri Aug 4 23:27:57 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 04 Aug 2017 20:27:57 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Remove useless 'reference warm-up' Message-ID: <59853b3d.500a1c0a.12bf2.62ea@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92081:c65274990272 Date: 2017-08-03 21:01 +0100 http://bitbucket.org/pypy/pypy/changeset/c65274990272/ Log: Remove useless 'reference warm-up' diff --git a/pypy/module/cpyext/test/test_api.py b/pypy/module/cpyext/test/test_api.py --- a/pypy/module/cpyext/test/test_api.py +++ b/pypy/module/cpyext/test/test_api.py @@ -22,17 +22,6 @@ class BaseApiTest(LeakCheckingTest): def setup_class(cls): space = cls.space - # warm up reference counts: - # - the posix module allocates a HCRYPTPROV on Windows - # - writing to stdout and stderr allocates a file lock - space.getbuiltinmodule("cpyext") - space.getbuiltinmodule(os.name) - space.call_function(space.getattr(space.sys.get("stderr"), - space.wrap("write")), - space.wrap("")) - space.call_function(space.getattr(space.sys.get("stdout"), - space.wrap("write")), - space.wrap("")) cls.preload_builtins(space) class CAPI: @@ -41,9 +30,6 @@ cls.api = CAPI() CAPI.__dict__.update(INTERPLEVEL_API) - print 'DONT_FREE_ANY_MORE' - 
rawrefcount._dont_free_any_more() - def raises(self, space, api, expected_exc, f, *args): if not callable(f): raise Exception("%s is not callable" % (f,)) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -141,10 +141,6 @@ Eagerly create pyobjs for various builtins so they don't look like leaks. """ - space.getbuiltinmodule("cpyext") - # 'import os' to warm up reference counts - w_import = space.builtin.getdictvalue(space, '__import__') - space.call_function(w_import, space.wrap("os")) for name in [ 'buffer', 'mmap.mmap', 'types.FunctionType', 'types.CodeType', From pypy.commits at gmail.com Sat Aug 5 00:24:24 2017 From: pypy.commits at gmail.com (wlav) Date: Fri, 04 Aug 2017 21:24:24 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: raise LookupError when failing the dispatch lookup (consistency with CPyCppyy) Message-ID: <59854878.4283df0a.4f05e.abc7@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92083:17bcb20012f1 Date: 2017-08-04 20:52 -0700 http://bitbucket.org/pypy/pypy/changeset/17bcb20012f1/ Log: raise LookupError when failing the dispatch lookup (consistency with CPyCppyy) diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -788,7 +788,7 @@ for f in overload.functions: if 0 < f.signature().find(sig): return W_CPPOverload(self.space, self, [f]) - raise oefmt(self.space.w_TypeError, "no overload matches signature") + raise oefmt(self.space.w_LookupError, "no overload matches signature") def missing_attribute_error(self, name): return oefmt(self.space.w_AttributeError, diff --git a/pypy/module/_cppyy/test/test_overloads.py b/pypy/module/_cppyy/test/test_overloads.py --- a/pypy/module/_cppyy/test/test_overloads.py +++ b/pypy/module/_cppyy/test/test_overloads.py @@ -57,7 +57,7 @@ 
c = c_overload() raises(TypeError, c.__dispatch__, 'get_int', 12) - raises(TypeError, c.__dispatch__, 'get_int', 'does_not_exist') + raises(LookupError, c.__dispatch__, 'get_int', 'does_not_exist') assert c.__dispatch__('get_int', 'a_overload*')(a_overload()) == 42 assert c.__dispatch__('get_int', 'b_overload*')(b_overload()) == 13 From pypy.commits at gmail.com Sat Aug 5 00:24:25 2017 From: pypy.commits at gmail.com (wlav) Date: Fri, 04 Aug 2017 21:24:25 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: (re-)enable some more tests and add a collect cycle for consistency Message-ID: <59854879.6b99df0a.b0ebe.ec32@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r92084:5f12167c0f08 Date: 2017-08-04 21:10 -0700 http://bitbucket.org/pypy/pypy/changeset/5f12167c0f08/ Log: (re-)enable some more tests and add a collect cycle for consistency diff --git a/pypy/module/_cppyy/test/test_pythonify.py b/pypy/module/_cppyy/test/test_pythonify.py --- a/pypy/module/_cppyy/test/test_pythonify.py +++ b/pypy/module/_cppyy/test/test_pythonify.py @@ -68,18 +68,16 @@ """Test object and method calls.""" import _cppyy example01_class = _cppyy.gbl.example01 - #assert example01_class.getCount() == 0 + assert example01_class.getCount() == 0 instance = example01_class(7) - #assert example01_class.getCount() == 1 + assert example01_class.getCount() == 1 res = instance.addDataToInt(4) - return assert res == 11 res = instance.addDataToInt(-4) assert res == 3 instance.__destruct__() assert example01_class.getCount() == 0 raises(ReferenceError, 'instance.addDataToInt(4)') - return instance = example01_class(7) instance2 = example01_class(8) @@ -89,7 +87,6 @@ instance2.__destruct__() assert example01_class.getCount() == 0 - t = self.example01 instance = example01_class(13) res = instance.addDataToDouble(16) assert round(res-29, 8) == 0. @@ -97,10 +94,10 @@ instance = example01_class(-13) res = instance.addDataToDouble(16) assert round(res-3, 8) == 0. 
+ instance.__destruct__() - - t = self.example01 instance = example01_class(42) + assert example01_class.getCount() == 1 res = instance.addDataToAtoi("13") assert res == 55 @@ -327,7 +324,9 @@ def test15_subclassing(self): """A sub-class on the python side should have that class as type""" - import _cppyy + import _cppyy, gc + gc.collect() + example01 = _cppyy.gbl.example01 assert example01.getCount() == 0 From pypy.commits at gmail.com Sat Aug 5 02:58:59 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 04 Aug 2017 23:58:59 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2623 Message-ID: <59856cb3.6596df0a.aa15.f242@mx.google.com> Author: Armin Rigo Branch: Changeset: r92085:2d3c081aa72d Date: 2017-08-05 08:58 +0200 http://bitbucket.org/pypy/pypy/changeset/2d3c081aa72d/ Log: Issue #2623 Tweak the signals logic to avoid the possibly-harmful optimization of pypysig_occurred. diff --git a/rpython/translator/c/src/signals.c b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -31,11 +31,11 @@ # endif #endif +#define N_LONGBITS (8 * sizeof(long)) +#define N_LONGSIG ((NSIG - 1) / N_LONGBITS + 1) + struct pypysig_long_struct pypysig_counter = {0}; -static char volatile pypysig_flags[NSIG] = {0}; -static int volatile pypysig_occurred = 0; -/* pypysig_occurred is only an optimization: it tells if any - pypysig_flags could be set. 
*/ +static long volatile pypysig_flags_bits[N_LONGSIG]; static int wakeup_fd = -1; static int wakeup_with_nul_byte = 1; @@ -73,12 +73,28 @@ #endif } +#ifdef _WIN32 +#define atomic_cas(ptr, oldv, newv) (InterlockedCompareExchange(ptr, \ + newv, oldv) == (oldv)) +#else +#define atomic_cas(ptr, oldv, newv) __sync_bool_compare_and_swap(ptr, \ + oldv, newv) +#endif + void pypysig_pushback(int signum) { if (0 <= signum && signum < NSIG) { - pypysig_flags[signum] = 1; - pypysig_occurred = 1; + int ok, index = signum / N_LONGBITS; + unsigned long bitmask = 1UL << (signum % N_LONGBITS); + do + { + long value = pypysig_flags_bits[index]; + if (value & bitmask) + break; /* already set */ + ok = atomic_cas(&pypysig_flags_bits[index], value, value | bitmask); + } while (!ok); + pypysig_counter.value = -1; } } @@ -161,19 +177,22 @@ int pypysig_poll(void) { - if (pypysig_occurred) - { - int i; - pypysig_occurred = 0; - for (i=0; i Author: Armin Rigo Branch: Changeset: r92086:758dbcd2983e Date: 2017-08-05 09:19 +0200 http://bitbucket.org/pypy/pypy/changeset/758dbcd2983e/ Log: Fix: unclear why this was done, but the end result was that even "make lldebug" in the PyPy sources would compile everything in -O3 instead of -O1, which was most likely not intended. diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -18,7 +18,7 @@ SHARED = SRC.join('shared') BACKTRACE = SHARED.join('libbacktrace') -compile_extra = ['-DRPYTHON_VMPROF', '-O3'] +compile_extra = ['-DRPYTHON_VMPROF'] separate_module_files = [ SHARED.join('symboltable.c'), SHARED.join('vmprof_unix.c') From pypy.commits at gmail.com Sat Aug 5 04:12:45 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 05 Aug 2017 01:12:45 -0700 (PDT) Subject: [pypy-commit] pypy default: whack until things stop exploding. 
can't reproduce in a test Message-ID: <59857dfd.9c8bdf0a.9ad09.eb33@mx.google.com> Author: fijal Branch: Changeset: r92087:2c6fe9073d99 Date: 2017-08-05 10:12 +0200 http://bitbucket.org/pypy/pypy/changeset/2c6fe9073d99/ Log: whack until things stop exploding. can't reproduce in a test diff --git a/rpython/rlib/rpath.py b/rpython/rlib/rpath.py --- a/rpython/rlib/rpath.py +++ b/rpython/rlib/rpath.py @@ -5,6 +5,7 @@ import os, stat from rpython.rlib import rposix from rpython.rlib.signature import signature +from rpython.rlib.rstring import assert_str0 from rpython.annotator.model import s_Str0 @@ -31,9 +32,11 @@ """Test whether a path is absolute""" return s.startswith('/') + at signature(s_Str0, returns=s_Str0) def _posix_rnormpath(path): """Normalize path, eliminating double slashes, etc.""" slash, dot = '/', '.' + assert_str0(dot) if path == '': return dot initial_slashes = path.startswith('/') @@ -56,6 +59,7 @@ path = slash.join(comps) if initial_slashes: path = slash*initial_slashes + path + assert_str0(path) return path or dot @signature(s_Str0, returns=s_Str0) @@ -66,6 +70,7 @@ if not _posix_risabs(path): cwd = os.getcwd() path = _posix_rjoin(cwd, path) + assert path is not None return _posix_rnormpath(path) except OSError: return path From pypy.commits at gmail.com Sat Aug 5 04:21:51 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 05 Aug 2017 01:21:51 -0700 (PDT) Subject: [pypy-commit] pypy default: disable logging that's unneccessary Message-ID: <5985801f.34a9df0a.bc750.3f45@mx.google.com> Author: fijal Branch: Changeset: r92088:dd32912dbb65 Date: 2017-08-05 10:21 +0200 http://bitbucket.org/pypy/pypy/changeset/dd32912dbb65/ Log: disable logging that's unneccessary diff --git a/rpython/rtyper/tool/rffi_platform.py b/rpython/rtyper/tool/rffi_platform.py --- a/rpython/rtyper/tool/rffi_platform.py +++ b/rpython/rtyper/tool/rffi_platform.py @@ -545,7 +545,7 @@ def question(self, ask_gcc): try: - ask_gcc(self.name + ';') + ask_gcc('(void)' + self.name + 
';') return True except CompilationError: return False diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -129,7 +129,7 @@ # some helpers which seem to be cross-platform enough def _execute_c_compiler(self, cc, args, outname, cwd=None): - log.execute(cc + ' ' + ' '.join(args)) + #log.execute(cc + ' ' + ' '.join(args)) # 'cc' can also contain some options for the C compiler; # e.g. it can be "gcc -m32". We handle it by splitting on ' '. cclist = cc.split() From pypy.commits at gmail.com Sat Aug 5 09:39:02 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 05 Aug 2017 06:39:02 -0700 (PDT) Subject: [pypy-commit] pypy install-rpython: hg merge default Message-ID: <5985ca76.0eae1c0a.f13ac.a2b8@mx.google.com> Author: Ronan Lamy Branch: install-rpython Changeset: r92089:a1f3fe63fa46 Date: 2017-08-05 14:02 +0100 http://bitbucket.org/pypy/pypy/changeset/a1f3fe63fa46/ Log: hg merge default diff too long, truncating to 2000 out of 3400 lines diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ RUNINTERP = $(PYPY_EXECUTABLE) endif -.PHONY: cffi_imports +.PHONY: pypy-c cffi_imports pypy-c: @echo @@ -32,7 +32,7 @@ @echo "====================================================================" @echo @sleep 5 - $(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py + cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py # Note: the -jN option, or MAKEFLAGS=-jN, are not usable. They are # replaced with an opaque --jobserver option by the time this Makefile @@ -40,4 +40,4 @@ # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html cffi_imports: pypy-c - PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true + PYTHONPATH=. 
pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. See Python bug #445902: diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. 
""" - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -224,11 +224,6 @@ "use specialised tuples", default=False), - BoolOption("withcelldict", - "use dictionaries that are optimized for being used as module dicts", - default=False, - requires=[("objspace.honor__builtins__", False)]), - BoolOption("withliststrategies", "enable optimized ways to store lists of primitives ", default=True), @@ -288,7 +283,7 @@ # extra optimizations with the JIT if level == 'jit': - config.objspace.std.suggest(withcelldict=True) + pass # none at the moment def enable_allworkingmodules(config): diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -10,6 +10,18 @@ minutes on a fast machine -- and RAM-hungry. You will need **at least** 2 GB of memory on a 32-bit machine and 4GB on a 64-bit machine. +Before you start +---------------- + +Our normal development workflow avoids a full translation by using test-driven +development. You can read more about how to develop PyPy here_, and latest +translated (hopefully functional) binary packages are available on our +buildbot's `nightly builds`_ + +.. _here: getting-started-dev.html +.. _`nightly builds`: http://buildbot.pypy.org/nightly + +You will need the build dependencies below to run the tests. 
Clone the repository -------------------- @@ -140,22 +152,61 @@ Run the translation ------------------- +We usually translate in the ``pypy/goal`` directory, so all the following +commands assume your ``$pwd`` is there. + Translate with JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=jit Translate without JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=2 +Note this translates pypy via the ``targetpypystandalone.py`` file, so these +are shorthand for:: + + pypy ../../rpython/bin/rpython targetpypystandalone.py + +More help is availabe via ``--help`` at either option position, and more info +can be found in the :doc:`config/index` section. + (You can use ``python`` instead of ``pypy`` here, which will take longer but works too.) -If everything works correctly this will create an executable ``pypy-c`` in the -current directory. The executable behaves mostly like a normal Python -interpreter (see :doc:`cpython_differences`). +If everything works correctly this will: + +1. Run the rpython `translation chain`_, producing a database of the + entire pypy interpreter. This step is currently singe threaded, and RAM + hungry. As part of this step, the chain creates a large number of C code + files and a Makefile to compile them in a + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. +2. Create an executable ``pypy-c`` by running the Makefile. This step can + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. + + +The resulting executable behaves mostly like a normal Python +interpreter (see :doc:`cpython_differences`), and is ready for testing, for +use as a base interpreter for a new virtualenv, or for packaging into a binary +suitable for installation on another machine running the same OS as the build +machine. 
+ +Note that step 4 is merely done as a convenience, any of the steps may be rerun +without rerunning the previous steps. + +.. _`translation chain`: https://rpython.readthedocs.io/en/latest/translation.html + + +Making a debug build of PyPy +---------------------------- + +If the Makefile is rerun with the lldebug or lldebug0 target, appropriate +compilation flags are added to add debug info and reduce compiler optimizations +to ``-O0`` respectively. If you stop in a debugger, you will see the +very wordy machine-generated C code from the rpython translation step, which +takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ @@ -169,14 +220,6 @@ .. _`out-of-line API mode`: http://cffi.readthedocs.org/en/latest/overview.html#real-example-api-level-out-of-line -Translating with non-standard options -------------------------------------- - -It is possible to have non-standard features enabled for translation, -but they are not really tested any more. Look, for example, at the -:doc:`objspace proxies ` document. - - Packaging (preparing for installation) -------------------------------------- @@ -205,14 +248,16 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in `package.py`_. Users + commands at install time; the exact list is in + :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in `package.py`_). 
Users seeing a broken + see the exact list in :source:`pypy/tool/release/package.py `). + Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file called ``_gdbm_cffi.pypy-41.so`` locally, which is a C extension diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.std.withcelldict.txt +++ /dev/null @@ -1,2 +0,0 @@ -Enable cell-dicts. This optimization is not helpful without the JIT. In the -presence of the JIT, it greatly helps looking up globals. diff --git a/pypy/doc/configuration.rst b/pypy/doc/configuration.rst --- a/pypy/doc/configuration.rst +++ b/pypy/doc/configuration.rst @@ -188,4 +188,6 @@ can be found on the ``config`` attribute of all ``TranslationContext`` instances and are described in :source:`rpython/config/translationoption.py`. The interpreter options are attached to the object space, also under the name ``config`` and are -described in :source:`pypy/config/pypyoption.py`. +described in :source:`pypy/config/pypyoption.py`. Both set of options are +documented in the :doc:`config/index` section. 
+ diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst deleted file mode 100644 --- a/pypy/doc/cppyy_example.rst +++ /dev/null @@ -1,59 +0,0 @@ -File example.h -============== - -:: - - #include - #include - - class AbstractClass { - public: - virtual ~AbstractClass() {} - virtual void abstract_method() = 0; - }; - - class ConcreteClass : AbstractClass { - public: - ConcreteClass(int n=42) : m_int(n) {} - ~ConcreteClass() {} - - virtual void abstract_method() { - std::cout << "called concrete method" << std::endl; - } - - void array_method(int* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - void array_method(double* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - AbstractClass* show_autocast() { - return this; - } - - operator const char*() { - return "Hello operator const char*!"; - } - - public: - int m_int; - }; - - namespace Namespace { - - class ConcreteClass { - public: - class NestedClass { - public: - std::vector m_v; - }; - - }; - - } // namespace Namespace diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -330,6 +330,8 @@ - ``frozenset`` (empty frozenset only) + - unbound method objects (for Python 2 only) + This change requires some changes to ``id`` as well. ``id`` fulfills the following condition: ``x is y <=> id(x) == id(y)``. Therefore ``id`` of the above types will return a value that is computed from the argument, and can diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -12,7 +12,7 @@ * Write them in pure Python and use ctypes_. -* Write them in C++ and bind them through :doc:`cppyy ` using Cling. +* Write them in C++ and bind them through cppyy_ using Cling. * Write them as `RPython mixed modules`_. 
@@ -64,9 +64,9 @@ cppyy ----- -For C++, `cppyy`_ is an automated bindings generator available for both +For C++, _cppyy_ is an automated bindings generator available for both PyPy and CPython. -``cppyy`` relies on declarations from C++ header files to dynamically +_cppyy_ relies on declarations from C++ header files to dynamically construct Python equivalent classes, functions, variables, etc. It is designed for use by large scale programs and supports modern C++. With PyPy, it leverages the built-in ``_cppyy`` module, allowing the JIT to @@ -75,8 +75,7 @@ To install, run ``pip install cppyy``. Further details are available in the `full documentation`_. -.. _cppyy: http://cppyy.readthedocs.org/ -.. _`full documentation`: http://cppyy.readthedocs.org/ +.. _`full documentation`: https://cppyy.readthedocs.org/ RPython Mixed Modules diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst --- a/pypy/doc/getting-started-dev.rst +++ b/pypy/doc/getting-started-dev.rst @@ -35,8 +35,8 @@ * Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Run tests! (See - the rest of this page.) + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) * Commit regularly with ``hg commit``. A one-line commit message is fine. We love to have tons of commits; make one as soon as you have @@ -113,6 +113,10 @@ make sure you have the correct version installed which you can find out with the ``--version`` switch. +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter + Now on to running some tests. PyPy has many different test directories and you can use shell completion to point at directories or files:: @@ -141,7 +145,7 @@ .. _py.test testing tool: http://pytest.org .. 
_py.test usage and invocations: http://pytest.org/latest/usage.html#usage - +.. _`build requirements`: build.html#install-build-time-dependencies Special Introspection Features of the Untranslated Python Interpreter --------------------------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,6 +5,14 @@ .. this is a revision shortly after release-pypy2.7-v5.8.0 .. startrev: 558bd00b3dd8 +In previous versions of PyPy, ``instance.method`` would return always +the same bound method object, when gotten out of the same instance (as +far as ``is`` and ``id()`` can tell). CPython doesn't do that. Now +PyPy, like CPython, returns a different bound method object every time. +For ``type.method``, PyPy2 still returns always the same *unbound* +method object; CPython does it for built-in types but not for +user-defined types. + .. branch: cffi-complex .. branch: cffi-char16-char32 @@ -30,3 +38,25 @@ Renaming of ``cppyy`` to ``_cppyy``. The former is now an external package installable with ``pip install cppyy``. + +.. branch: Enable_PGO_for_clang + +.. branch: nopax + +At the end of translation, run ``attr -q -s pax.flags -V m`` on +PAX-enabled systems on the produced binary. This seems necessary +because PyPy uses a JIT. + +.. branch: pypy_bytearray + +Improve ``bytearray`` performance (backported from py3.5) + +.. branch: gc-del-limit-growth + +Fix the bounds in the GC when allocating a lot of objects with finalizers, +fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -2,6 +2,7 @@ Arguments objects. 
""" from rpython.rlib.debug import make_sure_not_resized +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import jit from pypy.interpreter.error import OperationError, oefmt @@ -46,8 +47,8 @@ # behaviour but produces better error messages self.methodcall = methodcall + @not_rpython def __repr__(self): - """ NOT_RPYTHON """ name = self.__class__.__name__ if not self.keywords: return '%s(%s)' % (name, self.arguments_w,) diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -7,6 +7,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, specialize +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import rstack, rstackovf from pypy.interpreter import debug @@ -57,8 +58,9 @@ self.match(space, space.w_KeyboardInterrupt)) # note: an extra case is added in OpErrFmtNoArgs + @not_rpython def __str__(self): - "NOT_RPYTHON: Convenience for tracebacks." + "Convenience for tracebacks." s = self._w_value space = getattr(self.w_type, 'space', None) if space is not None: @@ -107,15 +109,16 @@ if RECORD_INTERPLEVEL_TRACEBACK: self.debug_excs.append(sys.exc_info()) + @not_rpython def print_application_traceback(self, space, file=None): - "NOT_RPYTHON: Dump a standard application-level traceback." + "Dump a standard application-level traceback." 
if file is None: file = sys.stderr self.print_app_tb_only(file) print >> file, self.errorstr(space) + @not_rpython def print_app_tb_only(self, file): - "NOT_RPYTHON" tb = self._application_traceback if tb: import linecache @@ -142,8 +145,9 @@ print >> file, l tb = tb.next + @not_rpython def print_detailed_traceback(self, space=None, file=None): - """NOT_RPYTHON: Dump a nice detailed interpreter- and + """Dump a nice detailed interpreter- and application-level traceback, useful to debug the interpreter.""" if file is None: file = sys.stderr diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -1,7 +1,7 @@ import sys from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll import unrolling_iterable -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib import jit, rgc, objectmodel TICK_COUNTER_STEP = 100 @@ -423,8 +423,9 @@ # to run at the next possible bytecode self.reset_ticker(-1) + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): - """NOT_RPYTHON: + """ Register the PeriodicAsyncAction action to be called whenever the tick counter becomes smaller than 0. If 'use_bytecode_counter' is True, make sure that we decrease the tick counter at every bytecode. diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -559,21 +559,29 @@ return space.newbool(space.eq_w(self.w_function, w_other.w_function)) def is_w(self, space, other): + if self.w_instance is not None: + return W_Root.is_w(self, space, other) + # The following special-case is only for *unbound* method objects. 
+ # Motivation: in CPython, it seems that no strange internal type + # exists where the equivalent of ``x.method is x.method`` would + # return True. This is unlike unbound methods, where e.g. + # ``list.append is list.append`` returns True. The following code + # is here to emulate that behaviour. Unlike CPython, we return + # True for all equal unbound methods, not just for built-in types. if not isinstance(other, Method): return False - return (self.w_instance is other.w_instance and + return (other.w_instance is None and self.w_function is other.w_function and self.w_class is other.w_class) def immutable_unique_id(self, space): - from pypy.objspace.std.util import IDTAG_METHOD as tag + if self.w_instance is not None: + return W_Root.immutable_unique_id(self, space) + # the special-case is only for *unbound* method objects + # + from pypy.objspace.std.util import IDTAG_UNBOUND_METHOD as tag from pypy.objspace.std.util import IDTAG_SHIFT - if self.w_instance is not None: - id = space.bigint_w(space.id(self.w_instance)) - id = id.lshift(LONG_BIT) - else: - id = rbigint.fromint(0) - id = id.or_(space.bigint_w(space.id(self.w_function))) + id = space.bigint_w(space.id(self.w_function)) id = id.lshift(LONG_BIT).or_(space.bigint_w(space.id(self.w_class))) id = id.lshift(IDTAG_SHIFT).int_or_(tag) return space.newlong_from_rbigint(id) diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -64,8 +64,8 @@ def _freeze_(self): return True + @not_rpython def 
unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -380,8 +380,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): - """NOT_RPYTHON""" self.behavior = behavior def _run(self, space, scope_w): @@ -621,9 +621,9 @@ # When a BuiltinCode is stored in a Function object, # you get the functionality of CPython's built-in function type. + @not_rpython def __init__(self, func, unwrap_spec=None, self_type=None, descrmismatch=None, doc=None): - "NOT_RPYTHON" # 'implfunc' is the interpreter-level function. # Note that this uses a lot of (construction-time) introspection. Code.__init__(self, func.__name__) @@ -969,10 +969,10 @@ instancecache = {} + @not_rpython def __new__(cls, f, app_name=None, unwrap_spec=None, descrmismatch=None, as_classmethod=False, doc=None): - "NOT_RPYTHON" # f must be a function whose name does NOT start with 'app_' self_type = None if hasattr(f, 'im_func'): @@ -1013,8 +1013,8 @@ self._staticdefs = zip(argnames[-len(defaults):], defaults) return self + @not_rpython def _getdefaults(self, space): - "NOT_RPYTHON" defs_w = [] for name, defaultval in self._staticdefs: if name.startswith('w_'): @@ -1070,8 +1070,8 @@ class GatewayCache(SpaceCache): + @not_rpython def build(cache, gateway): - "NOT_RPYTHON" space = cache.space defs = gateway._getdefaults(space) # needs to be implemented by subclass code = gateway._code @@ -1141,8 +1141,8 @@ w_globals = self.getwdict(space) return space.getitem(w_globals, space.newtext(name)) + @not_rpython def interphook(self, name): - "NOT_RPYTHON" def appcaller(space, *args_w): if not isinstance(space, ObjSpace): raise TypeError("first argument must be a space instance.") @@ -1179,15 +1179,16 @@ """NOT_RPYTHON The cache mapping each applevel instance to its lazily built w_dict""" + @not_rpython def build(self, app): - "NOT_RPYTHON. Called indirectly by Applevel.getwdict()." + "Called indirectly by Applevel.getwdict()." 
return build_applevel_dict(app, self.space) # __________ pure applevel version __________ + at not_rpython def build_applevel_dict(self, space): - "NOT_RPYTHON" w_glob = space.newdict(module=True) space.setitem(w_glob, space.newtext('__name__'), space.newtext(self.modname)) space.exec_(self.source, w_glob, w_glob, @@ -1198,8 +1199,9 @@ # ____________________________________________________________ + at not_rpython def appdef(source, applevel=ApplevelClass, filename=None): - """ NOT_RPYTHON: build an app-level helper function, like for example: + """ build an app-level helper function, like for example: myfunc = appdef('''myfunc(x, y): return x+y ''') @@ -1245,6 +1247,6 @@ # app2interp_temp is used for testing mainly + at not_rpython def app2interp_temp(func, applevel_temp=applevel_temp, filename=None): - """ NOT_RPYTHON """ return appdef(func, applevel_temp, filename=filename) diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -3,6 +3,7 @@ """ from rpython.rlib.listsort import make_timsort_class +from rpython.rlib.objectmodel import not_rpython class ThreadLocals: @@ -41,9 +42,8 @@ # but in some corner cases it is not... 
unsure why self._value = None - + at not_rpython def make_weak_value_dictionary(space, keytype, valuetype): - "NOT_RPYTHON" if space.config.translation.rweakref: from rpython.rlib.rweakref import RWeakValueDictionary return RWeakValueDictionary(keytype, valuetype) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -3,6 +3,9 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.baseobjspace import W_Root + +from rpython.rlib.objectmodel import not_rpython + import sys class MixedModule(Module): @@ -15,16 +18,17 @@ lazy = False submodule_name = None + @not_rpython def __init__(self, space, w_name): - """ NOT_RPYTHON """ Module.__init__(self, space, w_name) self.lazy = True self.__class__.buildloaders() self.loaders = self.loaders.copy() # copy from the class to the inst self.submodules_w = [] + @not_rpython def install(self): - """NOT_RPYTHON: install this module, and it's submodules into + """install this module, and it's submodules into space.builtin_modules""" Module.install(self) if hasattr(self, "submodules"): @@ -61,8 +65,8 @@ self.w_initialdict = self.space.call_method(self.w_dict, 'items') @classmethod + @not_rpython def get_applevel_name(cls): - """ NOT_RPYTHON """ if cls.applevel_name is not None: return cls.applevel_name else: @@ -130,8 +134,8 @@ self._frozen = True @classmethod + @not_rpython def buildloaders(cls): - """ NOT_RPYTHON """ if not hasattr(cls, 'loaders'): # build a constant dictionary out of # applevel/interplevel definitions @@ -161,8 +165,8 @@ return space.newtext_or_none(cls.__doc__) + at not_rpython def getinterpevalloader(pkgroot, spec): - """ NOT_RPYTHON """ def ifileloader(space): d = {'space':space} # EVIL HACK (but it works, and this is not RPython :-) @@ -202,8 +206,8 @@ return ifileloader applevelcache = {} + at not_rpython def getappfileloader(pkgroot, appname, spec): 
- """ NOT_RPYTHON """ # hum, it's a bit more involved, because we usually # want the import at applevel modname, attrname = spec.split('.') diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -4,7 +4,7 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython class Module(W_Root): @@ -40,13 +40,15 @@ except OperationError: pass + @not_rpython def install(self): - """NOT_RPYTHON: installs this module into space.builtin_modules""" + """installs this module into space.builtin_modules""" modulename = self.space.text0_w(self.w_name) self.space.builtin_modules[modulename] = self + @not_rpython def setup_after_space_initialization(self): - """NOT_RPYTHON: to allow built-in modules to do some more setup + """to allow built-in modules to do some more setup after the space is fully initialized.""" def init(self, space): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -7,6 +7,7 @@ from rpython.rlib.debug import ll_assert_not_none from rpython.rlib.jit import hint from rpython.rlib.objectmodel import instantiate, specialize, we_are_translated +from rpython.rlib.objectmodel import not_rpython from rpython.rlib.rarithmetic import intmask, r_uint from rpython.tool.pairtype import extendabletype @@ -144,8 +145,9 @@ return None return d.w_locals + @not_rpython def __repr__(self): - # NOT_RPYTHON: useful in tracebacks + # useful in tracebacks return "<%s.%s executing %s at line %s" % ( self.__class__.__module__, self.__class__.__name__, self.pycode, self.get_last_lineno()) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -7,7 +7,7 @@ from 
rpython.rlib import jit, rstackovf from rpython.rlib.debug import check_nonneg from rpython.rlib.objectmodel import (we_are_translated, always_inline, - dont_inline) + dont_inline, not_rpython) from rpython.rlib.rarithmetic import r_uint, intmask from rpython.tool.sourcetools import func_with_new_name @@ -20,8 +20,8 @@ from pypy.interpreter.pycode import PyCode, BytecodeCorruption from pypy.tool.stdlib_opcode import bytecode_spec + at not_rpython def unaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_1 = self.popvalue() @@ -31,8 +31,8 @@ return func_with_new_name(opimpl, "opcode_impl_for_%s" % operationname) + at not_rpython def binaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_2 = self.popvalue() diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -1,4 +1,4 @@ -import pytest +import pytest, sys from pypy.interpreter import eval from pypy.interpreter.function import Function, Method, descr_function_get from pypy.interpreter.pycode import PyCode @@ -342,6 +342,11 @@ raises(ValueError, type(f).__setstate__, f, (1, 2, 3)) class AppTestMethod: + def setup_class(cls): + cls.w_runappdirect_on_cpython = cls.space.wrap( + cls.runappdirect and + '__pypy__' not in sys.builtin_module_names) + def test_simple_call(self): class A(object): def func(self, arg2): @@ -572,7 +577,6 @@ assert meth == meth assert meth == MethodType(func, object) - @pytest.mark.skipif("config.option.runappdirect") def test_method_identity(self): class A(object): def m(self): @@ -589,19 +593,24 @@ a = A() a2 = A() - assert a.m is a.m - assert id(a.m) == id(a.m) - assert a.m is not a.n - assert id(a.m) != id(a.n) - assert a.m is not a2.m - assert id(a.m) != id(a2.m) + x = a.m; y = a.m + assert x is not y + assert id(x) 
!= id(y) + assert x == y + assert x is not a.n + assert id(x) != id(a.n) + assert x is not a2.m + assert id(x) != id(a2.m) - assert A.m is A.m - assert id(A.m) == id(A.m) - assert A.m is not A.n - assert id(A.m) != id(A.n) - assert A.m is not B.m - assert id(A.m) != id(B.m) + if not self.runappdirect_on_cpython: + assert A.m is A.m + assert id(A.m) == id(A.m) + assert A.m == A.m + x = A.m + assert x is not A.n + assert id(x) != id(A.n) + assert x is not B.m + assert id(x) != id(B.m) class TestMethod: diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -8,14 +8,15 @@ from rpython.rlib.jit import promote from rpython.rlib.objectmodel import compute_identity_hash, specialize -from rpython.rlib.objectmodel import instantiate +from rpython.rlib.objectmodel import instantiate, not_rpython from rpython.tool.sourcetools import compile2, func_with_new_name class TypeDef(object): + @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, **rawdict): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" self.name = __name if __base is None: bases = [] @@ -113,8 +114,9 @@ # register_finalizer() or not. 
@specialize.memo() + at not_rpython def get_unique_interplevel_subclass(space, cls): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert cls.typedef.acceptable_as_base_class try: return _unique_subclass_cache[cls] @@ -349,15 +351,17 @@ return self + at not_rpython def interp_attrproperty(name, cls, doc=None, wrapfn=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" assert wrapfn is not None def fget(space, obj): return getattr(space, wrapfn)(getattr(obj, name)) return GetSetProperty(fget, cls=cls, doc=doc) + at not_rpython def interp_attrproperty_w(name, cls, doc=None): - "NOT_RPYTHON: initialization-time only" + "initialization-time only" def fget(space, obj): w_value = getattr(obj, name) if w_value is None: diff --git a/pypy/module/__builtin__/test/test_classobj.py b/pypy/module/__builtin__/test/test_classobj.py --- a/pypy/module/__builtin__/test/test_classobj.py +++ b/pypy/module/__builtin__/test/test_classobj.py @@ -1090,18 +1090,18 @@ def setup_class(cls): if cls.runappdirect: py.test.skip("can only be run on py.py") - def is_strdict(space, w_class): - from pypy.objspace.std.dictmultiobject import BytesDictStrategy + def is_moduledict(space, w_class): + from pypy.objspace.std.celldict import ModuleDictStrategy w_d = w_class.getdict(space) - return space.wrap(isinstance(w_d.get_strategy(), BytesDictStrategy)) + return space.wrap(isinstance(w_d.get_strategy(), ModuleDictStrategy)) - cls.w_is_strdict = cls.space.wrap(gateway.interp2app(is_strdict)) + cls.w_is_moduledict = cls.space.wrap(gateway.interp2app(is_moduledict)) - def test_strdict(self): + def test_moduledict(self): class A: a = 1 b = 2 - assert self.is_strdict(A) + assert self.is_moduledict(A) def test_attr_slots(self): class C: diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -433,17 +433,22 @@ def _sizeof(self): return 
self.ctype.size - def with_gc(self, w_destructor): + def with_gc(self, w_destructor, size=0): space = self.space if space.is_none(w_destructor): if isinstance(self, W_CDataGCP): self.detach_destructor() - return space.w_None - raise oefmt(space.w_TypeError, - "Can remove destructor only on a object " - "previously returned by ffi.gc()") - with self as ptr: - return W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + w_res = space.w_None + else: + raise oefmt(space.w_TypeError, + "Can remove destructor only on a object " + "previously returned by ffi.gc()") + else: + with self as ptr: + w_res = W_CDataGCP(space, ptr, self.ctype, self, w_destructor) + if size != 0: + rgc.add_memory_pressure(size) + return w_res def unpack(self, length): from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -351,14 +351,14 @@ return handle.from_handle(self.space, w_arg) - @unwrap_spec(w_cdata=W_CData) - def descr_gc(self, w_cdata, w_destructor): + @unwrap_spec(w_cdata=W_CData, size=int) + def descr_gc(self, w_cdata, w_destructor, size=0): """\ Return a new cdata object that points to the same data. 
Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called.""" # - return w_cdata.with_gc(w_destructor) + return w_cdata.with_gc(w_destructor, size) @unwrap_spec(replace_with='text') diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -257,6 +257,6 @@ # ____________________________________________________________ - at unwrap_spec(w_cdata=cdataobj.W_CData) -def gcp(space, w_cdata, w_destructor): - return w_cdata.with_gc(w_destructor) + at unwrap_spec(w_cdata=cdataobj.W_CData, size=int) +def gcp(space, w_cdata, w_destructor, size=0): + return w_cdata.with_gc(w_destructor, size) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -377,7 +377,7 @@ raises(TypeError, ffi.gc, p, None) seen = [] q1 = ffi.gc(p, lambda p: seen.append(1)) - q2 = ffi.gc(q1, lambda p: seen.append(2)) + q2 = ffi.gc(q1, lambda p: seen.append(2), size=123) import gc; gc.collect() assert seen == [] assert ffi.gc(q1, None) is None diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py --- a/pypy/module/_codecs/__init__.py +++ b/pypy/module/_codecs/__init__.py @@ -1,5 +1,6 @@ from pypy.interpreter.mixedmodule import MixedModule from rpython.rlib import runicode +from rpython.rlib.objectmodel import not_rpython from pypy.module._codecs import interp_codecs class Module(MixedModule): @@ -86,9 +87,8 @@ 'unicode_internal_encode' : 'interp_codecs.unicode_internal_encode', } + @not_rpython def __init__(self, space, *args): - "NOT_RPYTHON" - # mbcs codec is Windows specific, and based on rffi. 
if (hasattr(runicode, 'str_decode_mbcs')): self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode' diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,5 +1,5 @@ from rpython.rlib import jit -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder from rpython.rlib.runicode import code_to_unichr, MAXUNICODE @@ -268,8 +268,8 @@ raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) + at not_rpython def register_builtin_error_handlers(space): - "NOT_RPYTHON" state = space.fromcache(CodecState) for error in ("strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"): diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -7,6 +8,8 @@ def setup_class(cls): cls.w_tmpfilename = cls.space.wrap(str(udir.join('test__vmprof.1'))) cls.w_tmpfilename2 = cls.space.wrap(str(udir.join('test__vmprof.2'))) + cls.w_plain = cls.space.wrap(not cls.runappdirect and + '__pypy__' not in sys.builtin_module_names) def test_import_vmprof(self): tmpfile = open(self.tmpfilename, 'wb') @@ -117,6 +120,8 @@ assert _vmprof.get_profile_path() is None def test_stop_sampling(self): + if not self.plain: + skip("unreliable test except on CPython without -A") import os import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -118,6 +118,29 @@ return space.w_True return space.w_False +index_count_jd 
= jit.JitDriver( + greens = ['count', 'arrclass', 'tp_item'], + reds = 'auto', name = 'array.index_or_count') + +def index_count_array(arr, w_val, count=False): + space = arr.space + tp_item = space.type(w_val) + arrclass = arr.__class__ + cnt = 0 + for i in range(arr.len): + index_count_jd.jit_merge_point( + tp_item=tp_item, count=count, + arrclass=arrclass) + w_item = arr.w_getitem(space, i) + if space.eq_w(w_item, w_val): + if count: + cnt += 1 + else: + return i + if count: + return cnt + return -1 + UNICODE_ARRAY = lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True})) @@ -257,17 +280,12 @@ """ self.extend(w_x) - def descr_count(self, space, w_val): + def descr_count(self, space, w_x): """ count(x) Return number of occurrences of x in the array. """ - cnt = 0 - for i in range(self.len): - # XXX jitdriver - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_val): - cnt += 1 + cnt = index_count_array(self, w_x, count=True) return space.newint(cnt) def descr_index(self, space, w_x): @@ -275,10 +293,9 @@ Return index of first occurrence of x in the array. 
""" - for i in range(self.len): - w_item = self.w_getitem(space, i) - if space.eq_w(w_item, w_x): - return space.newint(i) + res = index_count_array(self, w_x, count=False) + if res >= 0: + return space.newint(res) raise oefmt(space.w_ValueError, "array.index(x): x not in list") def descr_reverse(self, space): @@ -752,7 +769,9 @@ class TypeCode(object): def __init__(self, itemtype, unwrap, canoverflow=False, signed=False, - method='__int__'): + method='__int__', errorname=None): + if errorname is None: + errorname = unwrap[:-2] self.itemtype = itemtype self.bytes = rffi.sizeof(itemtype) self.arraytype = lltype.Array(itemtype, hints={'nolength': True}) @@ -762,6 +781,7 @@ self.canoverflow = canoverflow self.w_class = None self.method = method + self.errorname = errorname def _freeze_(self): # hint for the annotator: track individual constant instances @@ -785,8 +805,8 @@ 'i': TypeCode(rffi.INT, 'int_w', True, True), 'I': _UINTTypeCode, 'l': TypeCode(rffi.LONG, 'int_w', True, True), - 'L': TypeCode(rffi.ULONG, 'bigint_w'), # Overflow handled by - # rbigint.touint() which + 'L': TypeCode(rffi.ULONG, 'bigint_w', # Overflow handled by + errorname="integer"), # rbigint.touint() which # corresponds to the # C-type unsigned long 'f': TypeCode(lltype.SingleFloat, 'float_w', method='__float__'), @@ -864,7 +884,7 @@ item = unwrap(space.call_method(w_item, mytype.method)) except OperationError: raise oefmt(space.w_TypeError, - "array item must be " + mytype.unwrap[:-2]) + "array item must be " + mytype.errorname) else: raise if mytype.unwrap == 'bigint_w': diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -162,6 +162,11 @@ raises(OverflowError, a.append, -1) raises(OverflowError, a.append, 2 ** (8 * b)) + def test_errormessage(self): + a = self.array("L", [1, 2, 3]) + excinfo = raises(TypeError, "a[0] = 'abc'") + assert str(excinfo.value) == "array 
item must be integer" + def test_fromstring(self): import sys diff --git a/pypy/module/cppyy/test/test_cint.py b/pypy/module/cppyy/test/test_cint.py deleted file mode 100644 --- a/pypy/module/cppyy/test/test_cint.py +++ /dev/null @@ -1,710 +0,0 @@ -import py, os, sys - -# These tests are for the CINT backend only (they exercise ROOT features -# and classes that are not loaded/available with the Reflex backend). At -# some point, these tests are likely covered by the CLang/LLVM backend. -from pypy.module.cppyy import capi -if capi.identify() != 'CINT': - py.test.skip("backend-specific: CINT-only tests") - -# load _cffi_backend early, or its global vars are counted as leaks in the -# test (note that the module is not otherwise used in the test itself) -from pypy.module._cffi_backend import newtype - -currpath = py.path.local(__file__).dirpath() -iotypes_dct = str(currpath.join("iotypesDict.so")) - -def setup_module(mod): - if sys.platform == 'win32': - py.test.skip("win32 not supported so far") - err = os.system("cd '%s' && make CINT=t iotypesDict.so" % currpath) - if err: - raise OSError("'make' failed (see stderr)") - -class AppTestCINT: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def test01_globals(self): - """Test the availability of ROOT globals""" - - import cppyy - - assert cppyy.gbl.gROOT - assert cppyy.gbl.gApplication - assert cppyy.gbl.gSystem - assert cppyy.gbl.TInterpreter.Instance() # compiled - assert cppyy.gbl.TInterpreter # interpreted - assert cppyy.gbl.TDirectory.CurrentDirectory() # compiled - assert cppyy.gbl.TDirectory # interpreted - - def test02_write_access_to_globals(self): - """Test overwritability of ROOT globals""" - - import cppyy - - oldval = cppyy.gbl.gDebug - assert oldval != 3 - - proxy = cppyy.gbl.__class__.__dict__['gDebug'] - cppyy.gbl.gDebug = 3 - assert proxy.__get__(proxy, None) == 3 - - # this is where this test differs from test03_write_access_to_globals - # in test_pythonify.py - 
cppyy.gbl.gROOT.ProcessLine('int gDebugCopy = gDebug;') - assert cppyy.gbl.gDebugCopy == 3 - - cppyy.gbl.gDebug = oldval - - def test03_create_access_to_globals(self): - """Test creation and access of new ROOT globals""" - - import cppyy - - cppyy.gbl.gROOT.ProcessLine('double gMyOwnGlobal = 3.1415') - assert cppyy.gbl.gMyOwnGlobal == 3.1415 - - proxy = cppyy.gbl.__class__.__dict__['gMyOwnGlobal'] - assert proxy.__get__(proxy, None) == 3.1415 - - def test04_auto_loading(self): - """Test auto-loading by retrieving a non-preloaded class""" - - import cppyy - - l = cppyy.gbl.TLorentzVector() - assert isinstance(l, cppyy.gbl.TLorentzVector) - - def test05_macro_loading(self): - """Test accessibility to macro classes""" - - import cppyy - - loadres = cppyy.gbl.gROOT.LoadMacro('simple_class.C') - assert loadres == 0 - - base = cppyy.gbl.MySimpleBase - simple = cppyy.gbl.MySimpleDerived - simple_t = cppyy.gbl.MySimpleDerived_t - - assert issubclass(simple, base) - assert simple is simple_t - - c = simple() - assert isinstance(c, simple) - assert c.m_data == c.get_data() - - c.set_data(13) - assert c.m_data == 13 - assert c.get_data() == 13 - - -class AppTestCINTPYTHONIZATIONS: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def test01_strings(self): - """Test TString/TObjString compatibility""" - - import cppyy - - pyteststr = "aap noot mies" - def test_string(s1, s2): - assert len(s1) == len(s2) - assert s1 == s1 - assert s1 == s2 - assert s1 == str(s1) - assert s1 == pyteststr - assert s1 != "aap" - assert s1 != "" - assert s1 < "noot" - assert repr(s1) == repr(s2) - - s1 = cppyy.gbl.TString(pyteststr) - test_string(s1, pyteststr) - - s3 = cppyy.gbl.TObjString(pyteststr) - test_string(s3, pyteststr) - - def test03_TVector(self): - """Test TVector2/3/T behavior""" - - import cppyy, math - - N = 51 - - # TVectorF is a typedef of floats - v = cppyy.gbl.TVectorF(N) - for i in range(N): - v[i] = i*i - - assert len(v) == N - for j in v: - assert 
round(v[int(math.sqrt(j)+0.5)]-j, 5) == 0. - - def test04_TStringTObjString(self): - """Test string/TString interchangebility""" - - import cppyy - - test = "aap noot mies" - - s1 = cppyy.gbl.TString(test ) - s2 = str(s1) - - assert s1 == test - assert test == s2 - assert s1 == s2 - - s3 = cppyy.gbl.TObjString(s2) - assert s3 == test - assert s2 == s3 - - # force use of: TNamed(const TString &name, const TString &title) - n = cppyy.gbl.TNamed(test, cppyy.gbl.TString("title")) - assert n.GetTitle() == "title" - assert n.GetName() == test - - -class AppTestCINTTTREE: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - def setup_class(cls): - cls.w_N = cls.space.newint(5) - cls.w_M = cls.space.newint(10) - cls.w_fname = cls.space.newtext("test.root") - cls.w_tname = cls.space.newtext("test") - cls.w_title = cls.space.newtext("test tree") - cls.w_iotypes = cls.space.appexec([], """(): - import cppyy - return cppyy.load_reflection_info(%r)""" % (iotypes_dct,)) - - def test01_write_stdvector(self): - """Test writing of a single branched TTree with an std::vector""" - - from cppyy import gbl # bootstraps, only needed for tests - from cppyy.gbl import TFile, TTree - from cppyy.gbl.std import vector - - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - mytree._python_owns = False - - v = vector("double")() - raises(TypeError, TTree.Branch, None, "mydata", v.__class__.__name__, v) - raises(TypeError, TTree.Branch, v, "mydata", v.__class__.__name__, v) - - mytree.Branch("mydata", v.__class__.__name__, v) - - for i in range(self.N): - for j in range(self.M): - v.push_back(i*self.M+j) - mytree.Fill() - v.clear() - f.Write() - f.Close() - - def test02_file_open(self): - - from cppyy import gbl - - f = gbl.TFile.Open(self.fname) - s = str(f) # should not raise - r = repr(f) - - f.Close() - - def test03_read_stdvector(self): - """Test reading of a single branched TTree with an std::vector""" - - from cppyy import gbl - from cppyy.gbl 
import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - i = 0 - for event in mytree: - assert len(event.mydata) == self.M - for entry in event.mydata: - assert i == int(entry) - i += 1 - assert i == self.N * self.M - - f.Close() - - def test04_write_some_data_object(self): - """Test writing of a complex data object""" - - from cppyy import gbl - from cppyy.gbl import TFile, TTree, IO - from cppyy.gbl.IO import SomeDataObject - - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - - d = SomeDataObject() - b = mytree.Branch("data", d) - mytree._python_owns = False - assert b - - for i in range(self.N): - for j in range(self.M): - d.add_float(i*self.M+j) - d.add_tuple(d.get_floats()) - - mytree.Fill() - - f.Write() - f.Close() - - def test05_read_some_data_object(self): - """Test reading of a complex data object""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - j = 1 - for event in mytree: - i = 0 - assert len(event.data.get_floats()) == j*self.M - for entry in event.data.get_floats(): - assert i == int(entry) - i += 1 - - k = 1 - assert len(event.data.get_tuples()) == j - for mytuple in event.data.get_tuples(): - i = 0 - assert len(mytuple) == k*self.M - for entry in mytuple: - assert i == int(entry) - i += 1 - k += 1 - j += 1 - assert j-1 == self.N - # - f.Close() - - def test06_branch_activation(self): - """Test of automatic branch activation""" - - from cppyy import gbl - from cppyy.gbl import TFile, TTree - from cppyy.gbl.std import vector - - L = 5 - - # writing - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - mytree._python_owns = False - - for i in range(L): - v = vector("double")() - mytree.Branch("mydata_%d"%i, v.__class__.__name__, v) - mytree.__dict__["v_%d"%i] = v - - for i in range(self.N): - for k in range(L): - v = mytree.__dict__["v_%d"%k] - for j in range(self.M): - 
mytree.__dict__["v_%d"%k].push_back(i*self.M+j*L+k) - mytree.Fill() - for k in range(L): - v = mytree.__dict__["v_%d"%k] - v.clear() - f.Write() - f.Close() - - del mytree, f - import gc - gc.collect() - - # reading - f = TFile(self.fname) - mytree = f.Get(self.tname) - - # force (initial) disabling of all branches - mytree.SetBranchStatus("*",0); - - i = 0 - for event in mytree: - for k in range(L): - j = 0 - data = getattr(mytree, "mydata_%d"%k) - assert len(data) == self.M - for entry in data: - assert entry == i*self.M+j*L+k - j += 1 - assert j == self.M - i += 1 - assert i == self.N - - f.Close() - - def test07_write_builtin(self): - """Test writing of builtins""" - - from cppyy import gbl # bootstraps, only needed for tests - from cppyy.gbl import TFile, TTree - from cppyy.gbl.std import vector - - f = TFile(self.fname, "RECREATE") - mytree = TTree(self.tname, self.title) - mytree._python_owns = False - - import array - mytree.ba = array.array('c', [chr(0)]) - mytree.ia = array.array('i', [0]) - mytree.da = array.array('d', [0.]) - - mytree.Branch("my_bool", mytree.ba, "my_bool/O") - mytree.Branch("my_int", mytree.ia, "my_int/I") - mytree.Branch("my_int2", mytree.ia, "my_int2/I") - mytree.Branch("my_double", mytree.da, "my_double/D") - - for i in range(self.N): - # make sure value is different from default (0) - mytree.ba[0] = i%2 and chr(0) or chr(1) - mytree.ia[0] = i+1 - mytree.da[0] = (i+1)/2. - mytree.Fill() - f.Write() - f.Close() - - def test08_read_builtin(self): - """Test reading of builtins""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - raises(AttributeError, getattr, mytree, "does_not_exist") - - i = 1 - for event in mytree: - assert event.my_bool == (i-1)%2 and 0 or 1 - assert event.my_int == i - assert event.my_double == i/2. 
- i += 1 - assert (i-1) == self.N - - f.Close() - - def test09_user_read_builtin(self): - """Test user-directed reading of builtins""" - - from cppyy import gbl - from cppyy.gbl import TFile - - f = TFile(self.fname) - mytree = f.Get(self.tname) - - # note, this is an old, annoted tree from test08 - for i in range(3, mytree.GetEntriesFast()): - mytree.GetEntry(i) - assert mytree.my_int == i+1 - assert mytree.my_int2 == i+1 - - f.Close() - -class AppTestCINTREGRESSION: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - # these are tests that at some point in the past resulted in failures on - # PyROOT; kept here to confirm no regression from PyROOT - - def test01_regression(self): - """TPaveText::AddText() used to result in KeyError""" - - # This is where the original problem was discovered, and the test is - # left in. However, the detailed underlying problem, as well as the - # solution to it, is tested in test_fragile.py - - from cppyy import gbl - from cppyy.gbl import TPaveText - - hello = TPaveText( .1, .8, .9, .97 ) - hello.AddText( 'Hello, World!' ) - - -class AppTestCINTFUNCTION: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - _pypytest_leaks = None # TODO: figure out the false positives - - # test the function callbacks; this does not work with Reflex, as it can - # not generate functions on the fly (it might with cffi?) - - @py.test.mark.dont_track_allocations("TODO: understand; initialization left-over?") - def test01_global_function_callback(self): - """Test callback of a python global function""" - - import cppyy, gc - TF1 = cppyy.gbl.TF1 - - def identity(x): - return x[0] - - f = TF1("pyf1", identity, -1., 1., 0) - - assert f.Eval(0.5) == 0.5 - assert f.Eval(-10.) == -10. - assert f.Eval(1.0) == 1.0 - - # check proper propagation of default value - f = TF1("pyf1d", identity, -1., 1.) 
- - assert f.Eval(0.5) == 0.5 - - del f # force here, to prevent leak-check complaints - gc.collect() - - def test02_callable_object_callback(self): - """Test callback of a python callable object""" - - import cppyy, gc - TF1 = cppyy.gbl.TF1 - - class Linear: - def __call__(self, x, par): - return par[0] + x[0]*par[1] - - f = TF1("pyf2", Linear(), -1., 1., 2) - f.SetParameters(5., 2.) - - assert f.Eval(-0.1) == 4.8 - assert f.Eval(1.3) == 7.6 - - del f # force here, to prevent leak-check complaints - gc.collect() - - def test03_fit_with_python_gaussian(self): - """Test fitting with a python global function""" - - # note: this function is dread-fully slow when running testing un-translated - - import cppyy, gc, math - TF1, TH1F = cppyy.gbl.TF1, cppyy.gbl.TH1F - - def pygaus(x, par): - arg1 = 0 - scale1 = 0 - ddx = 0.01 - - if (par[2] != 0.0): - arg1 = (x[0]-par[1])/par[2] - scale1 = (ddx*0.39894228)/par[2] - h1 = par[0]/(1+par[3]) - - gauss = h1*scale1*math.exp(-0.5*arg1*arg1) - else: - gauss = 0. - return gauss - - f = TF1("pygaus", pygaus, -4, 4, 4) - f.SetParameters(600, 0.43, 0.35, 600) - - h = TH1F("h", "test", 100, -4, 4) - h.FillRandom("gaus", 200000) - h.Fit(f, "0Q") - - assert f.GetNDF() == 96 - result = f.GetParameters() - assert round(result[1] - 0., 1) == 0 # mean - assert round(result[2] - 1., 1) == 0 # s.d. 
- - del f # force here, to prevent leak-check complaints - gc.collect() - - -class AppTestSURPLUS: - spaceconfig = dict(usemodules=['cppyy', '_rawffi', 'itertools']) - - # these are tests that were historically exercised on ROOT classes and - # have twins on custom classes; kept here just in case differences crop - # up between the ROOT classes and the custom ones - - def test01_class_enum(self): - """Test class enum access and values""" - - import cppyy - TObject = cppyy.gbl.TObject - gROOT = cppyy.gbl.gROOT - - assert TObject.kBitMask == gROOT.ProcessLine("return TObject::kBitMask;") - assert TObject.kIsOnHeap == gROOT.ProcessLine("return TObject::kIsOnHeap;") - assert TObject.kNotDeleted == gROOT.ProcessLine("return TObject::kNotDeleted;") - assert TObject.kZombie == gROOT.ProcessLine("return TObject::kZombie;") - - t = TObject() - - assert TObject.kBitMask == t.kBitMask - assert TObject.kIsOnHeap == t.kIsOnHeap - assert TObject.kNotDeleted == t.kNotDeleted - assert TObject.kZombie == t.kZombie - - def test02_global_enum(self): - """Test global enums access and values""" - - import cppyy - from cppyy import gbl - - assert gbl.kRed == gbl.gROOT.ProcessLine("return kRed;") - assert gbl.kGreen == gbl.gROOT.ProcessLine("return kGreen;") - assert gbl.kBlue == gbl.gROOT.ProcessLine("return kBlue;") - - def test03_copy_contructor(self): - """Test copy constructor""" - - import cppyy - TLorentzVector = cppyy.gbl.TLorentzVector - - t1 = TLorentzVector(1., 2., 3., -4.) - t2 = TLorentzVector(0., 0., 0., 0.) 
- t3 = TLorentzVector(t1) - - assert t1 == t3 - assert t1 != t2 - - for i in range(4): - assert t1[i] == t3[i] - - def test04_object_validity(self): - """Test object validity checking""" - - import cppyy - - t1 = cppyy.gbl.TObject() - - assert t1 - assert not not t1 - - t2 = cppyy.gbl.gROOT.FindObject("Nah, I don't exist") - - assert not t2 - - def test05_element_access(self): - """Test access to elements in matrix and array objects.""" - - from cppyy import gbl - - N = 3 - v = gbl.TVectorF(N) - m = gbl.TMatrixD(N, N) - - for i in range(N): - assert v[i] == 0.0 - - for j in range(N): - assert m[i][j] == 0.0 - - def test06_static_function_call( self ): - """Test call to static function.""" - - import cppyy - TROOT, gROOT = cppyy.gbl.TROOT, cppyy.gbl.gROOT - - c1 = TROOT.Class() - assert not not c1 - - c2 = gROOT.Class() - - assert c1 == c2 - - old = gROOT.GetDirLevel() - TROOT.SetDirLevel(2) - assert 2 == gROOT.GetDirLevel() - gROOT.SetDirLevel(old) - - old = TROOT.GetDirLevel() - gROOT.SetDirLevel(3) - assert 3 == TROOT.GetDirLevel() - TROOT.SetDirLevel(old) - - def test07_macro(self): - """Test access to cpp macro's""" - - from cppyy import gbl - - assert gbl.NULL == 0 - - gbl.gROOT.ProcessLine('#define aap "aap"') - gbl.gROOT.ProcessLine('#define noot 1') - gbl.gROOT.ProcessLine('#define mies 2.0') - - # TODO: macro's assumed to always be of long type ... 
- #assert gbl.aap == "aap" - assert gbl.noot == 1 - #assert gbl.mies == 2.0 - - def test08_opaque_pointer_passing(self): - """Test passing around of opaque pointers""" - - import cppyy - - # TODO: figure out CObject (see also test_advanced.py) - - s = cppyy.gbl.TString("Hello World!") - #cobj = cppyy.as_cobject(s) - addr = cppyy.addressof(s) - - #assert s == cppyy.bind_object(cobj, s.__class__) - #assert s == cppyy.bind_object(cobj, "TString") - assert s == cppyy.bind_object(addr, s.__class__) - assert s == cppyy.bind_object(addr, "TString") - - def test09_object_and_pointer_comparisons(self): - """Verify object and pointer comparisons""" - - import cppyy - gbl = cppyy.gbl - - c1 = cppyy.bind_object(0, gbl.TCanvas) - assert c1 == None - assert None == c1 - - c2 = cppyy.bind_object(0, gbl.TCanvas) - assert c1 == c2 - assert c2 == c1 - - # TLorentzVector overrides operator== - l1 = cppyy.bind_object(0, gbl.TLorentzVector) - assert l1 == None - assert None == l1 - - assert c1 != l1 - assert l1 != c1 - - l2 = cppyy.bind_object(0, gbl.TLorentzVector) - assert l1 == l2 - assert l2 == l1 - - l3 = gbl.TLorentzVector(1, 2, 3, 4) - l4 = gbl.TLorentzVector(1, 2, 3, 4) - l5 = gbl.TLorentzVector(4, 3, 2, 1) - assert l3 == l4 - assert l4 == l3 - - assert l3 != None # like this to ensure __ne__ is called - assert None != l3 # id. 
-        assert l3 != l5
-        assert l5 != l3
-
-    def test10_recursive_remove(self):
-        """Verify that objects are recursively removed when destroyed"""
-
-        import cppyy
-
-        c = cppyy.gbl.TClass.GetClass("TObject")
-

From pypy.commits at gmail.com Sat Aug 5 14:18:04 2017
From: pypy.commits at gmail.com (rlamy)
Date: Sat, 05 Aug 2017 11:18:04 -0700 (PDT)
Subject: [pypy-commit] pypy install-rpython: Fix MANIFEST.in again, bump version number
Message-ID: <59860bdc.4aa8df0a.79410.7c1d@mx.google.com>

Author: Ronan Lamy
Branch: install-rpython
Changeset: r92090:9842d8147b5a
Date: 2017-08-05 14:15 +0100
http://bitbucket.org/pypy/pypy/changeset/9842d8147b5a/

Log:    Fix MANIFEST.in again, bump version number

diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,4 @@
 include README-rpython.rst
 exclude README.rst
+recursive-include rpython *.c *.h
+recursive-include rpython/rlib/vmprof/src/
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
 
 setup(
     name='rpython',
-    version='0.2.0',
+    version='0.2.1',
     description='RPython',
     long_description=long_description,
 

From pypy.commits at gmail.com Sat Aug 5 15:11:39 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sat, 05 Aug 2017 12:11:39 -0700 (PDT)
Subject: [pypy-commit] pypy default: jit-log-noopt logging was broken by 2bf0191fb21d
Message-ID: <5986186b.4f821c0a.36e7a.88c8@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92091:eef485998ca1
Date: 2017-08-05 21:11 +0200
http://bitbucket.org/pypy/pypy/changeset/eef485998ca1/

Log:    jit-log-noopt logging was broken by 2bf0191fb21d

    (the have_debug_prints needs to be *inside* the debug_start,
    otherwise stuff is never logged.)
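The ordering constraint described in the log message above can be illustrated with a toy model of the section-based debug API. Note that the `enabled`/`stack`/`output` bookkeeping below is hypothetical, invented only to show why `have_debug_prints()` must run after `debug_start()`; the real RPython helpers live in `rpython.rlib.debug` and work differently internally.

```python
# Toy model of RPython's section-based debug logging (hypothetical
# stand-ins for debug_start / have_debug_prints / debug_stop).
enabled = {"jit-log-noopt"}   # sections the user asked to log
stack = []                    # currently open debug sections
output = []                   # what actually gets logged

def debug_start(name):
    stack.append(name)

def debug_stop(name):
    assert stack.pop() == name

def have_debug_prints():
    # Only answers for the innermost *open* section: before debug_start()
    # has pushed "jit-log-noopt", this cannot see that it is enabled.
    return bool(stack) and stack[-1] in enabled

def debug_print(*args):
    if have_debug_prints():
        output.append(" ".join(map(str, args)))

def log_loop_broken():
    # pre-fix ordering: the check runs outside any section, so it
    # always bails out, even when jit-log-noopt logging is enabled
    if not have_debug_prints():
        return
    debug_start("jit-log-noopt")
    debug_print("# Traced loop or bridge")
    debug_stop("jit-log-noopt")

def log_loop_fixed():
    # post-fix ordering: open the section first, then check
    debug_start("jit-log-noopt")
    if not have_debug_prints():
        debug_stop("jit-log-noopt")
        return
    debug_print("# Traced loop or bridge")
    debug_stop("jit-log-noopt")

log_loop_broken()
assert output == []   # the bug: nothing was ever logged
log_loop_fixed()
assert output == ["# Traced loop or bridge"]
```

In this model the broken variant logs nothing at all, which matches the symptom the commit describes.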
diff --git a/rpython/jit/metainterp/logger.py b/rpython/jit/metainterp/logger.py
--- a/rpython/jit/metainterp/logger.py
+++ b/rpython/jit/metainterp/logger.py
@@ -13,10 +13,11 @@
         self.guard_number = guard_number
 
     def log_loop_from_trace(self, trace, memo):
+        debug_start("jit-log-noopt")
         if not have_debug_prints():
+            debug_stop("jit-log-noopt")
             return
         inputargs, ops = self._unpack_trace(trace)
-        debug_start("jit-log-noopt")
         debug_print("# Traced loop or bridge with", len(ops), "ops")
         logops = self._log_operations(inputargs, ops, None, memo)
         debug_stop("jit-log-noopt")

From pypy.commits at gmail.com Sat Aug 5 16:33:20 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sat, 05 Aug 2017 13:33:20 -0700 (PDT)
Subject: [pypy-commit] pypy default: remove bridge potential in hashing (int, int) tuples
Message-ID: <59862b90.56bf1c0a.fe576.bc0f@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92092:b235c624a167
Date: 2017-08-05 22:32 +0200
http://bitbucket.org/pypy/pypy/changeset/b235c624a167/

Log:    remove bridge potential in hashing (int, int) tuples

diff --git a/pypy/objspace/std/specialisedtupleobject.py b/pypy/objspace/std/specialisedtupleobject.py
--- a/pypy/objspace/std/specialisedtupleobject.py
+++ b/pypy/objspace/std/specialisedtupleobject.py
@@ -74,8 +74,7 @@
         elif typetuple[i] == int:
             # mimic cpythons behavior of a hash value of -2 for -1
             y = value
-            if y == -1:
-                y = -2
+            y -= (y == -1)  # No explicit condition, to avoid JIT bridges
         elif typetuple[i] == float:
             # get the correct hash for float which is an
             # integer & other less frequent cases
diff --git a/pypy/objspace/std/test/test_specialisedtupleobject.py b/pypy/objspace/std/test/test_specialisedtupleobject.py
--- a/pypy/objspace/std/test/test_specialisedtupleobject.py
+++ b/pypy/objspace/std/test/test_specialisedtupleobject.py
@@ -37,6 +37,7 @@
             self.space.eq(self.space.hash(N_w_tuple),
                           self.space.hash(S_w_tuple)))
 
+        hash_test([-1, -1])
         hash_test([1, 2])
         hash_test([1.5, 2.8])
         hash_test([1.0, 2.0])

From pypy.commits at gmail.com Sat Aug 5 16:50:05 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sat, 05 Aug 2017 13:50:05 -0700 (PDT)
Subject: [pypy-commit] pypy default: document merged branch
Message-ID: <59862f7d.5ba4df0a.7e1b3.1799@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92093:694a045a39eb
Date: 2017-08-05 22:49 +0200
http://bitbucket.org/pypy/pypy/changeset/694a045a39eb/

Log:    document merged branch

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -60,3 +60,11 @@
 
 Small improvement to optimize list accesses with constant indexes better by
 throwing away information about them less eagerly.
+
+
+.. branch: getarrayitem-into-bridges:
+
+More information is retained into a bridge: knowledge about the content of
+arrays (at fixed indices) is stored in guards (and thus available at the
+beginning of bridges). Also, some better feeding of information about known
+fields of constant objects into bridges.
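The branchless `y -= (y == -1)` adjustment used in the specialised-tuple hashing commit above exploits the fact that a bool subtracts as an integer. A standalone sketch in ordinary Python (not the RPython source, which operates on interpreter-level values):

```python
def adjust_hash(y):
    # CPython reserves -1 as the error marker for hash functions, so a
    # computed hash of -1 must be remapped to -2.  Writing the special
    # case as a subtraction of a bool (True == 1, False == 0) avoids an
    # explicit `if`, which in a traced JIT would become a guard and thus
    # a potential bridge.
    y -= (y == -1)
    return y

assert adjust_hash(-1) == -2     # the reserved value is remapped
assert adjust_hash(-2) == -2     # already fine, unchanged
assert adjust_hash(0) == 0
assert adjust_hash(12345) == 12345
```

Both the int and (after r92094 below) the float branches of the tuple hash use this same adjustment.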
From pypy.commits at gmail.com Sat Aug 5 16:50:39 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sat, 05 Aug 2017 13:50:39 -0700 (PDT) Subject: [pypy-commit] pypy default: fix hashing of float tuples Message-ID: <59862f9f.c1a41c0a.c943c.e51e@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92094:5c090931a660 Date: 2017-08-05 22:50 +0200 http://bitbucket.org/pypy/pypy/changeset/5c090931a660/ Log: fix hashing of float tuples diff --git a/pypy/objspace/std/specialisedtupleobject.py b/pypy/objspace/std/specialisedtupleobject.py --- a/pypy/objspace/std/specialisedtupleobject.py +++ b/pypy/objspace/std/specialisedtupleobject.py @@ -80,8 +80,9 @@ # integer & other less frequent cases from pypy.objspace.std.floatobject import _hash_float y = _hash_float(space, value) + y -= (y == -1) else: - y = compute_hash(value) + assert 0, "unreachable" x = (x ^ y) * mult z -= 1 mult += 82520 + z + z diff --git a/pypy/objspace/std/test/test_specialisedtupleobject.py b/pypy/objspace/std/test/test_specialisedtupleobject.py --- a/pypy/objspace/std/test/test_specialisedtupleobject.py +++ b/pypy/objspace/std/test/test_specialisedtupleobject.py @@ -38,6 +38,7 @@ self.space.hash(S_w_tuple))) hash_test([-1, -1]) + hash_test([-1.0, -1.0]) hash_test([1, 2]) hash_test([1.5, 2.8]) hash_test([1.0, 2.0]) From pypy.commits at gmail.com Sat Aug 5 21:19:04 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 05 Aug 2017 18:19:04 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-leakchecking: Fix tests for MSVC Message-ID: <59866e88.e681df0a.700d1.739e@mx.google.com> Author: Ronan Lamy Branch: cpyext-leakchecking Changeset: r92095:7b5f5263c9de Date: 2017-08-06 02:18 +0100 http://bitbucket.org/pypy/pypy/changeset/7b5f5263c9de/ Log: Fix tests for MSVC diff --git a/pypy/module/cpyext/test/array.c b/pypy/module/cpyext/test/array.c --- a/pypy/module/cpyext/test/array.c +++ b/pypy/module/cpyext/test/array.c @@ -1864,11 +1864,12 @@ if (PyList_Check(obj1) && 
((arrayobject*)obj2)->ob_descr->typecode == 'i' && Py_SIZE(obj2) == 1) { int ii, nn; + PyObject *ret; int n = PyList_Size(obj1); PyObject *v = getarrayitem(obj2, 0); int i = ((PyIntObject*)v)->ob_ival; Py_DECREF(v); - PyObject * ret = PyList_New(n*i); + ret = PyList_New(n*i); for (ii = 0; ii < i; ii++) for (nn = 0; nn < n; nn++) { @@ -1881,11 +1882,12 @@ else if (PyList_Check(obj2) && ((arrayobject*)obj1)->ob_descr->typecode == 'i' && Py_SIZE(obj1) == 1) { int ii, nn; + PyObject *ret; int n = PyList_Size(obj2); PyObject *v = getarrayitem(obj1, 0); int i = ((PyIntObject*)v)->ob_ival; Py_DECREF(v); - PyObject * ret = PyList_New(n*i); + ret = PyList_New(n*i); for (ii = 0; ii < i; ii++) for (nn = 0; nn < n; nn++) { @@ -1918,11 +1920,12 @@ if (PyList_Check(obj1) && ((arrayobject*)obj2)->ob_descr->typecode == 'i' && Py_SIZE(obj2) == 1) { int nn; + PyObject *ret; int n = PyList_Size(obj1); PyObject *v = getarrayitem(obj2, 0); int i = ((PyIntObject*)v)->ob_ival; Py_DECREF(v); - PyObject * ret = PyList_New(n); + ret = PyList_New(n); for (nn = 0; nn < n; nn++) { v = PyList_GetItem(obj1, nn); @@ -1939,11 +1942,12 @@ else if (PyList_Check(obj2) && ((arrayobject*)obj1)->ob_descr->typecode == 'i' && Py_SIZE(obj1) == 1) { int nn; + PyObject *ret; int n = PyList_Size(obj2); PyObject *v = getarrayitem(obj1, 0); int i = ((PyIntObject*)v)->ob_ival; Py_DECREF(v); - PyObject * ret = PyList_New(n); + ret = PyList_New(n); for (nn = 0; nn < n; nn++) { v = PyList_GetItem(obj2, nn); diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py --- a/pypy/module/cpyext/test/test_dictobject.py +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -214,7 +214,7 @@ ("keys_and_values", "METH_O", ''' Py_ssize_t pos = 0; - PyObject *key, *value; + PyObject *key, *value, *values; PyObject* keys = PyList_New(0); while (PyDict_Next(args, &pos, &key, NULL)) { @@ -225,7 +225,7 @@ } } pos = 0; - PyObject* values = PyList_New(0); + values = PyList_New(0); while 
(PyDict_Next(args, &pos, NULL, &value)) { if (PyList_Append(values, value) < 0) From pypy.commits at gmail.com Sat Aug 5 22:09:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 05 Aug 2017 19:09:59 -0700 (PDT) Subject: [pypy-commit] pypy default: document merged branch Message-ID: <59867a77.97a0df0a.b1ca5.d84d@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92098:8c10f7297290 Date: 2017-08-06 03:09 +0100 http://bitbucket.org/pypy/pypy/changeset/8c10f7297290/ Log: document merged branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -68,3 +68,8 @@ arrays (at fixed indices) is stored in guards (and thus available at the beginning of bridges). Also, some better feeding of information about known fields of constant objects into bridges. + +.. branch: cpyext-leakchecking + +Add support for leakfinder in cpyext tests (disabled for now, due to too many +failures). From pypy.commits at gmail.com Sun Aug 6 03:20:15 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 06 Aug 2017 00:20:15 -0700 (PDT) Subject: [pypy-commit] pypy default: this was done Message-ID: <5986c32f.9086df0a.894eb.7195@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92099:a186a6bccf3f Date: 2017-08-06 09:19 +0200 http://bitbucket.org/pypy/pypy/changeset/a186a6bccf3f/ Log: this was done diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -84,7 +84,6 @@ # heap knowledge: we store triples of known heap fields in non-virtual # structs - # XXX could be extended to arrays if optimizer.optheap: triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into From pypy.commits at gmail.com Sun Aug 6 10:38:25 2017 From: pypy.commits at gmail.com 
(rlamy) Date: Sun, 06 Aug 2017 07:38:25 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <598729e1.4f821c0a.36e7a.320a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92100:45907003511b Date: 2017-08-06 15:37 +0100 http://bitbucket.org/pypy/pypy/changeset/45907003511b/ Log: hg merge default diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -60,3 +60,16 @@ Small improvement to optimize list accesses with constant indexes better by throwing away information about them less eagerly. + + +.. branch: getarrayitem-into-bridges: + +More information is retained into a bridge: knowledge about the content of +arrays (at fixed indices) is stored in guards (and thus available at the +beginning of bridges). Also, some better feeding of information about known +fields of constant objects into bridges. + +.. branch: cpyext-leakchecking + +Add support for leakfinder in cpyext tests (disabled for now, due to too many +failures). diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1,4 +1,5 @@ import sys +import py from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES @@ -1271,8 +1272,22 @@ self.setitem(w_globals, w_key, self.builtin) return statement.exec_code(self, w_globals, w_locals) + @not_rpython + def appdef(self, source): + '''Create interp-level function object from app-level source. 
+ + The source should be in the same format as for space.appexec(): + """(foo, bar): return 'baz'""" + ''' + source = source.lstrip() + assert source.startswith('('), "incorrect header in:\n%s" % (source,) + source = py.code.Source("def anonymous%s\n" % source) + w_glob = self.newdict(module=True) + self.exec_(str(source), w_glob, w_glob) + return self.getitem(w_glob, self.newtext('anonymous')) + @specialize.arg(2) - def appexec(self, posargs_w, source): + def appexec(self, posargs_w, source, cache=True): """ return value from executing given source at applevel. The source must look like '''(x, y): @@ -1280,7 +1295,11 @@ return result ''' """ - w_func = self.fromcache(AppExecCache).getorbuild(source) + if cache: + w_func = self.fromcache(AppExecCache).getorbuild(source) + else: + # NB: since appdef() is not-RPython, using cache=False also is. + w_func = self.appdef(source) args = Arguments(self, list(posargs_w)) return self.call_args(w_func, args) @@ -1817,15 +1836,7 @@ class AppExecCache(SpaceCache): @not_rpython def build(cache, source): - space = cache.space - # XXX will change once we have our own compiler - import py - source = source.lstrip() - assert source.startswith('('), "incorrect header in:\n%s" % (source,) - source = py.code.Source("def anonymous%s\n" % source) - w_glob = space.newdict(module=True) - space.exec_(str(source), w_glob, w_glob) - return space.getitem(w_glob, space.newtext('anonymous')) + return cache.space.appdef(source) # Table describing the regular part of the interface of object spaces, diff --git a/pypy/module/cpyext/buffer.py b/pypy/module/cpyext/buffer.py --- a/pypy/module/cpyext/buffer.py +++ b/pypy/module/cpyext/buffer.py @@ -84,22 +84,27 @@ if self.needs_decref: if self.releasebufferproc: func_target = rffi.cast(releasebufferproc, self.releasebufferproc) - with lltype.scoped_alloc(Py_buffer) as pybuf: - pybuf.c_buf = self.ptr - pybuf.c_len = self.size - pybuf.c_ndim = cts.cast('int', self.ndim) - pybuf.c_shape = 
cts.cast('Py_ssize_t*', pybuf.c__shape) - pybuf.c_strides = cts.cast('Py_ssize_t*', pybuf.c__strides) - for i in range(self.ndim): - pybuf.c_shape[i] = self.shape[i] - pybuf.c_strides[i] = self.strides[i] - if self.format: - pybuf.c_format = rffi.str2charp(self.format) - else: - pybuf.c_format = rffi.str2charp("B") + size = rffi.sizeof(cts.gettype('Py_buffer')) + pybuf = lltype.malloc(rffi.VOIDP.TO, size, flavor='raw', zero=True) + pybuf = cts.cast('Py_buffer*', pybuf) + pybuf.c_buf = self.ptr + pybuf.c_len = self.size + pybuf.c_ndim = cts.cast('int', self.ndim) + pybuf.c_shape = cts.cast('Py_ssize_t*', pybuf.c__shape) + pybuf.c_strides = cts.cast('Py_ssize_t*', pybuf.c__strides) + for i in range(self.ndim): + pybuf.c_shape[i] = self.shape[i] + pybuf.c_strides[i] = self.strides[i] + fmt = rffi.str2charp(self.format if self.format else "B") + try: + pybuf.c_format = fmt generic_cpy_call(self.space, func_target, self.pyobj, pybuf) + finally: + lltype.free(fmt, flavor='raw') + lltype.free(pybuf, flavor='raw') decref(self.space, self.pyobj) self.pyobj = lltype.nullptr(PyObject.TO) + self.w_obj = None else: #do not call twice return @@ -211,6 +216,7 @@ view.c_suboffsets = lltype.nullptr(Py_ssize_tP.TO) view.c_internal = lltype.nullptr(rffi.VOIDP.TO) +DEFAULT_FMT = rffi.str2charp("B") @cpython_api([lltype.Ptr(Py_buffer), PyObject, rffi.VOIDP, Py_ssize_t, lltype.Signed, lltype.Signed], rffi.INT, error=-1) @@ -233,7 +239,8 @@ rffi.setintfield(view, 'c_ndim', 1) view.c_format = lltype.nullptr(rffi.CCHARP.TO) if (flags & PyBUF_FORMAT) == PyBUF_FORMAT: - view.c_format = rffi.str2charp("B") + # NB: this needs to be a static string, because nothing frees it + view.c_format = DEFAULT_FMT view.c_shape = lltype.nullptr(Py_ssize_tP.TO) if (flags & PyBUF_ND) == PyBUF_ND: view.c_shape = rffi.cast(Py_ssize_tP, view.c__shape) diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ 
b/pypy/module/cpyext/memoryobject.py @@ -73,7 +73,7 @@ readonly=widen(view.c_readonly)) # Ensure view.c_buf is released upon object finalization fq.register_finalizer(buf) - # Allow subclassing W_MemeoryView + # Allow subclassing W_MemoryView w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type)) w_obj = space.allocate_instance(W_MemoryView, w_type) w_obj.__init__(buf) @@ -178,11 +178,9 @@ return (_IsCContiguous(view) or _IsFortranContiguous(view)) return 0 - at cpython_api([PyObject], PyObject, result_is_ll=True) + at cpython_api([PyObject], PyObject) def PyMemoryView_FromObject(space, w_obj): - w_memview = space.call_method(space.builtin, "memoryview", w_obj) - py_memview = make_ref(space, w_memview, w_obj) - return py_memview + return space.call_method(space.builtin, "memoryview", w_obj) @cts.decl("""PyObject * PyMemoryView_FromMemory(char *mem, Py_ssize_t size, int flags)""") @@ -207,6 +205,7 @@ # copy view into obj.c_view, without creating a new view.c_obj typedescr = get_typedescr(W_MemoryView.typedef) py_obj = typedescr.allocate(space, space.w_memoryview) + py_mem = rffi.cast(PyMemoryViewObject, py_obj) mview = py_mem.c_view mview.c_buf = view.c_buf diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -29,7 +29,7 @@ from pypy.module.cpyext.typeobject import subtype_dealloc return subtype_dealloc.api_func - def allocate(self, space, w_type, itemcount=0): + def allocate(self, space, w_type, itemcount=0, immortal=False): # typically called from PyType_GenericAlloc via typedescr.allocate # this returns a PyObject with ob_refcnt == 1. 
@@ -50,7 +50,7 @@ assert size >= rffi.sizeof(PyObject.TO) buf = lltype.malloc(rffi.VOIDP.TO, size, flavor='raw', zero=True, - add_memory_pressure=True) + add_memory_pressure=True, immortal=immortal) pyobj = rffi.cast(PyObject, buf) if pytype.c_tp_itemsize: pyvarobj = rffi.cast(PyVarObject, pyobj) @@ -102,7 +102,7 @@ basestruct = tp_basestruct if tp_alloc: - def allocate(self, space, w_type, itemcount=0): + def allocate(self, space, w_type, itemcount=0, immortal=False): return tp_alloc(space, w_type, itemcount) if tp_dealloc: @@ -151,7 +151,7 @@ class InvalidPointerException(Exception): pass -def create_ref(space, w_obj, w_userdata=None): +def create_ref(space, w_obj, w_userdata=None, immortal=False): """ Allocates a PyObject, and fills its fields with info from the given interpreter object. @@ -163,7 +163,7 @@ itemcount = space.len_w(w_obj) # PyBytesObject and subclasses else: itemcount = 0 - py_obj = typedescr.allocate(space, w_type, itemcount=itemcount) + py_obj = typedescr.allocate(space, w_type, itemcount=itemcount, immortal=immortal) track_reference(space, py_obj, w_obj) # # py_obj.c_ob_refcnt should be exactly REFCNT_FROM_PYPY + 1 here, @@ -227,7 +227,7 @@ assert isinstance(w_type, W_TypeObject) return get_typedescr(w_type.layout.typedef).realize(space, ref) -def as_pyobj(space, w_obj, w_userdata=None): +def as_pyobj(space, w_obj, w_userdata=None, immortal=False): """ Returns a 'PyObject *' representing the given intepreter object. 
This doesn't give a new reference, but the returned 'PyObject *' @@ -239,7 +239,7 @@ assert not is_pyobj(w_obj) py_obj = rawrefcount.from_obj(PyObject, w_obj) if not py_obj: - py_obj = create_ref(space, w_obj, w_userdata) + py_obj = create_ref(space, w_obj, w_userdata, immortal=immortal) return py_obj else: return lltype.nullptr(PyObject.TO) @@ -270,7 +270,7 @@ return hop.inputconst(lltype.Bool, hop.s_result.const) @specialize.ll() -def make_ref(space, obj, w_userdata=None): +def make_ref(space, obj, w_userdata=None, immortal=False): """Increment the reference counter of the PyObject and return it. Can be called with either a PyObject or a W_Root. """ @@ -278,7 +278,7 @@ pyobj = rffi.cast(PyObject, obj) at_least = 1 else: - pyobj = as_pyobj(space, obj, w_userdata) + pyobj = as_pyobj(space, obj, w_userdata, immortal=immortal) at_least = rawrefcount.REFCNT_FROM_PYPY if pyobj: assert pyobj.c_ob_refcnt >= at_least diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py --- a/pypy/module/cpyext/sequence.py +++ b/pypy/module/cpyext/sequence.py @@ -48,7 +48,7 @@ m as the message text. 
If the conversion otherwise, fails, reraise the original exception""" if isinstance(w_obj, W_ListObject): - # make sure we can return a borrowed obj from PySequence_Fast_GET_ITEM + # make sure we can return a borrowed obj from PySequence_Fast_GET_ITEM w_obj.convert_to_cpy_strategy(space) return w_obj try: @@ -313,7 +313,7 @@ self) w_clone.switch_to_object_strategy() return w_clone - + def copy_into(self, w_list, w_other): w_list.switch_to_object_strategy() w_list.strategy.copy_into(w_list, w_other) @@ -378,7 +378,7 @@ def is_empty_strategy(self): return False - + PyObjectList = lltype.Ptr(lltype.Array(PyObject, hints={'nolength': True})) diff --git a/pypy/module/cpyext/test/array.c b/pypy/module/cpyext/test/array.c --- a/pypy/module/cpyext/test/array.c +++ b/pypy/module/cpyext/test/array.c @@ -2322,10 +2322,12 @@ if (PyList_Check(obj1) && ((arrayobject*)obj2)->ob_descr->typecode == 'i' && Py_SIZE(obj2) == 1) { int ii, nn; + PyObject *ret; int n = PyList_Size(obj1); PyObject *v = getarrayitem(obj2, 0); long i = PyLong_AsLong(v); // XXX: error checking? 
- PyObject * ret = PyList_New(n*i); + Py_DECREF(v); + ret = PyList_New(n*i); for (ii = 0; ii < i; ii++) for (nn = 0; nn < n; nn++) { @@ -2338,10 +2340,12 @@ else if (PyList_Check(obj2) && ((arrayobject*)obj1)->ob_descr->typecode == 'i' && Py_SIZE(obj1) == 1) { int ii, nn; + PyObject *ret; int n = PyList_Size(obj2); PyObject *v = getarrayitem(obj1, 0); long i = PyLong_AsLong(v); - PyObject * ret = PyList_New(n*i); + Py_DECREF(v); + ret = PyList_New(n*i); for (ii = 0; ii < i; ii++) for (nn = 0; nn < n; nn++) { @@ -2374,31 +2378,35 @@ if (PyList_Check(obj1) && ((arrayobject*)obj2)->ob_descr->typecode == 'i' && Py_SIZE(obj2) == 1) { int nn; + PyObject *ret; int n = PyList_Size(obj1); PyObject * lhs, * out; PyObject * rhs = getarrayitem(obj2, 0); - PyObject * ret = PyList_New(n); + ret = PyList_New(n); for (nn = 0; nn < n; nn++) { lhs = PyList_GetItem(obj1, nn); out = lhs->ob_type->tp_as_number->nb_multiply(lhs, rhs); PyList_SetItem(ret, nn, out); } + Py_DECREF(rhs); return ret; } else if (PyList_Check(obj2) && ((arrayobject*)obj1)->ob_descr->typecode == 'i' && Py_SIZE(obj1) == 1) { int nn; + PyObject *ret; int n = PyList_Size(obj2); PyObject * rhs, * out; PyObject * lhs = getarrayitem(obj1, 0); - PyObject * ret = PyList_New(n); + ret = PyList_New(n); for (nn = 0; nn < n; nn++) { rhs = PyList_GetItem(obj2, nn); out = lhs->ob_type->tp_as_number->nb_multiply(lhs, rhs); PyList_SetItem(ret, nn, out); } + Py_DECREF(lhs); return ret; } else if(obj1->ob_type == &Arraytype) @@ -2904,6 +2912,16 @@ return PyLong_FromLong(buf_len); } +static PyObject * +same_dealloc(PyObject *self, PyObject *args) +{ + PyObject *obj1, *obj2; + if (!PyArg_ParseTuple(args, "OO", &obj1, &obj2)) { + return NULL; + } + return PyLong_FromLong(obj1->ob_type->tp_dealloc == obj2->ob_type->tp_dealloc); +} + /*********************** Install Module **************************/ static PyMethodDef a_methods[] = { @@ -2914,6 +2932,7 @@ {"get_releasebuffer_cnt", (PyCFunction)get_releasebuffer_cnt, METH_NOARGS, 
NULL}, {"create_and_release_buffer", (PyCFunction)create_and_release_buffer, METH_O, NULL}, {"write_buffer_len", write_buffer_len, METH_O, NULL}, + {"same_dealloc", (PyCFunction)same_dealloc, METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/pypy/module/cpyext/test/test_api.py b/pypy/module/cpyext/test/test_api.py --- a/pypy/module/cpyext/test/test_api.py +++ b/pypy/module/cpyext/test/test_api.py @@ -6,7 +6,8 @@ from pypy.module.cpyext.api import ( slot_function, cpython_api, copy_header_files, INTERPLEVEL_API, Py_ssize_t, Py_ssize_tP, PyObject, cts) -from pypy.module.cpyext.test.test_cpyext import freeze_refcnts, LeakCheckingTest +from pypy.module.cpyext.test.test_cpyext import ( + freeze_refcnts, LeakCheckingTest) from pypy.interpreter.error import OperationError from rpython.rlib import rawrefcount import os @@ -21,17 +22,7 @@ class BaseApiTest(LeakCheckingTest): def setup_class(cls): space = cls.space - # warm up reference counts: - # - the posix module allocates a HCRYPTPROV on Windows - # - writing to stdout and stderr allocates a file lock - space.getbuiltinmodule("cpyext") - space.getbuiltinmodule(os.name) - space.call_function(space.getattr(space.sys.get("stderr"), - space.wrap("write")), - space.wrap("")) - space.call_function(space.getattr(space.sys.get("stdout"), - space.wrap("write")), - space.wrap("")) + cls.preload_builtins(space) class CAPI: def __getattr__(self, name): @@ -39,9 +30,6 @@ cls.api = CAPI() CAPI.__dict__.update(INTERPLEVEL_API) - print 'DONT_FREE_ANY_MORE' - rawrefcount._dont_free_any_more() - def raises(self, space, api, expected_exc, f, *args): if not callable(f): raise Exception("%s is not callable" % (f,)) diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -1,3 +1,4 @@ +import pytest from pypy.module.cpyext.test.test_cpyext import 
AppTestCpythonExtensionBase from pypy.conftest import option @@ -99,6 +100,19 @@ res = [1, 2, 3] * arr assert res == [2, 4, 6] + @pytest.mark.xfail + def test_subclass_dealloc(self): + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + module.readbuffer_as_string(arr) + class A(object): + pass + assert not module.same_dealloc(arr, module.array('i', [2])) + assert module.same_dealloc(arr, A()) + def test_string_buf(self): module = self.import_module(name='array') arr = module.array('u', '123') diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -4,8 +4,11 @@ from pypy.tool.cpyext.extbuild import SystemCompilationInfo, HERE from pypy.interpreter.gateway import unwrap_spec, interp2app +from pypy.interpreter.error import OperationError from rpython.rtyper.lltypesystem import lltype from pypy.module.cpyext import api +from pypy.module.cpyext.api import cts +from pypy.module.cpyext.pyobject import from_ref from pypy.module.cpyext.state import State from rpython.tool import leakfinder from rpython.rlib import rawrefcount @@ -75,18 +78,94 @@ def freeze_refcnts(self): rawrefcount._dont_free_any_more() +def preload(space, name): + from pypy.module.cpyext.pyobject import make_ref + if '.' 
not in name: + w_obj = space.builtin.getdictvalue(space, name) + else: + module, localname = name.rsplit('.', 1) + code = "(): import {module}; return {module}.{localname}" + code = code.format(**locals()) + w_obj = space.appexec([], code) + make_ref(space, w_obj) + +def preload_expr(space, expr): + from pypy.module.cpyext.pyobject import make_ref + code = "(): return {}".format(expr) + w_obj = space.appexec([], code) + make_ref(space, w_obj) + +def is_interned_string(space, w_obj): + try: + s = space.str_w(w_obj) + except OperationError: + return False + return space.is_interned_str(s) + +def is_allowed_to_leak(space, obj): + from pypy.module.cpyext.methodobject import W_PyCFunctionObject + try: + w_obj = from_ref(space, cts.cast('PyObject*', obj._as_ptr())) + except: + return False + if isinstance(w_obj, W_PyCFunctionObject): + return True + # It's OK to "leak" some interned strings: if the pyobj is created by + # the test, but the w_obj is referred to from elsewhere. + return is_interned_string(space, w_obj) + +def _get_w_obj(space, c_obj): + return from_ref(space, cts.cast('PyObject*', c_obj._as_ptr())) + +class CpyextLeak(leakfinder.MallocMismatch): + def __str__(self): + lines = [leakfinder.MallocMismatch.__str__(self), ''] + lines.append( + "These objects are attached to the following W_Root objects:") + for c_obj in self.args[0]: + try: + lines.append(" %s" % (_get_w_obj(self.args[1], c_obj),)) + except: + pass + return '\n'.join(lines) + + class LeakCheckingTest(object): """Base class for all cpyext tests.""" spaceconfig = dict(usemodules=['cpyext', 'thread', 'struct', 'array', 'itertools', 'time', 'binascii', 'mmap', ]) + @classmethod + def preload_builtins(cls, space): + """ + Eagerly create pyobjs for various builtins so they don't look like + leaks. 
+ """ + for name in [ + 'buffer', 'mmap.mmap', + 'types.FunctionType', 'types.CodeType', + 'types.TracebackType', 'types.FrameType']: + preload(space, name) + for expr in ['type(str.join)']: + preload_expr(space, expr) + def cleanup(self): self.space.getexecutioncontext().cleanup_cpyext_state() for _ in range(5): rawrefcount._collect() self.space.user_del_action._run_finalizers() - leakfinder.stop_tracking_allocations(check=False) + try: + # set check=True to actually enable leakfinder + leakfinder.stop_tracking_allocations(check=False) + except leakfinder.MallocMismatch as e: + result = e.args[0] + filtered_result = {} + for obj, value in result.iteritems(): + if not is_allowed_to_leak(self.space, obj): + filtered_result[obj] = value + if filtered_result: + raise CpyextLeak(filtered_result, self.space) assert not self.space.finalizer_queue.next_dead() @@ -127,6 +206,7 @@ def debug_collect(space): rawrefcount._collect() + class AppTestCpythonExtensionBase(LeakCheckingTest): def setup_class(cls): @@ -136,13 +216,8 @@ cls.w_runappdirect = space.wrap(cls.runappdirect) if not cls.runappdirect: cls.sys_info = get_cpyext_info(space) - space.getbuiltinmodule("cpyext") - # 'import os' to warm up reference counts - w_import = space.builtin.getdictvalue(space, '__import__') - space.call_function(w_import, space.wrap("os")) - #state = cls.space.fromcache(RefcountState) ZZZ - #state.non_heaptypes_w[:] = [] cls.w_debug_collect = space.wrap(interp2app(debug_collect)) + cls.preload_builtins(space) else: def w_import_module(self, name, init=None, body='', filename=None, include_dirs=None, PY_SSIZE_T_CLEAN=False): diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py --- a/pypy/module/cpyext/test/test_dictobject.py +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -112,74 +112,14 @@ PyDict_Update(space, w_d, w_d2) assert space.unwrap(w_d) == dict(a='b') # unchanged - def test_iter(self, space): - w_dict = space.sys.getdict(space) - 
py_dict = make_ref(space, w_dict) - - ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') - ppos[0] = 0 - pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - - try: - w_copy = space.newdict() - while PyDict_Next(space, w_dict, ppos, pkey, pvalue): - w_key = from_ref(space, pkey[0]) - w_value = from_ref(space, pvalue[0]) - space.setitem(w_copy, w_key, w_value) - finally: - lltype.free(ppos, flavor='raw') - lltype.free(pkey, flavor='raw') - lltype.free(pvalue, flavor='raw') - - decref(space, py_dict) # release borrowed references - - assert space.eq_w(space.len(w_copy), space.len(w_dict)) - assert space.eq_w(w_copy, w_dict) - - def test_iterkeys(self, space): - w_dict = space.sys.getdict(space) - py_dict = make_ref(space, w_dict) - - ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') - pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - - keys_w = [] - values_w = [] - try: - ppos[0] = 0 - while PyDict_Next(space, w_dict, ppos, pkey, None): - w_key = from_ref(space, pkey[0]) - keys_w.append(w_key) - ppos[0] = 0 - while PyDict_Next(space, w_dict, ppos, None, pvalue): - w_value = from_ref(space, pvalue[0]) - values_w.append(w_value) - finally: - lltype.free(ppos, flavor='raw') - lltype.free(pkey, flavor='raw') - lltype.free(pvalue, flavor='raw') - - decref(space, py_dict) # release borrowed references - - assert space.eq_w(space.newlist(keys_w), - space.call_function( - space.w_list, - space.call_method(w_dict, "keys"))) - assert space.eq_w(space.newlist(values_w), - space.call_function( - space.w_list, - space.call_method(w_dict, "values"))) - def test_dictproxy(self, space): - w_dict = space.sys.get('modules') + w_dict = space.appexec([], """(): return {1: 2, 3: 4}""") w_proxy = PyDictProxy_New(space, w_dict) - assert space.contains_w(w_proxy, space.wrap('sys')) + assert space.contains_w(w_proxy, space.newint(1)) raises(OperationError, 
space.setitem, - w_proxy, space.wrap('sys'), space.w_None) + w_proxy, space.newint(1), space.w_None) raises(OperationError, space.delitem, - w_proxy, space.wrap('sys')) + w_proxy, space.newint(1)) raises(OperationError, space.call_method, w_proxy, 'clear') assert PyDictProxy_Check(space, w_proxy) @@ -248,6 +188,59 @@ d = {"a": 1} raises(AttributeError, module.update, d, [("c", 2)]) + def test_iter(self): + module = self.import_extension('foo', [ + ("copy", "METH_O", + ''' + Py_ssize_t pos = 0; + PyObject *key, *value; + PyObject* copy = PyDict_New(); + while (PyDict_Next(args, &pos, &key, &value)) + { + if (PyDict_SetItem(copy, key, value) < 0) + { + Py_DecRef(copy); + return NULL; + } + } + return copy; + ''')]) + d = {1: 'xyz', 3: 'abcd'} + copy = module.copy(d) + assert len(copy) == len(d) + assert copy == d + + def test_iterkeys(self): + module = self.import_extension('foo', [ + ("keys_and_values", "METH_O", + ''' + Py_ssize_t pos = 0; + PyObject *key, *value, *values; + PyObject* keys = PyList_New(0); + while (PyDict_Next(args, &pos, &key, NULL)) + { + if (PyList_Append(keys, key) < 0) + { + Py_DecRef(keys); + return NULL; + } + } + pos = 0; + values = PyList_New(0); + while (PyDict_Next(args, &pos, NULL, &value)) + { + if (PyList_Append(values, value) < 0) + { + Py_DecRef(keys); + Py_DecRef(values); + return NULL; + } + } + return Py_BuildValue("(NN)", keys, values); + ''')]) + d = {1: 'xyz', 3: 'abcd'} + assert module.keys_and_values(d) == (list(d.keys()), list(d.values())) + def test_typedict2(self): module = self.import_extension('foo', [ ("get_type_dict", "METH_O", @@ -260,6 +253,7 @@ ]) d = module.get_type_dict(1) assert d['real'].__get__(1, 1) == 1 + def test_advanced(self): module = self.import_extension('foo', [ ("dict_len", "METH_O", @@ -271,7 +265,7 @@ ''' int ret; PyObject * dict = PyTuple_GetItem(args, 0); - if (PyTuple_Size(args) < 3 || !dict || + if (PyTuple_Size(args) < 3 || !dict || !dict->ob_type->tp_as_mapping || 
!dict->ob_type->tp_as_mapping->mp_ass_subscript) return PyLong_FromLong(-1); @@ -284,7 +278,7 @@ ''' int ret; PyObject * dict = PyTuple_GetItem(args, 0); - if (PyTuple_Size(args) < 2 || !dict || + if (PyTuple_Size(args) < 2 || !dict || !dict->ob_type->tp_as_mapping || !dict->ob_type->tp_as_mapping->mp_ass_subscript) return PyLong_FromLong(-1); diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -279,6 +279,7 @@ assert module.call_method("text") == 2 def test_CompileString_and_Exec(self): + import sys module = self.import_extension('foo', [ ("compile_string", "METH_NOARGS", """ @@ -313,6 +314,9 @@ print(mod.__dict__) assert mod.f(42) == 47 + # Clean-up + del sys.modules['cpyext_test_modname'] + def test_merge_compiler_flags(self): import sys module = self.import_extension('foo', [ diff --git a/pypy/module/cpyext/test/test_floatobject.py b/pypy/module/cpyext/test/test_floatobject.py --- a/pypy/module/cpyext/test/test_floatobject.py +++ b/pypy/module/cpyext/test/test_floatobject.py @@ -104,6 +104,7 @@ PyFloatObject* pfo = (PyFloatObject*)pyobj; int res = PyFloat_Check(pyobj) && PyFloat_CheckExact(pyobj) && PyFloat_Check(pfo) && PyFloat_CheckExact(pfo); + Py_DecRef(pyobj); return PyLong_FromLong(res);"""), ]) assert module.test() == 1 diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py --- a/pypy/module/cpyext/test/test_funcobject.py +++ b/pypy/module/cpyext/test/test_funcobject.py @@ -44,7 +44,7 @@ w_function = space.appexec([], """(): def func(x, y, z): return x return func - """) + """, cache=False) w_code = PyFunction_GetCode(space, w_function) assert w_code.co_name == "func" @@ -61,7 +61,7 @@ w_code = space.appexec([], """(): def func(%s): %s return func.__code__ - """ % (signature, body)) + """ % (signature, body), cache=False) ref = make_ref(space, w_code) co_flags = rffi.cast(PyCodeObject, 
ref).c_co_flags decref(space, ref) diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py --- a/pypy/module/cpyext/test/test_longobject.py +++ b/pypy/module/cpyext/test/test_longobject.py @@ -329,6 +329,7 @@ ret = obj->ob_type->tp_as_number->nb_power(obj, one, one); else ret = PyLong_FromLong(-1); + Py_DECREF(one); Py_DECREF(obj); return ret; """)]) diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -6,7 +6,7 @@ from pypy.module.cpyext.test.test_api import BaseApiTest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from pypy.interpreter.buffer import SimpleView -from pypy.module.cpyext.pyobject import from_ref +from pypy.module.cpyext.pyobject import make_ref, from_ref from pypy.module.cpyext.memoryobject import PyMemoryViewObject only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" @@ -14,9 +14,9 @@ class TestMemoryViewObject(BaseApiTest): def test_frombuffer(self, space, api): w_view = SimpleView(StringBuffer("hello")).wrap(space) + w_memoryview = api.PyMemoryView_FromObject(w_view) c_memoryview = rffi.cast( - PyMemoryViewObject, api.PyMemoryView_FromObject(w_view)) - w_memoryview = from_ref(space, c_memoryview) + PyMemoryViewObject, make_ref(space, w_memoryview)) view = c_memoryview.c_view assert view.c_ndim == 1 f = rffi.charp2str(view.c_format) @@ -34,6 +34,7 @@ assert space.eq_w(space.getattr(w_mv, w_f), space.getattr(w_memoryview, w_f)) api.Py_DecRef(ref) + api.Py_DecRef(w_memoryview) class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase): def test_fillWithObject(self): @@ -64,7 +65,6 @@ """)]) result = module.fillinfo() assert b"hello, world." 
== result - del result def test_0d(self): module = self.import_extension('foo', [ @@ -195,8 +195,6 @@ # in ignored def test_releasebuffer(self): - if not self.runappdirect: - skip("Fails due to ll2ctypes nonsense") module = self.import_extension('foo', [ ("create_test", "METH_NOARGS", """ diff --git a/pypy/module/cpyext/test/test_traceback.py b/pypy/module/cpyext/test/test_traceback.py --- a/pypy/module/cpyext/test/test_traceback.py +++ b/pypy/module/cpyext/test/test_traceback.py @@ -3,17 +3,19 @@ from pypy.module.cpyext.pyobject import PyObject, make_ref, from_ref from pypy.module.cpyext.pytraceback import PyTracebackObject from pypy.interpreter.pytraceback import PyTraceback -from pypy.interpreter.pyframe import PyFrame +from pypy.interpreter.baseobjspace import AppExecCache class TestPyTracebackObject(BaseApiTest): def test_traceback(self, space, api): - w_traceback = space.appexec([], """(): + src = """(): import sys try: 1/0 except: return sys.exc_info()[2] - """) + """ + w_traceback = space.appexec([], src) + py_obj = make_ref(space, w_traceback) py_traceback = rffi.cast(PyTracebackObject, py_obj) assert (from_ref(space, rffi.cast(PyObject, py_traceback.c_ob_type)) is @@ -38,3 +40,5 @@ assert lltype.normalizeptr(py_traceback) is None api.Py_DecRef(py_obj) + # hack to allow the code object to be freed + del space.fromcache(AppExecCache).content[src] diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py --- a/pypy/module/cpyext/test/test_tupleobject.py +++ b/pypy/module/cpyext/test/test_tupleobject.py @@ -24,6 +24,7 @@ def test_tuple_realize_refuses_nulls(self, space, api): py_tuple = api.PyTuple_New(1) py.test.raises(FatalError, from_ref, space, py_tuple) + api.Py_DecRef(py_tuple) def test_tuple_resize(self, space, api): w_42 = space.wrap(42) @@ -70,6 +71,7 @@ w_tuple = from_ref(space, py_tuple) assert space.eq_w(w_tuple, space.newtuple([space.wrap(42), space.wrap(43)])) + api.Py_DecRef(py_tuple) def 
test_getslice(self, space, api): w_tuple = space.newtuple([space.wrap(i) for i in range(10)]) @@ -174,6 +176,7 @@ res = PyTuple_SetItem(tuple, 0, one); if (res != 0) { + Py_DECREF(one); Py_DECREF(tuple); return NULL; } @@ -187,14 +190,13 @@ /* Do something that uses the tuple, but does not incref */ t2 = PyTuple_GetSlice(tuple, 0, 1); Py_DECREF(t2); - Py_INCREF(one); res = PyTuple_SetItem(tuple, 0, one); - Py_DECREF(tuple); if (res != 0) { - Py_DECREF(one); + Py_DECREF(tuple); return NULL; } + Py_DECREF(tuple); Py_INCREF(Py_None); return Py_None; """), @@ -205,4 +207,3 @@ raises(SystemError, module.set_after_use, s) else: module.set_after_use(s) - diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py --- a/pypy/module/cpyext/tupleobject.py +++ b/pypy/module/cpyext/tupleobject.py @@ -143,6 +143,7 @@ old_ref = tupleobj.c_ob_item[index] if pyobj_has_w_obj(ref): # similar but not quite equal to ref.c_ob_refcnt != 1 on CPython + decref(space, py_obj) raise oefmt(space.w_SystemError, "PyTuple_SetItem called on tuple after" " use of tuple") tupleobj.c_ob_item[index] = py_obj # consumes a reference diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -194,6 +194,8 @@ py_getsetdef = make_GetSet(space, w_obj) assert space.isinstance_w(w_userdata, space.w_type) w_obj = W_GetSetPropertyEx(py_getsetdef, w_userdata) + # now w_obj.getset is py_getsetdef, which was freshly allocated + # XXX how is this ever released? # XXX assign to d_dname, d_type? 
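Many of the hunks above exist purely to balance reference counts in the cpyext tests: every `PyTuple_New`/`make_ref` now gets a matching `api.Py_DecRef`. A toy Python model of why an unbalanced pair keeps the C-level object alive — all names here are hypothetical stand-ins, not real cpyext APIs:

```python
class PyObj:
    """Toy stand-in for a cpyext-managed object (hypothetical)."""
    def __init__(self):
        self.refcnt = 1  # constructors like PyTuple_New return a new reference

def make_ref():
    return PyObj()

def decref(obj):
    assert obj.refcnt > 0, "decref on a dead object"
    obj.refcnt -= 1

# Balanced: the test owns one reference and releases it when done.
py_tuple = make_ref()
decref(py_tuple)
assert py_tuple.refcnt == 0   # object can be reclaimed

# Unbalanced: without the added Py_DecRef the object leaks forever.
leaked = make_ref()
assert leaked.refcnt == 1     # still alive after the test finishes
```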
assert isinstance(w_obj, W_GetSetPropertyEx) py_getsetdescr.c_d_getset = w_obj.getset @@ -823,7 +825,9 @@ bases_w = [] else: bases_w = [from_ref(space, base_pyo)] - pto.c_tp_bases = make_ref(space, space.newtuple(bases_w)) + is_heaptype = bool(pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE) + pto.c_tp_bases = make_ref(space, space.newtuple(bases_w), + immortal=not is_heaptype) def finish_type_2(space, pto, w_obj): """ diff --git a/pypy/objspace/std/specialisedtupleobject.py b/pypy/objspace/std/specialisedtupleobject.py --- a/pypy/objspace/std/specialisedtupleobject.py +++ b/pypy/objspace/std/specialisedtupleobject.py @@ -74,8 +74,7 @@ elif typetuple[i] == int: # mimic cpythons behavior of a hash value of -2 for -1 y = value - if y == -1: - y = -2 + y -= (y == -1) # No explicit condition, to avoid JIT bridges elif typetuple[i] == float: # get the correct hash for float which is an # integer & other less frequent cases diff --git a/pypy/objspace/std/test/test_specialisedtupleobject.py b/pypy/objspace/std/test/test_specialisedtupleobject.py --- a/pypy/objspace/std/test/test_specialisedtupleobject.py +++ b/pypy/objspace/std/test/test_specialisedtupleobject.py @@ -37,6 +37,8 @@ self.space.eq(self.space.hash(N_w_tuple), self.space.hash(S_w_tuple))) + hash_test([-1, -1]) + hash_test([-1.0, -1.0]) hash_test([1, 2]) hash_test([1.5, 2.8]) hash_test([1.0, 2.0]) diff --git a/rpython/jit/metainterp/logger.py b/rpython/jit/metainterp/logger.py --- a/rpython/jit/metainterp/logger.py +++ b/rpython/jit/metainterp/logger.py @@ -13,10 +13,11 @@ self.guard_number = guard_number def log_loop_from_trace(self, trace, memo): + debug_start("jit-log-noopt") if not have_debug_prints(): + debug_stop("jit-log-noopt") return inputargs, ops = self._unpack_trace(trace) - debug_start("jit-log-noopt") debug_print("# Traced loop or bridge with", len(ops), "ops") logops = self._log_operations(inputargs, ops, None, memo) debug_stop("jit-log-noopt") diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py 
b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -18,6 +18,10 @@ # ( ) length times, if getfield(box1, descr) == box2 # both boxes should be in the liveboxes # +# +# ( ) length times, if getarrayitem_gc(box1, index, descr) == box2 +# both boxes should be in the liveboxes +# # ---- @@ -82,18 +86,26 @@ # structs # XXX could be extended to arrays if optimizer.optheap: - triples = optimizer.optheap.serialize_optheap(available_boxes) + triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into # metainterp_sd.all_descrs - triples = [triple for triple in triples if triple[1].descr_index != -1] - numb_state.append_int(len(triples)) - for box1, descr, box2 in triples: - index = descr.descr_index + triples_struct = [triple for triple in triples_struct if triple[1].descr_index != -1] + numb_state.append_int(len(triples_struct)) + for box1, descr, box2 in triples_struct: + descr_index = descr.descr_index + numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_int(descr_index) + numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_int(len(triples_array)) + for box1, index, descr, box2 in triples_array: + descr_index = descr.descr_index numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) numb_state.append_int(index) + numb_state.append_int(descr_index) numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) else: numb_state.append_int(0) + numb_state.append_int(0) def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes): reader = resumecode.Reader(resumestorage.rd_numb) @@ -123,13 +135,24 @@ if not optimizer.optheap: return length = reader.next_item() - result = [] + result_struct = [] + for i in range(length): + tagged = reader.next_item() + box1 = decode_box(resumestorage, 
tagged, liveboxes, metainterp_sd.cpu) + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] + tagged = reader.next_item() + box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + result_struct.append((box1, descr, box2)) + length = reader.next_item() + result_array = [] for i in range(length): tagged = reader.next_item() box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) index = reader.next_item() - descr = metainterp_sd.all_descrs[index] + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] tagged = reader.next_item() box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) - result.append((box1, descr, box2)) - optimizer.optheap.deserialize_optheap(result) + result_array.append((box1, index, descr, box2)) + optimizer.optheap.deserialize_optheap(result_struct, result_array) diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -698,7 +698,7 @@ return self.emit(op) def serialize_optheap(self, available_boxes): - result = [] + result_getfield = [] for descr, cf in self.cached_fields.iteritems(): if cf._lazy_set: continue # XXX safe default for now @@ -706,27 +706,62 @@ if not parent_descr.is_object(): continue # XXX could be extended to non-instance objects for i, box1 in enumerate(cf.cached_structs): - if box1 not in available_boxes: + if not box1.is_constant() and box1 not in available_boxes: continue structinfo = cf.cached_infos[i] - box2 = structinfo.getfield(descr).get_box_replacement() - if isinstance(box2, Const) or box2 in available_boxes: - result.append((box1, descr, box2)) - return result + box2 = structinfo.getfield(descr) + if box2 is None: + # XXX this should not happen, as it is an invariant + # violation! 
yet it does if box1 is a constant + continue + box2 = box2.get_box_replacement() + if box2.is_constant() or box2 in available_boxes: + result_getfield.append((box1, descr, box2)) + result_array = [] + for descr, indexdict in self.cached_arrayitems.iteritems(): + for index, cf in indexdict.iteritems(): + if cf._lazy_set: + continue # XXX safe default for now + for i, box1 in enumerate(cf.cached_structs): + if not box1.is_constant() and box1 not in available_boxes: + continue + arrayinfo = cf.cached_infos[i] + box2 = arrayinfo.getitem(descr, index) + if box2 is None: + # XXX this should not happen, as it is an invariant + # violation! yet it does if box1 is a constant + continue + box2 = box2.get_box_replacement() + if box2.is_constant() or box2 in available_boxes: + result_array.append((box1, index, descr, box2)) + return result_getfield, result_array - def deserialize_optheap(self, triples): - for box1, descr, box2 in triples: + def deserialize_optheap(self, triples_struct, triples_array): + for box1, descr, box2 in triples_struct: parent_descr = descr.get_parent_descr() assert parent_descr.is_object() - structinfo = box1.get_forwarded() - if not isinstance(structinfo, info.AbstractVirtualPtrInfo): - structinfo = info.InstancePtrInfo(parent_descr) - structinfo.init_fields(parent_descr, descr.get_index()) - box1.set_forwarded(structinfo) - + if box1.is_constant(): + structinfo = info.ConstPtrInfo(box1) + else: + structinfo = box1.get_forwarded() + if not isinstance(structinfo, info.AbstractVirtualPtrInfo): + structinfo = info.InstancePtrInfo(parent_descr) + structinfo.init_fields(parent_descr, descr.get_index()) + box1.set_forwarded(structinfo) cf = self.field_cache(descr) structinfo.setfield(descr, box1, box2, optheap=self, cf=cf) + for box1, index, descr, box2 in triples_array: + if box1.is_constant(): + arrayinfo = info.ConstPtrInfo(box1) + else: + arrayinfo = box1.get_forwarded() + if not isinstance(arrayinfo, info.AbstractVirtualPtrInfo): + arrayinfo = 
info.ArrayPtrInfo(descr) + box1.set_forwarded(arrayinfo) + cf = self.arrayitem_cache(descr, index) + arrayinfo.setitem(descr, index, box1, box2, optheap=self, cf=cf) + dispatch_opt = make_dispatcher_method(OptHeap, 'optimize_', default=OptHeap.emit) diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -61,7 +61,7 @@ serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) - assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0] + assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0] rbox1 = InputArgRef() rbox2 = InputArgRef() @@ -97,7 +97,7 @@ serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) - assert len(numb_state.create_numbering().code) == 2 + math.ceil(len(refboxes) / 6.0) + assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0) dct = {box: cls for box, known_class in boxes_known_classes @@ -143,11 +143,7 @@ def test_bridge_field_read(self): myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) class A(object): - def f(self): - return 1 - class B(A): - def f(self): - return 2 + pass class M(object): _immutable_fields_ = ['x'] def __init__(self, x): @@ -156,14 +152,12 @@ m1 = M(1) m2 = M(2) def f(x, y, n): + a = A() + a.n = n if x: - a = A() a.m = m1 - a.n = n else: - a = B() a.m = m2 - a.n = n a.x = 0 res = 0 while y > 0: @@ -186,3 +180,105 @@ self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n self.check_resops(getfield_gc_r=1) # in main loop + def test_bridge_field_read_constants(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n']) + class A(object): + pass + class M(object): + _immutable_fields_ = ['x'] + def __init__(self, x): + self.x = x + + m1 = M(1) + m2 = M(2) + a = A() + a.m = m1 + a.n = 0 + def f(x, y, n): + if x: + a.m = m1 + a.n = n + else: + 
a.m = m2 + a.n = n + a.x = 0 + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res) + n1 = a.n + m = jit.promote(a.m) + res += m.x + a.x += 1 + if y > n: + res += 1 + m = jit.promote(a.m) + res += m.x + res += n1 + a.n + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getfield_gc_i=4) # 3x a.x, 1x a.n + self.check_resops(getfield_gc_r=1) # in main loop + + def test_bridge_array_read(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) + def f(x, y, n): + if x: + a = [1, n, 0] + else: + a = [2, n, 0] + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res, a=a) + n1 = a[1] + m = jit.promote(a[0]) + res += m + a[2] += 1 + if y > n: + res += 1 + m = jit.promote(a[0]) + res += m + res += n1 + a[1] + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getarrayitem_gc_i=4) + + def test_bridge_array_read_constant(self): + myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n']) + class A(object): + pass + a = A() + a.l = [1, -65, 0] + def f(x, y, n): + if x: + a.l[0] = 1 + else: + a.l[0] = 2 + a.l[1] = n + a.l[2] = 0 + res = 0 + while y > 0: + myjitdriver.jit_merge_point(y=y, n=n, res=res) + n1 = a.l[1] + m = jit.promote(a.l[0]) + res += m + a.l[2] += 1 + if y > n: + res += 1 + m = jit.promote(a.l[0]) + res += m + res += n1 + a.l[1] + y -= 1 + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_value=1) + self.check_resops(getarrayitem_gc_i=5) diff --git a/rpython/rlib/rpath.py b/rpython/rlib/rpath.py --- a/rpython/rlib/rpath.py +++ b/rpython/rlib/rpath.py @@ -5,6 +5,7 @@ import os, stat from rpython.rlib import rposix from rpython.rlib.signature import signature +from 
rpython.rlib.rstring import assert_str0 from rpython.annotator.model import s_Str0 @@ -31,9 +32,11 @@ """Test whether a path is absolute""" return s.startswith('/') + at signature(s_Str0, returns=s_Str0) def _posix_rnormpath(path): """Normalize path, eliminating double slashes, etc.""" slash, dot = '/', '.' + assert_str0(dot) if path == '': return dot initial_slashes = path.startswith('/') @@ -56,6 +59,7 @@ path = slash.join(comps) if initial_slashes: path = slash*initial_slashes + path + assert_str0(path) return path or dot @signature(s_Str0, returns=s_Str0) @@ -66,6 +70,7 @@ if not _posix_risabs(path): cwd = os.getcwd() path = _posix_rjoin(cwd, path) + assert path is not None return _posix_rnormpath(path) except OSError: return path diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -18,7 +18,7 @@ SHARED = SRC.join('shared') BACKTRACE = SHARED.join('libbacktrace') -compile_extra = ['-DRPYTHON_VMPROF', '-O3'] +compile_extra = ['-DRPYTHON_VMPROF'] separate_module_files = [ SHARED.join('symboltable.c'), SHARED.join('vmprof_unix.c') diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -2208,7 +2208,7 @@ return _ptr(Ptr(T), o, solid) @analyzer_for(malloc) -def ann_malloc(s_T, s_n=None, s_flavor=None, s_zero=None, +def ann_malloc(s_T, s_n=None, s_flavor=None, s_immortal=None, s_zero=None, s_track_allocation=None, s_add_memory_pressure=None, s_nonmovable=None): assert (s_n is None or s_n.knowntype == int diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -347,19 +347,20 @@ # annotation of low-level types @typer_for(lltype.malloc) -def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None, - i_add_memory_pressure=None, i_nonmovable=None): +def 
rtype_malloc(hop, i_flavor=None, i_immortal=None, i_zero=None, + i_track_allocation=None, i_add_memory_pressure=None, i_nonmovable=None): assert hop.args_s[0].is_constant() vlist = [hop.inputarg(lltype.Void, arg=0)] opname = 'malloc' kwds_v = parse_kwds( hop, (i_flavor, lltype.Void), + (i_immortal, None), (i_zero, None), (i_track_allocation, None), (i_add_memory_pressure, None), (i_nonmovable, None)) - (v_flavor, v_zero, v_track_allocation, + (v_flavor, v_immortal, v_zero, v_track_allocation, v_add_memory_pressure, v_nonmovable) = kwds_v flags = {'flavor': 'gc'} if v_flavor is not None: diff --git a/rpython/rtyper/tool/rffi_platform.py b/rpython/rtyper/tool/rffi_platform.py --- a/rpython/rtyper/tool/rffi_platform.py +++ b/rpython/rtyper/tool/rffi_platform.py @@ -545,7 +545,7 @@ def question(self, ask_gcc): try: - ask_gcc(self.name + ';') + ask_gcc('(void)' + self.name + ';') return True except CompilationError: return False diff --git a/rpython/translator/c/src/signals.c b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -31,11 +31,11 @@ # endif #endif +#define N_LONGBITS (8 * sizeof(long)) +#define N_LONGSIG ((NSIG - 1) / N_LONGBITS + 1) + struct pypysig_long_struct pypysig_counter = {0}; -static char volatile pypysig_flags[NSIG] = {0}; -static int volatile pypysig_occurred = 0; -/* pypysig_occurred is only an optimization: it tells if any - pypysig_flags could be set. 
*/ +static long volatile pypysig_flags_bits[N_LONGSIG]; static int wakeup_fd = -1; static int wakeup_with_nul_byte = 1; @@ -73,12 +73,28 @@ #endif } +#ifdef _WIN32 +#define atomic_cas(ptr, oldv, newv) (InterlockedCompareExchange(ptr, \ + newv, oldv) == (oldv)) +#else +#define atomic_cas(ptr, oldv, newv) __sync_bool_compare_and_swap(ptr, \ + oldv, newv) +#endif + void pypysig_pushback(int signum) { if (0 <= signum && signum < NSIG) { - pypysig_flags[signum] = 1; - pypysig_occurred = 1; + int ok, index = signum / N_LONGBITS; + unsigned long bitmask = 1UL << (signum % N_LONGBITS); + do + { + long value = pypysig_flags_bits[index]; + if (value & bitmask) + break; /* already set */ + ok = atomic_cas(&pypysig_flags_bits[index], value, value | bitmask); + } while (!ok); + pypysig_counter.value = -1; } } @@ -161,19 +177,22 @@ int pypysig_poll(void) { - if (pypysig_occurred) - { - int i; - pypysig_occurred = 0; - for (i=0; i Author: Armin Rigo Branch: Changeset: r92101:f58c6966c54d Date: 2017-08-06 18:43 +0200 http://bitbucket.org/pypy/pypy/changeset/f58c6966c54d/ Log: Issue #2621 Hack around until we support duplicate field names like ctypes diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- 
a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
@@ -562,3 +562,13 @@
     x = X()
     assert x.x == 0
+
+    def test_duplicate_names(self):
+        class S(Structure):
+            _fields_ = [('a', c_int),
+                        ('b', c_int),
+                        ('a', c_byte)]
+        s = S(260, -123)
+        assert sizeof(s) == 3 * sizeof(c_int)
+        assert s.a == 4    # 256 + 4
+        assert s.b == -123

From pypy.commits at gmail.com  Sun Aug  6 18:11:49 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 06 Aug 2017 15:11:49 -0700 (PDT)
Subject: [pypy-commit] pypy bridgeopt-improvements: only one subclass nowadays
Message-ID: <59879425.b288df0a.9f77f.64a2@mx.google.com>

Author: Carl Friedrich Bolz
Branch: bridgeopt-improvements
Changeset: r92103:b3eb7293ac01
Date: 2017-08-07 00:09 +0200
http://bitbucket.org/pypy/pypy/changeset/b3eb7293ac01/

Log: only one subclass nowadays

diff --git a/rpython/jit/metainterp/test/test_call.py b/rpython/jit/metainterp/test/test_call.py
--- a/rpython/jit/metainterp/test/test_call.py
+++ b/rpython/jit/metainterp/test/test_call.py
@@ -2,7 +2,7 @@
 from rpython.jit.metainterp.test.support import LLJitMixin, noConst
 from rpython.rlib import jit
 
-class CallTest(object):
+class TestCall(LLJitMixin):
     def test_indirect_call(self):
         @jit.dont_look_inside
         def f1(x):
@@ -330,6 +330,3 @@
         res = self.meta_interp(f, [21, 0, 0])
         assert res == -2011
-
-class TestCall(LLJitMixin, CallTest):
-    pass

From pypy.commits at gmail.com  Sun Aug  6 18:11:47 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 06 Aug 2017 15:11:47 -0700 (PDT)
Subject: [pypy-commit] pypy bridgeopt-improvements: experimental attempt to
 reduce the cost of call_loopinvariant in
every bridge that calls a method. approach: pass call_loopinvariant results into failargs (a bit everywhere) and then reuse the result in the bridge. diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -2,6 +2,7 @@ optimizer of the bridge attached to a guard. """ from rpython.jit.metainterp import resumecode +from rpython.rlib.objectmodel import we_are_translated # adds the following sections at the end of the resume code: @@ -22,17 +23,22 @@ # ( ) length times, if getarrayitem_gc(box1, index, descr) == box2 # both boxes should be in the liveboxes # +# +# ( ) length times, if call_loop_invariant(const, descr) == box1 +# the box should be in the liveboxes # ---- # maybe should be delegated to the optimization classes? -def tag_box(box, liveboxes_from_env, memo): +def tag_box(box, adder): from rpython.jit.metainterp.history import Const if isinstance(box, Const): - return memo.getconst(box) + return adder.memo.getconst(box) else: - return liveboxes_from_env[box] # has to exist + if box in adder.liveboxes_from_env: + return adder.liveboxes_from_env[box] + return adder.liveboxes[box] # has to exist def decode_box(resumestorage, tagged, liveboxes, cpu): from rpython.jit.metainterp.resume import untag, TAGCONST, TAGINT, TAGBOX @@ -54,10 +60,13 @@ raise AssertionError("unreachable") return box -def serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, liveboxes_from_env, memo): +def serialize_optimizer_knowledge(adder, numb_state, liveboxes): + optimizer = adder.optimizer + liveboxes_from_env = adder.liveboxes_from_env available_boxes = {} for box in liveboxes: - if box is not None and box in liveboxes_from_env: + if box is not None and ( + box in adder.liveboxes_from_env or box in adder.liveboxes): available_boxes[box] = None metainterp_sd = optimizer.metainterp_sd @@ -84,7 +93,6 @@ # heap 
knowledge: we store triples of known heap fields in non-virtual # structs - # XXX could be extended to arrays if optimizer.optheap: triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into @@ -93,20 +101,32 @@ numb_state.append_int(len(triples_struct)) for box1, descr, box2 in triples_struct: descr_index = descr.descr_index - numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box1, adder)) numb_state.append_int(descr_index) - numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box2, adder)) numb_state.append_int(len(triples_array)) for box1, index, descr, box2 in triples_array: descr_index = descr.descr_index - numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box1, adder)) numb_state.append_int(index) numb_state.append_int(descr_index) - numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box2, adder)) else: numb_state.append_int(0) numb_state.append_int(0) + # loop invariant calls + if optimizer.optrewrite: + triples = optimizer.optrewrite.serialize_optrewrite(available_boxes) + numb_state.append_int(len(triples)) + for const, descr, box in triples: + descr_index = descr.descr_index + numb_state.append_short(tag_box(const, adder)) + numb_state.append_int(descr_index) + numb_state.append_short(tag_box(box, adder)) + else: + numb_state.append_int(0) + def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes): reader = resumecode.Reader(resumestorage.rd_numb) assert len(frontend_boxes) == len(liveboxes) @@ -115,6 +135,8 @@ # skip resume section startcount = reader.next_item() reader.jump(startcount - 1) + extracount = reader.next_item() + reader.jump(extracount) # class knowledge bitfield = 0 @@ -132,8 +154,6 @@ optimizer.make_constant_class(box, cls) # heap knowledge 
- if not optimizer.optheap: - return length = reader.next_item() result_struct = [] for i in range(length): @@ -155,4 +175,59 @@ tagged = reader.next_item() box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) result_array.append((box1, index, descr, box2)) - optimizer.optheap.deserialize_optheap(result_struct, result_array) + if result_struct or result_array: + optimizer.optheap.deserialize_optheap(result_struct, result_array) + + # loop_invariant knowledge + length = reader.next_item() + results = [] + for i in range(length): + tagged = reader.next_item() + box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] + tagged = reader.next_item() + box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + results.append((box1, descr, box2)) + if results: + optimizer.optrewrite.deserialize_optrewrite(results) + +def consistency_checking_numbering(numb, liveboxes): + if we_are_translated(): + return + # very much a "does not crash" kind of affair + reader = resumecode.Reader(numb) + + # skip resume section + startcount = reader.next_item() + reader.jump(startcount - 1) + extracount = reader.next_item() + reader.jump(extracount) + + mask = 0 + for i, box in enumerate(liveboxes): + if box.type != "r": + continue + if not mask: + bitfield = reader.next_item() + mask = 0b100000 + mask >>= 1 + + length = reader.next_item() + for i in range(length): + tagged = reader.next_item() + descr_index = reader.next_item() + tagged = reader.next_item() + length = reader.next_item() + for i in range(length): + tagged = reader.next_item() + index = reader.next_item() + descr_index = reader.next_item() + tagged = reader.next_item() + + # loop_invariant knowledge + length = reader.next_item() + for i in range(length): + tagged = reader.next_item() + descr_index = reader.next_item() + tagged = reader.next_item() diff --git 
a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -620,7 +620,12 @@ del self.replaces_guard[orig_op] return else: - op = self.emit_guard_operation(op, pendingfields) + extra_liveboxes = [] + # hack, but probably a good one + if len(self.optrewrite.loop_invariant_results) == 1: + extra_liveboxes = [ + self.optrewrite.loop_invariant_results.values()[0][0].get_box_replacement()] + op = self.emit_guard_operation(op, pendingfields, extra_liveboxes) elif op.can_raise(): self.exception_might_have_happened = True opnum = op.opnum @@ -633,7 +638,7 @@ self._really_emitted_operation = op self._newoperations.append(op) - def emit_guard_operation(self, op, pendingfields): + def emit_guard_operation(self, op, pendingfields, extra_liveboxes): guard_op = op # self.replace_op_with(op, op.getopnum()) opnum = guard_op.getopnum() # If guard_(no)_exception is merged with another previous guard, then @@ -653,7 +658,8 @@ op = self._copy_resume_data_from(guard_op, self._last_guard_op) else: - op = self.store_final_boxes_in_guard(guard_op, pendingfields) + op = self.store_final_boxes_in_guard( + guard_op, pendingfields, extra_liveboxes) self._last_guard_op = op # for unrolling for farg in op.getfailargs(): @@ -723,7 +729,7 @@ new_descr.copy_all_attributes_from(old_descr) self._newoperations[old_op_pos] = new_op - def store_final_boxes_in_guard(self, op, pendingfields): + def store_final_boxes_in_guard(self, op, pendingfields, extra_liveboxes): assert pendingfields is not None if op.getdescr() is not None: descr = op.getdescr() @@ -736,7 +742,7 @@ modifier = resume.ResumeDataVirtualAdder(self, descr, op, self.trace, self.resumedata_memo) try: - newboxes = modifier.finish(pendingfields) + newboxes = modifier.finish(pendingfields, extra_liveboxes) if (newboxes is not None and len(newboxes) > self.metainterp_sd.options.failargs_limit): 
raise resume.TagOverflow diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py --- a/rpython/jit/metainterp/optimizeopt/rewrite.py +++ b/rpython/jit/metainterp/optimizeopt/rewrite.py @@ -28,7 +28,7 @@ def _callback(self, op, old_op): key = make_hashable_int(op.getarg(0).getint()) self.opt.loop_invariant_producer[key] = self.opt.optimizer.getlastop() - self.opt.loop_invariant_results[key] = old_op + self.opt.loop_invariant_results[key] = old_op, op.getarg(0), old_op.getdescr() class OptRewrite(Optimization): @@ -568,13 +568,15 @@ arg = op.getarg(0) # 'arg' must be a Const, because residual_call in codewriter # expects a compile-time constant + # XXX the descr is ignored! let's hope there are no different + # call_loop_invariant around assert isinstance(arg, Const) key = make_hashable_int(arg.getint()) - resvalue = self.loop_invariant_results.get(key, None) + resvalue, arg0, descr = self.loop_invariant_results.get(key, (None, None, None)) if resvalue is not None: resvalue = self.optimizer.force_op_from_preamble(resvalue) - self.loop_invariant_results[key] = resvalue + self.loop_invariant_results[key] = resvalue, arg0, descr self.make_equal_to(op, resvalue) self.last_emitted_operation = REMOVED return @@ -867,6 +869,18 @@ optimize_SAME_AS_R = optimize_SAME_AS_I optimize_SAME_AS_F = optimize_SAME_AS_I + def serialize_optrewrite(self, available_boxes): + triples = [] + for box, arg0, descr in self.loop_invariant_results.values(): + triples.append((arg0, descr, box.get_box_replacement())) + return triples + + def deserialize_optrewrite(self, triples): + for arg, descr, resvalue in triples: + assert isinstance(arg, Const) + key = make_hashable_int(arg.getint()) + self.loop_invariant_results[key] = resvalue, arg, descr + dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_', default=OptRewrite.emit) optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD') diff --git 
a/rpython/jit/metainterp/optimizeopt/shortpreamble.py b/rpython/jit/metainterp/optimizeopt/shortpreamble.py --- a/rpython/jit/metainterp/optimizeopt/shortpreamble.py +++ b/rpython/jit/metainterp/optimizeopt/shortpreamble.py @@ -154,8 +154,11 @@ return op = self.res key = make_hashable_int(op.getarg(0).getint()) - optrewrite.loop_invariant_results[key] = PreambleOp(op, preamble_op, - invented_name) + optrewrite.loop_invariant_results[key] = ( + PreambleOp(op, preamble_op, invented_name), + op.getarg(0), + op.getdescr() + ) def add_op_to_short(self, sb): op = self.res diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py --- a/rpython/jit/metainterp/resume.py +++ b/rpython/jit/metainterp/resume.py @@ -412,7 +412,8 @@ _, tagbits = untag(tagged) return tagbits == TAGVIRTUAL - def finish(self, pending_setfields=[]): + def finish(self, pending_setfields=[], extra_liveboxes=[]): + from rpython.jit.metainterp.optimizeopt.bridgeopt import consistency_checking_numbering optimizer = self.optimizer # compute the numbering storage = self.storage @@ -458,17 +459,31 @@ info = optimizer.getptrinfo(fieldbox) assert info is not None and info.is_virtual() info.visitor_walk_recursive(fieldbox, self, optimizer) + for box in extra_liveboxes: + box = optimizer.get_box_replacement(box) + self.register_box(box) + info = optimizer.getptrinfo(box) + assert info is None or not info.is_virtual() self._number_virtuals(liveboxes, optimizer, num_virtuals) self._add_pending_fields(optimizer, pending_setfields) numb_state.patch(1, len(liveboxes)) - self._add_optimizer_sections(numb_state, liveboxes, liveboxes_from_env) - storage.rd_numb = numb_state.create_numbering() + self._add_extra_box_section(extra_liveboxes, numb_state) + + self._add_optimizer_sections(numb_state, liveboxes) + rd_numb = numb_state.create_numbering() + consistency_checking_numbering(rd_numb, liveboxes) + storage.rd_numb = rd_numb storage.rd_consts = self.memo.consts return liveboxes[:] + def 
_add_extra_box_section(self, extra_liveboxes, numb_state): + numb_state.append_int(len(extra_liveboxes)) + for box in extra_liveboxes: + numb_state.append_short(self._gettagged(box.get_box_replacement())) + def _number_virtuals(self, liveboxes, optimizer, num_env_virtuals): from rpython.jit.metainterp.optimizeopt.info import AbstractVirtualPtrInfo @@ -584,11 +599,10 @@ return self.liveboxes_from_env[box] return self.liveboxes[box] - def _add_optimizer_sections(self, numb_state, liveboxes, liveboxes_from_env): + def _add_optimizer_sections(self, numb_state, liveboxes): # add extra information about things the optimizer learned from rpython.jit.metainterp.optimizeopt.bridgeopt import serialize_optimizer_knowledge - serialize_optimizer_knowledge( - self.optimizer, numb_state, liveboxes, liveboxes_from_env, self.memo) + serialize_optimizer_knowledge(self, numb_state, liveboxes) class AbstractVirtualInfo(object): kind = REF @@ -1067,6 +1081,7 @@ resumereader.consume_boxes(f.get_current_position_info(), f.registers_i, f.registers_r, f.registers_f) f.handle_rvmprof_enter_on_resume() + resumereader.consume_extra_boxes() return resumereader.liveboxes, virtualizable_boxes, virtualref_boxes @@ -1113,6 +1128,11 @@ virtualref_boxes = self.consume_virtualref_boxes() return virtualizable_boxes, virtualref_boxes + def consume_extra_boxes(self): + extra_boxes_size = self.resumecodereader.next_item() + for i in range(extra_boxes_size): + self.next_ref() # does nothing but read the box! + def allocate_with_vtable(self, descr=None): return self.metainterp.execute_new_with_vtable(descr=descr) diff --git a/rpython/jit/metainterp/resumecode.py b/rpython/jit/metainterp/resumecode.py --- a/rpython/jit/metainterp/resumecode.py +++ b/rpython/jit/metainterp/resumecode.py @@ -18,6 +18,8 @@ until the size of the resume section + [ ... 
] more boxes for the optimizer section + # ----- optimization section further sections according to bridgeopt.py """ diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -27,6 +27,7 @@ class FakeOptimizer(object): metainterp_sd = None optheap = None + optrewrite = None def __init__(self, dct={}, cpu=None): self.dct = dct @@ -46,6 +47,13 @@ def __init__(self, numb): self.rd_numb = numb +class FakeAdder(object): + def __init__(self, optimizer, liveboxes_from_env, liveboxes, memo): + self.optimizer = optimizer + self.liveboxes_from_env = liveboxes_from_env + self.liveboxes = liveboxes + self.memo = memo + def test_known_classes(): box1 = InputArgRef() box2 = InputArgRef() @@ -57,11 +65,13 @@ numb_state = NumberingState(4) numb_state.append_int(1) # size of resume block + numb_state.append_int(0) # size of extra arg block liveboxes = [InputArgInt(), box2, box1, box3] + adder = FakeAdder(optimizer, {}, {}, None) - serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) + serialize_optimizer_knowledge(adder, numb_state, liveboxes) - assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0] + assert unpack_numbering(numb_state.create_numbering()) == [1, 0, 0b010000, 0, 0, 0] rbox1 = InputArgRef() rbox2 = InputArgRef() @@ -93,11 +103,14 @@ numb_state = NumberingState(1) numb_state.append_int(1) # size of resume block + numb_state.append_int(0) # size of extra arg block liveboxes = [box for (box, _) in boxes_known_classes] - serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) + adder = FakeAdder(optimizer, {}, {}, None) - assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0) + serialize_optimizer_knowledge(adder, numb_state, liveboxes) + + assert len(numb_state.create_numbering().code) == 5 + math.ceil(len(refboxes) / 6.0) dct 
= {box: cls for box, known_class in boxes_known_classes
@@ -140,6 +153,40 @@
         self.check_trace_count(3)
         self.check_resops(guard_class=1)
 
+    def Xtest_bridge_guard_class_virtual(self):
+        myjitdriver = jit.JitDriver(greens=[], reds='auto')
+        class A(object):
+            def f(self):
+                return 1
+        class B(A):
+            def f(self):
+                return 2
+        class Box(object):
+            def __init__(self, a):
+                self.a = a
+        def f(x, y, n):
+            if x:
+                a = A()
+            else:
+                a = B()
+            a.x = 0
+            box = Box(a)
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point()
+                res += box.a.f()
+                a.x += 1
+                if y > n:
+                    res += 1
+                    res += box.a.f()
+                y -= 1
+                box = Box(box.a)
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        assert res == f(6, 32, 16)
+        self.check_trace_count(3)
+        self.check_resops(guard_class=1)
+
     def test_bridge_field_read(self):
         myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a'])
         class A(object):
@@ -282,3 +329,33 @@
         self.check_trace_count(3)
         self.check_resops(guard_value=1)
         self.check_resops(getarrayitem_gc_i=5)
+
+    def test_loop_invariant_bridge(self):
+        myjitdriver = jit.JitDriver(greens = [], reds = ['x', 'res'])
+        class A(object):
+            pass
+        a = A()
+        a.current_a = A()
+        a.current_a.x = 12
+        @jit.loop_invariant
+        def f():
+            return a.current_a
+
+        def g(x):
+            res = 0
+            while x > 0:
+                myjitdriver.can_enter_jit(x=x, res=res)
+                myjitdriver.jit_merge_point(x=x, res=res)
+                res += jit.promote(f().x)
+                if x % 5 == 1:
+                    res += 5
+                res += jit.promote(f().x)
+                res += jit.promote(f().x)
+                x -= 1
+            a.current_a = A()
+            a.current_a.x = 2
+            return res
+        res = self.meta_interp(g, [21])
+        assert res == g(21)
+        self.check_resops(call_r=1)

From pypy.commits at gmail.com  Sun Aug  6 19:24:25 2017
From: pypy.commits at gmail.com (rlamy)
Date: Sun, 06 Aug 2017 16:24:25 -0700 (PDT)
Subject: [pypy-commit] pypy py3.5: adapt is_interned_string() for py3
Message-ID: <5987a529.6596df0a.aa15.9bbc@mx.google.com>

Author: Ronan Lamy
Branch: py3.5
Changeset: r92104:e622a373b04c
Date: 2017-08-07 00:23 +0100
http://bitbucket.org/pypy/pypy/changeset/e622a373b04c/

Log:	adapt is_interned_string() for py3

diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -97,10 +97,10 @@
 
 def is_interned_string(space, w_obj):
     try:
-        s = space.str_w(w_obj)
+        u = space.unicode_w(w_obj)
     except OperationError:
         return False
-    return space.is_interned_str(s)
+    return space.interned_strings.get(u) is not None
 
 def is_allowed_to_leak(space, obj):
     from pypy.module.cpyext.methodobject import W_PyCFunctionObject

From pypy.commits at gmail.com  Mon Aug  7 08:24:04 2017
From: pypy.commits at gmail.com (fijal)
Date: Mon, 07 Aug 2017 05:24:04 -0700 (PDT)
Subject: [pypy-commit] pypy default: make sure dict != dict is complained about early
Message-ID: <59885be4.05861c0a.ba361.3ee6@mx.google.com>

Author: fijal
Branch: 
Changeset: r92105:fd96c60cfb19
Date: 2017-08-07 14:17 +0200
http://bitbucket.org/pypy/pypy/changeset/fd96c60cfb19/

Log:	make sure dict != dict is complained about early

diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py
--- a/rpython/annotator/binaryop.py
+++ b/rpython/annotator/binaryop.py
@@ -529,6 +529,8 @@
         assert dic1.__class__ == dic2.__class__
         return dic1.__class__(dic1.dictdef.union(dic2.dictdef))
 
+    def ne((dic1, dic2)):
+        raise AnnotatorError("dict != dict not implemented")
 
 def _dict_can_only_throw_keyerror(s_dct, *ignore):
     if s_dct.dictdef.dictkey.custom_eq_hash:

From pypy.commits at gmail.com  Mon Aug  7 08:24:06 2017
From: pypy.commits at gmail.com (fijal)
Date: Mon, 07 Aug 2017 05:24:06 -0700 (PDT)
Subject: [pypy-commit] pypy default: merge
Message-ID: <59885be6.c6c31c0a.6078.746b@mx.google.com>

Author: fijal
Branch: 
Changeset: r92106:bddc7d672228
Date: 2017-08-07 14:23 +0200
http://bitbucket.org/pypy/pypy/changeset/bddc7d672228/

Log:	merge

diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
---
a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -60,3 +60,16 @@ Small improvement to optimize list accesses with constant indexes better by throwing away information about them less eagerly. + + +.. branch: getarrayitem-into-bridges: + +More information is retained into a bridge: knowledge about the content of +arrays (at fixed indices) is stored in guards (and thus available at the +beginning of bridges). Also, some better feeding of information about known +fields of constant objects into bridges. + +.. branch: cpyext-leakchecking + +Add support for leakfinder in cpyext tests (disabled for now, due to too many +failures). diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1,4 +1,5 @@ import sys +import py from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES @@ -1338,8 +1339,22 @@ self.setitem(w_globals, w_key, self.builtin) return statement.exec_code(self, w_globals, w_locals) + @not_rpython + def appdef(self, source): + '''Create interp-level function object from app-level source. 
+ + The source should be in the same format as for space.appexec(): + """(foo, bar): return 'baz'""" + ''' + source = source.lstrip() + assert source.startswith('('), "incorrect header in:\n%s" % (source,) + source = py.code.Source("def anonymous%s\n" % source) + w_glob = self.newdict(module=True) + self.exec_(str(source), w_glob, w_glob) + return self.getitem(w_glob, self.newtext('anonymous')) + @specialize.arg(2) - def appexec(self, posargs_w, source): + def appexec(self, posargs_w, source, cache=True): """ return value from executing given source at applevel. The source must look like '''(x, y): @@ -1347,7 +1362,11 @@ return result ''' """ - w_func = self.fromcache(AppExecCache).getorbuild(source) + if cache: + w_func = self.fromcache(AppExecCache).getorbuild(source) + else: + # NB: since appdef() is not-RPython, using cache=False also is. + w_func = self.appdef(source) args = Arguments(self, list(posargs_w)) return self.call_args(w_func, args) @@ -1926,15 +1945,7 @@ class AppExecCache(SpaceCache): @not_rpython def build(cache, source): - space = cache.space - # XXX will change once we have our own compiler - import py - source = source.lstrip() - assert source.startswith('('), "incorrect header in:\n%s" % (source,) - source = py.code.Source("def anonymous%s\n" % source) - w_glob = space.newdict(module=True) - space.exec_(str(source), w_glob, w_glob) - return space.getitem(w_glob, space.newtext('anonymous')) + return cache.space.appdef(source) # Table describing the regular part of the interface of object spaces, diff --git a/pypy/module/cpyext/buffer.py b/pypy/module/cpyext/buffer.py --- a/pypy/module/cpyext/buffer.py +++ b/pypy/module/cpyext/buffer.py @@ -73,22 +73,27 @@ if self.needs_decref: if self.releasebufferproc: func_target = rffi.cast(releasebufferproc, self.releasebufferproc) - with lltype.scoped_alloc(Py_buffer) as pybuf: - pybuf.c_buf = self.ptr - pybuf.c_len = self.size - pybuf.c_ndim = cts.cast('int', self.ndim) - pybuf.c_shape = 
cts.cast('Py_ssize_t*', pybuf.c__shape) - pybuf.c_strides = cts.cast('Py_ssize_t*', pybuf.c__strides) - for i in range(self.ndim): - pybuf.c_shape[i] = self.shape[i] - pybuf.c_strides[i] = self.strides[i] - if self.format: - pybuf.c_format = rffi.str2charp(self.format) - else: - pybuf.c_format = rffi.str2charp("B") + size = rffi.sizeof(cts.gettype('Py_buffer')) + pybuf = lltype.malloc(rffi.VOIDP.TO, size, flavor='raw', zero=True) + pybuf = cts.cast('Py_buffer*', pybuf) + pybuf.c_buf = self.ptr + pybuf.c_len = self.size + pybuf.c_ndim = cts.cast('int', self.ndim) + pybuf.c_shape = cts.cast('Py_ssize_t*', pybuf.c__shape) + pybuf.c_strides = cts.cast('Py_ssize_t*', pybuf.c__strides) + for i in range(self.ndim): + pybuf.c_shape[i] = self.shape[i] + pybuf.c_strides[i] = self.strides[i] + fmt = rffi.str2charp(self.format if self.format else "B") + try: + pybuf.c_format = fmt generic_cpy_call(self.space, func_target, self.pyobj, pybuf) + finally: + lltype.free(fmt, flavor='raw') + lltype.free(pybuf, flavor='raw') decref(self.space, self.pyobj) self.pyobj = lltype.nullptr(PyObject.TO) + self.w_obj = None else: #do not call twice return @@ -167,6 +172,8 @@ sizep[0] = size return 0 +DEFAULT_FMT = rffi.str2charp("B") + @cpython_api([lltype.Ptr(Py_buffer), PyObject, rffi.VOIDP, Py_ssize_t, lltype.Signed, lltype.Signed], rffi.INT, error=-1) def PyBuffer_FillInfo(space, view, obj, buf, length, readonly, flags): @@ -187,7 +194,8 @@ rffi.setintfield(view, 'c_ndim', 1) view.c_format = lltype.nullptr(rffi.CCHARP.TO) if (flags & PyBUF_FORMAT) == PyBUF_FORMAT: - view.c_format = rffi.str2charp("B") + # NB: this needs to be a static string, because nothing frees it + view.c_format = DEFAULT_FMT view.c_shape = lltype.nullptr(Py_ssize_tP.TO) if (flags & PyBUF_ND) == PyBUF_ND: view.c_shape = rffi.cast(Py_ssize_tP, view.c__shape) diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ 
b/pypy/module/cpyext/memoryobject.py @@ -72,7 +72,7 @@ readonly=widen(view.c_readonly)) # Ensure view.c_buf is released upon object finalization fq.register_finalizer(buf) - # Allow subclassing W_MemeoryView + # Allow subclassing W_MemoryView w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type)) w_obj = space.allocate_instance(W_MemoryView, w_type) w_obj.__init__(buf) @@ -177,11 +177,9 @@ return (_IsCContiguous(view) or _IsFortranContiguous(view)) return 0 - at cpython_api([PyObject], PyObject, result_is_ll=True) + at cpython_api([PyObject], PyObject) def PyMemoryView_FromObject(space, w_obj): - w_memview = space.call_method(space.builtin, "memoryview", w_obj) - py_memview = make_ref(space, w_memview, w_obj) - return py_memview + return space.call_method(space.builtin, "memoryview", w_obj) @cpython_api([Py_bufferP], PyObject, result_is_ll=True) def PyMemoryView_FromBuffer(space, view): @@ -193,6 +191,7 @@ # copy view into obj.c_view, without creating a new view.c_obj typedescr = get_typedescr(W_MemoryView.typedef) py_obj = typedescr.allocate(space, space.w_memoryview) + py_mem = rffi.cast(PyMemoryViewObject, py_obj) mview = py_mem.c_view mview.c_buf = view.c_buf diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -29,7 +29,7 @@ from pypy.module.cpyext.typeobject import subtype_dealloc return subtype_dealloc.api_func - def allocate(self, space, w_type, itemcount=0): + def allocate(self, space, w_type, itemcount=0, immortal=False): # typically called from PyType_GenericAlloc via typedescr.allocate # this returns a PyObject with ob_refcnt == 1. 
@@ -50,7 +50,7 @@ assert size >= rffi.sizeof(PyObject.TO) buf = lltype.malloc(rffi.VOIDP.TO, size, flavor='raw', zero=True, - add_memory_pressure=True) + add_memory_pressure=True, immortal=immortal) pyobj = rffi.cast(PyObject, buf) if pytype.c_tp_itemsize: pyvarobj = rffi.cast(PyVarObject, pyobj) @@ -102,7 +102,7 @@ basestruct = tp_basestruct if tp_alloc: - def allocate(self, space, w_type, itemcount=0): + def allocate(self, space, w_type, itemcount=0, immortal=False): return tp_alloc(space, w_type, itemcount) if tp_dealloc: @@ -151,7 +151,7 @@ class InvalidPointerException(Exception): pass -def create_ref(space, w_obj, w_userdata=None): +def create_ref(space, w_obj, w_userdata=None, immortal=False): """ Allocates a PyObject, and fills its fields with info from the given interpreter object. @@ -163,7 +163,7 @@ itemcount = space.len_w(w_obj) # PyBytesObject and subclasses else: itemcount = 0 - py_obj = typedescr.allocate(space, w_type, itemcount=itemcount) + py_obj = typedescr.allocate(space, w_type, itemcount=itemcount, immortal=immortal) track_reference(space, py_obj, w_obj) # # py_obj.c_ob_refcnt should be exactly REFCNT_FROM_PYPY + 1 here, @@ -227,7 +227,7 @@ assert isinstance(w_type, W_TypeObject) return get_typedescr(w_type.layout.typedef).realize(space, ref) -def as_pyobj(space, w_obj, w_userdata=None): +def as_pyobj(space, w_obj, w_userdata=None, immortal=False): """ Returns a 'PyObject *' representing the given intepreter object. 
This doesn't give a new reference, but the returned 'PyObject *' @@ -239,7 +239,7 @@ assert not is_pyobj(w_obj) py_obj = rawrefcount.from_obj(PyObject, w_obj) if not py_obj: - py_obj = create_ref(space, w_obj, w_userdata) + py_obj = create_ref(space, w_obj, w_userdata, immortal=immortal) return py_obj else: return lltype.nullptr(PyObject.TO) @@ -270,7 +270,7 @@ return hop.inputconst(lltype.Bool, hop.s_result.const) @specialize.ll() -def make_ref(space, obj, w_userdata=None): +def make_ref(space, obj, w_userdata=None, immortal=False): """Increment the reference counter of the PyObject and return it. Can be called with either a PyObject or a W_Root. """ @@ -278,7 +278,7 @@ pyobj = rffi.cast(PyObject, obj) at_least = 1 else: - pyobj = as_pyobj(space, obj, w_userdata) + pyobj = as_pyobj(space, obj, w_userdata, immortal=immortal) at_least = rawrefcount.REFCNT_FROM_PYPY if pyobj: assert pyobj.c_ob_refcnt >= at_least diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py --- a/pypy/module/cpyext/sequence.py +++ b/pypy/module/cpyext/sequence.py @@ -48,7 +48,7 @@ m as the message text. 
If the conversion otherwise, fails, reraise the original exception""" if isinstance(w_obj, W_ListObject): - # make sure we can return a borrowed obj from PySequence_Fast_GET_ITEM + # make sure we can return a borrowed obj from PySequence_Fast_GET_ITEM w_obj.convert_to_cpy_strategy(space) return w_obj try: @@ -313,7 +313,7 @@ self) w_clone.switch_to_object_strategy() return w_clone - + def copy_into(self, w_list, w_other): w_list.switch_to_object_strategy() w_list.strategy.copy_into(w_list, w_other) @@ -378,7 +378,7 @@ def is_empty_strategy(self): return False - + PyObjectList = lltype.Ptr(lltype.Array(PyObject, hints={'nolength': True})) diff --git a/pypy/module/cpyext/test/array.c b/pypy/module/cpyext/test/array.c --- a/pypy/module/cpyext/test/array.c +++ b/pypy/module/cpyext/test/array.c @@ -1864,10 +1864,12 @@ if (PyList_Check(obj1) && ((arrayobject*)obj2)->ob_descr->typecode == 'i' && Py_SIZE(obj2) == 1) { int ii, nn; + PyObject *ret; int n = PyList_Size(obj1); PyObject *v = getarrayitem(obj2, 0); int i = ((PyIntObject*)v)->ob_ival; - PyObject * ret = PyList_New(n*i); + Py_DECREF(v); + ret = PyList_New(n*i); for (ii = 0; ii < i; ii++) for (nn = 0; nn < n; nn++) { @@ -1880,10 +1882,12 @@ else if (PyList_Check(obj2) && ((arrayobject*)obj1)->ob_descr->typecode == 'i' && Py_SIZE(obj1) == 1) { int ii, nn; + PyObject *ret; int n = PyList_Size(obj2); PyObject *v = getarrayitem(obj1, 0); int i = ((PyIntObject*)v)->ob_ival; - PyObject * ret = PyList_New(n*i); + Py_DECREF(v); + ret = PyList_New(n*i); for (ii = 0; ii < i; ii++) for (nn = 0; nn < n; nn++) { @@ -1916,34 +1920,44 @@ if (PyList_Check(obj1) && ((arrayobject*)obj2)->ob_descr->typecode == 'i' && Py_SIZE(obj2) == 1) { int nn; + PyObject *ret; int n = PyList_Size(obj1); PyObject *v = getarrayitem(obj2, 0); int i = ((PyIntObject*)v)->ob_ival; - PyObject * ret = PyList_New(n); + Py_DECREF(v); + ret = PyList_New(n); for (nn = 0; nn < n; nn++) { v = PyList_GetItem(obj1, nn); if (PyInt_Check(v)) PyList_SetItem(ret, nn, 
PyLong_FromLong(i * ((PyIntObject*)v)->ob_ival)); else + { + Py_INCREF(v); PyList_SetItem(ret, nn, v); + } } return ret; } else if (PyList_Check(obj2) && ((arrayobject*)obj1)->ob_descr->typecode == 'i' && Py_SIZE(obj1) == 1) { int nn; + PyObject *ret; int n = PyList_Size(obj2); PyObject *v = getarrayitem(obj1, 0); int i = ((PyIntObject*)v)->ob_ival; - PyObject * ret = PyList_New(n); + Py_DECREF(v); + ret = PyList_New(n); for (nn = 0; nn < n; nn++) { v = PyList_GetItem(obj2, nn); if (PyInt_Check(v)) PyList_SetItem(ret, nn, PyLong_FromLong(i * ((PyIntObject*)v)->ob_ival)); else + { + Py_INCREF(v); PyList_SetItem(ret, nn, v); + } } return ret; } @@ -2458,6 +2472,15 @@ Py_RETURN_NONE; } +static PyObject * +same_dealloc(PyObject *self, PyObject *args) +{ + PyObject *obj1, *obj2; + if (!PyArg_ParseTuple(args, "OO", &obj1, &obj2)) { + return NULL; + } + return PyLong_FromLong(obj1->ob_type->tp_dealloc == obj2->ob_type->tp_dealloc); +} /*********************** Install Module **************************/ @@ -2467,6 +2490,7 @@ {"readbuffer_as_string", (PyCFunction)readbuffer_as_string, METH_VARARGS, NULL}, {"get_releasebuffer_cnt", (PyCFunction)get_releasebuffer_cnt, METH_NOARGS, NULL}, {"create_and_release_buffer", (PyCFunction)create_and_release_buffer, METH_O, NULL}, + {"same_dealloc", (PyCFunction)same_dealloc, METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/pypy/module/cpyext/test/test_api.py b/pypy/module/cpyext/test/test_api.py --- a/pypy/module/cpyext/test/test_api.py +++ b/pypy/module/cpyext/test/test_api.py @@ -6,7 +6,8 @@ from pypy.module.cpyext.api import ( slot_function, cpython_api, copy_header_files, INTERPLEVEL_API, Py_ssize_t, Py_ssize_tP, PyObject, cts) -from pypy.module.cpyext.test.test_cpyext import freeze_refcnts, LeakCheckingTest +from pypy.module.cpyext.test.test_cpyext import ( + freeze_refcnts, LeakCheckingTest) from pypy.interpreter.error import OperationError from rpython.rlib import rawrefcount import os @@ -21,17 +22,7 @@ 
class BaseApiTest(LeakCheckingTest): def setup_class(cls): space = cls.space - # warm up reference counts: - # - the posix module allocates a HCRYPTPROV on Windows - # - writing to stdout and stderr allocates a file lock - space.getbuiltinmodule("cpyext") - space.getbuiltinmodule(os.name) - space.call_function(space.getattr(space.sys.get("stderr"), - space.wrap("write")), - space.wrap("")) - space.call_function(space.getattr(space.sys.get("stdout"), - space.wrap("write")), - space.wrap("")) + cls.preload_builtins(space) class CAPI: def __getattr__(self, name): @@ -39,9 +30,6 @@ cls.api = CAPI() CAPI.__dict__.update(INTERPLEVEL_API) - print 'DONT_FREE_ANY_MORE' - rawrefcount._dont_free_any_more() - def raises(self, space, api, expected_exc, f, *args): if not callable(f): raise Exception("%s is not callable" % (f,)) diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -1,3 +1,4 @@ +import pytest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from pypy.conftest import option @@ -111,6 +112,19 @@ res = [1, 2, 3] * arr assert res == [2, 4, 6] + @pytest.mark.xfail + def test_subclass_dealloc(self): + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + module.readbuffer_as_string(arr) + class A(object): + pass + assert not module.same_dealloc(arr, module.array('i', [2])) + assert module.same_dealloc(arr, A()) + def test_subclass(self): import struct module = self.import_module(name='array') diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -4,8 +4,11 @@ from pypy.tool.cpyext.extbuild import SystemCompilationInfo, HERE from pypy.interpreter.gateway import unwrap_spec, interp2app +from pypy.interpreter.error 
import OperationError from rpython.rtyper.lltypesystem import lltype from pypy.module.cpyext import api +from pypy.module.cpyext.api import cts +from pypy.module.cpyext.pyobject import from_ref from pypy.module.cpyext.state import State from rpython.tool import leakfinder from rpython.rlib import rawrefcount @@ -73,6 +76,58 @@ def freeze_refcnts(self): rawrefcount._dont_free_any_more() +def preload(space, name): + from pypy.module.cpyext.pyobject import make_ref + if '.' not in name: + w_obj = space.builtin.getdictvalue(space, name) + else: + module, localname = name.rsplit('.', 1) + code = "(): import {module}; return {module}.{localname}" + code = code.format(**locals()) + w_obj = space.appexec([], code) + make_ref(space, w_obj) + +def preload_expr(space, expr): + from pypy.module.cpyext.pyobject import make_ref + code = "(): return {}".format(expr) + w_obj = space.appexec([], code) + make_ref(space, w_obj) + +def is_interned_string(space, w_obj): + try: + s = space.str_w(w_obj) + except OperationError: + return False + return space.is_interned_str(s) + +def is_allowed_to_leak(space, obj): + from pypy.module.cpyext.methodobject import W_PyCFunctionObject + try: + w_obj = from_ref(space, cts.cast('PyObject*', obj._as_ptr())) + except: + return False + if isinstance(w_obj, W_PyCFunctionObject): + return True + # It's OK to "leak" some interned strings: if the pyobj is created by + # the test, but the w_obj is referred to from elsewhere. 
+ return is_interned_string(space, w_obj) + +def _get_w_obj(space, c_obj): + return from_ref(space, cts.cast('PyObject*', c_obj._as_ptr())) + +class CpyextLeak(leakfinder.MallocMismatch): + def __str__(self): + lines = [leakfinder.MallocMismatch.__str__(self), ''] + lines.append( + "These objects are attached to the following W_Root objects:") + for c_obj in self.args[0]: + try: + lines.append(" %s" % (_get_w_obj(self.args[1], c_obj),)) + except: + pass + return '\n'.join(lines) + + class LeakCheckingTest(object): """Base class for all cpyext tests.""" spaceconfig = dict(usemodules=['cpyext', 'thread', 'struct', 'array', @@ -80,11 +135,35 @@ 'micronumpy', 'mmap' ]) + @classmethod + def preload_builtins(cls, space): + """ + Eagerly create pyobjs for various builtins so they don't look like + leaks. + """ + for name in [ + 'buffer', 'mmap.mmap', + 'types.FunctionType', 'types.CodeType', + 'types.TracebackType', 'types.FrameType']: + preload(space, name) + for expr in ['type(str.join)']: + preload_expr(space, expr) + def cleanup(self): self.space.getexecutioncontext().cleanup_cpyext_state() rawrefcount._collect() self.space.user_del_action._run_finalizers() - leakfinder.stop_tracking_allocations(check=False) + try: + # set check=True to actually enable leakfinder + leakfinder.stop_tracking_allocations(check=False) + except leakfinder.MallocMismatch as e: + result = e.args[0] + filtered_result = {} + for obj, value in result.iteritems(): + if not is_allowed_to_leak(self.space, obj): + filtered_result[obj] = value + if filtered_result: + raise CpyextLeak(filtered_result, self.space) assert not self.space.finalizer_queue.next_dead() @@ -131,6 +210,7 @@ def debug_collect(space): rawrefcount._collect() + class AppTestCpythonExtensionBase(LeakCheckingTest): def setup_class(cls): @@ -140,13 +220,8 @@ cls.w_runappdirect = space.wrap(cls.runappdirect) if not cls.runappdirect: cls.sys_info = get_cpyext_info(space) - space.getbuiltinmodule("cpyext") - # 'import os' to warm up 
reference counts - w_import = space.builtin.getdictvalue(space, '__import__') - space.call_function(w_import, space.wrap("os")) - #state = cls.space.fromcache(RefcountState) ZZZ - #state.non_heaptypes_w[:] = [] cls.w_debug_collect = space.wrap(interp2app(debug_collect)) + cls.preload_builtins(space) else: def w_import_module(self, name, init=None, body='', filename=None, include_dirs=None, PY_SSIZE_T_CLEAN=False): diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py --- a/pypy/module/cpyext/test/test_dictobject.py +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -111,70 +111,14 @@ PyDict_Update(space, w_d, w_d2) assert space.unwrap(w_d) == dict(a='b') # unchanged - def test_iter(self, space): - w_dict = space.sys.getdict(space) - py_dict = make_ref(space, w_dict) - - ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') - ppos[0] = 0 - pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - - try: - w_copy = space.newdict() - while PyDict_Next(space, w_dict, ppos, pkey, pvalue): - w_key = from_ref(space, pkey[0]) - w_value = from_ref(space, pvalue[0]) - space.setitem(w_copy, w_key, w_value) - finally: - lltype.free(ppos, flavor='raw') - lltype.free(pkey, flavor='raw') - lltype.free(pvalue, flavor='raw') - - decref(space, py_dict) # release borrowed references - - assert space.eq_w(space.len(w_copy), space.len(w_dict)) - assert space.eq_w(w_copy, w_dict) - - def test_iterkeys(self, space): - w_dict = space.sys.getdict(space) - py_dict = make_ref(space, w_dict) - - ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') - pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - - keys_w = [] - values_w = [] - try: - ppos[0] = 0 - while PyDict_Next(space, w_dict, ppos, pkey, None): - w_key = from_ref(space, pkey[0]) - keys_w.append(w_key) - ppos[0] = 0 - while PyDict_Next(space, w_dict, ppos, None, pvalue): - 
w_value = from_ref(space, pvalue[0]) - values_w.append(w_value) - finally: - lltype.free(ppos, flavor='raw') - lltype.free(pkey, flavor='raw') - lltype.free(pvalue, flavor='raw') - - decref(space, py_dict) # release borrowed references - - assert space.eq_w(space.newlist(keys_w), - space.call_method(w_dict, "keys")) - assert space.eq_w(space.newlist(values_w), - space.call_method(w_dict, "values")) - def test_dictproxy(self, space): - w_dict = space.sys.get('modules') + w_dict = space.appexec([], """(): return {1: 2, 3: 4}""") w_proxy = PyDictProxy_New(space, w_dict) - assert space.contains_w(w_proxy, space.wrap('sys')) + assert space.contains_w(w_proxy, space.newint(1)) raises(OperationError, space.setitem, - w_proxy, space.wrap('sys'), space.w_None) + w_proxy, space.newint(1), space.w_None) raises(OperationError, space.delitem, - w_proxy, space.wrap('sys')) + w_proxy, space.newint(1)) raises(OperationError, space.call_method, w_proxy, 'clear') assert PyDictProxy_Check(space, w_proxy) @@ -243,6 +187,59 @@ d = {"a": 1} raises(AttributeError, module.update, d, [("c", 2)]) + def test_iter(self): + module = self.import_extension('foo', [ + ("copy", "METH_O", + ''' + Py_ssize_t pos = 0; + PyObject *key, *value; + PyObject* copy = PyDict_New(); + while (PyDict_Next(args, &pos, &key, &value)) + { + if (PyDict_SetItem(copy, key, value) < 0) + { + Py_DecRef(copy); + return NULL; + } + } + return copy; + ''')]) + d = {1: 'xyz', 3: 'abcd'} + copy = module.copy(d) + assert len(copy) == len(d) + assert copy == d + + def test_iterkeys(self): + module = self.import_extension('foo', [ + ("keys_and_values", "METH_O", + ''' + Py_ssize_t pos = 0; + PyObject *key, *value, *values; + PyObject* keys = PyList_New(0); + while (PyDict_Next(args, &pos, &key, NULL)) + { + if (PyList_Append(keys, key) < 0) + { + Py_DecRef(keys); + return NULL; + } + } + pos = 0; + values = PyList_New(0); + while (PyDict_Next(args, &pos, NULL, &value)) + { + if (PyList_Append(values, value) < 0) + { + 
Py_DecRef(keys); + Py_DecRef(values); + return NULL; + } + } + return Py_BuildValue("(NN)", keys, values); + ''')]) + d = {1: 'xyz', 3: 'abcd'} + assert module.keys_and_values(d) == (d.keys(), d.values()) + def test_typedict2(self): module = self.import_extension('foo', [ ("get_type_dict", "METH_O", @@ -255,6 +252,7 @@ ]) d = module.get_type_dict(1) assert d['real'].__get__(1, 1) == 1 + def test_advanced(self): module = self.import_extension('foo', [ ("dict_len", "METH_O", @@ -266,7 +264,7 @@ ''' int ret; PyObject * dict = PyTuple_GetItem(args, 0); - if (PyTuple_Size(args) < 3 || !dict || + if (PyTuple_Size(args) < 3 || !dict || !dict->ob_type->tp_as_mapping || !dict->ob_type->tp_as_mapping->mp_ass_subscript) return PyLong_FromLong(-1); @@ -279,7 +277,7 @@ ''' int ret; PyObject * dict = PyTuple_GetItem(args, 0); - if (PyTuple_Size(args) < 2 || !dict || + if (PyTuple_Size(args) < 2 || !dict || !dict->ob_type->tp_as_mapping || !dict->ob_type->tp_as_mapping->mp_ass_subscript) return PyLong_FromLong(-1); diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -279,6 +279,7 @@ assert module.call_method("text") == 2 def test_CompileString_and_Exec(self): + import sys module = self.import_extension('foo', [ ("compile_string", "METH_NOARGS", """ @@ -313,6 +314,9 @@ print mod.__dict__ assert mod.f(42) == 47 + # Clean-up + del sys.modules['cpyext_test_modname'] + def test_merge_compiler_flags(self): module = self.import_extension('foo', [ ("get_flags", "METH_NOARGS", @@ -357,4 +361,4 @@ except RuntimeError as e: assert 'while calling recurse' in str(e) else: - assert False, "expected RuntimeError" + assert False, "expected RuntimeError" diff --git a/pypy/module/cpyext/test/test_floatobject.py b/pypy/module/cpyext/test/test_floatobject.py --- a/pypy/module/cpyext/test/test_floatobject.py +++ b/pypy/module/cpyext/test/test_floatobject.py @@ -104,6 +104,7 @@ 
             PyFloatObject* pfo = (PyFloatObject*)pyobj;
             int res = PyFloat_Check(pyobj) && PyFloat_CheckExact(pyobj) &&
                 PyFloat_Check(pfo) && PyFloat_CheckExact(pfo);
+            Py_DecRef(pyobj);
             return PyLong_FromLong(res);"""),
             ])
         assert module.test() == 1
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -46,7 +46,7 @@
         w_function = space.appexec([], """():
             def func(x, y, z): return x
             return func
-        """)
+        """, cache=False)
         w_code = PyFunction_GetCode(space, w_function)
         assert w_code.co_name == "func"
@@ -63,7 +63,7 @@
         w_code = space.appexec([], """():
             def func(%s): %s
             return func.__code__
-        """ % (signature, body))
+        """ % (signature, body), cache=False)
         ref = make_ref(space, w_code)
         co_flags = rffi.cast(PyCodeObject, ref).c_co_flags
         decref(space, ref)
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -313,6 +313,7 @@
                  ret = obj->ob_type->tp_as_number->nb_power(obj, one, one);
              else
                  ret = PyLong_FromLong(-1);
+             Py_DECREF(one);
              Py_DECREF(obj);
              return ret;
              """),
@@ -340,4 +341,3 @@
         assert module.has_pow() == 0
         assert module.has_hex() == '0x2aL'
         assert module.has_oct() == '052L'
-
diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py
--- a/pypy/module/cpyext/test/test_memoryobject.py
+++ b/pypy/module/cpyext/test/test_memoryobject.py
@@ -4,7 +4,7 @@
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 from rpython.rlib.buffer import StringBuffer
-from pypy.module.cpyext.pyobject import from_ref
+from pypy.module.cpyext.pyobject import make_ref, from_ref
 from pypy.module.cpyext.memoryobject import PyMemoryViewObject

 only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names"
@@ -12,9 +12,9 @@
 class TestMemoryViewObject(BaseApiTest):
     def test_frombuffer(self, space, api):
         w_buf = space.newbuffer(StringBuffer("hello"))
+        w_memoryview = api.PyMemoryView_FromObject(w_buf)
         c_memoryview = rffi.cast(
-            PyMemoryViewObject, api.PyMemoryView_FromObject(w_buf))
-        w_memoryview = from_ref(space, c_memoryview)
+            PyMemoryViewObject, make_ref(space, w_memoryview))
         view = c_memoryview.c_view
         assert view.c_ndim == 1
         f = rffi.charp2str(view.c_format)
@@ -32,6 +32,7 @@
             assert space.eq_w(space.getattr(w_mv, w_f),
                               space.getattr(w_memoryview, w_f))
         api.Py_DecRef(ref)
+        api.Py_DecRef(w_memoryview)

 class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase):
     def test_fillWithObject(self):
@@ -62,7 +63,6 @@
                                  """)])
         result = module.fillinfo()
         assert b"hello, world." == result
-        del result

 class AppTestBufferProtocol(AppTestCpythonExtensionBase):
     def test_fromobject(self):
@@ -172,8 +172,6 @@
         # in ignored

     def test_releasebuffer(self):
-        if not self.runappdirect:
-            skip("Fails due to ll2ctypes nonsense")
         module = self.import_extension('foo', [
             ("create_test", "METH_NOARGS",
              """
diff --git a/pypy/module/cpyext/test/test_traceback.py b/pypy/module/cpyext/test/test_traceback.py
--- a/pypy/module/cpyext/test/test_traceback.py
+++ b/pypy/module/cpyext/test/test_traceback.py
@@ -3,17 +3,19 @@
 from pypy.module.cpyext.pyobject import PyObject, make_ref, from_ref
 from pypy.module.cpyext.pytraceback import PyTracebackObject
 from pypy.interpreter.pytraceback import PyTraceback
-from pypy.interpreter.pyframe import PyFrame
+from pypy.interpreter.baseobjspace import AppExecCache

 class TestPyTracebackObject(BaseApiTest):
     def test_traceback(self, space, api):
-        w_traceback = space.appexec([], """():
+        src = """():
             import sys
             try:
                 1/0
             except:
                 return sys.exc_info()[2]
-        """)
+        """
+        w_traceback = space.appexec([], src)
+
         py_obj = make_ref(space, w_traceback)
         py_traceback = rffi.cast(PyTracebackObject, py_obj)
         assert (from_ref(space, rffi.cast(PyObject, py_traceback.c_ob_type)) is
@@ -38,3 +40,5 @@
         assert lltype.normalizeptr(py_traceback) is None

         api.Py_DecRef(py_obj)
+        # hack to allow the code object to be freed
+        del space.fromcache(AppExecCache).content[src]
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -24,6 +24,7 @@
     def test_tuple_realize_refuses_nulls(self, space, api):
         py_tuple = api.PyTuple_New(1)
         py.test.raises(FatalError, from_ref, space, py_tuple)
+        api.Py_DecRef(py_tuple)

     def test_tuple_resize(self, space, api):
         w_42 = space.wrap(42)
@@ -70,6 +71,7 @@
         w_tuple = from_ref(space, py_tuple)
         assert space.eq_w(w_tuple,
                           space.newtuple([space.wrap(42), space.wrap(43)]))
+        api.Py_DecRef(py_tuple)

     def test_getslice(self, space, api):
         w_tuple = space.newtuple([space.wrap(i) for i in range(10)])
@@ -174,6 +176,7 @@
                 res = PyTuple_SetItem(tuple, 0, one);
                 if (res != 0)
                 {
+                    Py_DECREF(one);
                     Py_DECREF(tuple);
                     return NULL;
                 }
@@ -187,14 +190,13 @@
                 /* Do something that uses the tuple, but does not incref */
                 t2 = PyTuple_GetSlice(tuple, 0, 1);
                 Py_DECREF(t2);
-                Py_INCREF(one);
                 res = PyTuple_SetItem(tuple, 0, one);
-                Py_DECREF(tuple);
                 if (res != 0)
                 {
-                    Py_DECREF(one);
+                    Py_DECREF(tuple);
                     return NULL;
                 }
+                Py_DECREF(tuple);
                 Py_INCREF(Py_None);
                 return Py_None;
                 """),
@@ -205,4 +207,3 @@
             raises(SystemError, module.set_after_use, s)
         else:
             module.set_after_use(s)
-
diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py
--- a/pypy/module/cpyext/tupleobject.py
+++ b/pypy/module/cpyext/tupleobject.py
@@ -143,6 +143,7 @@
     old_ref = tupleobj.c_ob_item[index]
     if pyobj_has_w_obj(ref):
         # similar but not quite equal to ref.c_ob_refcnt != 1 on CPython
+        decref(space, py_obj)
        raise oefmt(space.w_SystemError,
                    "PyTuple_SetItem called on tuple after use of tuple")
     tupleobj.c_ob_item[index] = py_obj    # consumes a reference
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -195,6 +195,8 @@
         py_getsetdef = make_GetSet(space, w_obj)
         assert space.isinstance_w(w_userdata, space.w_type)
         w_obj = W_GetSetPropertyEx(py_getsetdef, w_userdata)
+        # now w_obj.getset is py_getsetdef, which was freshly allocated
+        # XXX how is this ever released?
     # XXX assign to d_dname, d_type?
     assert isinstance(w_obj, W_GetSetPropertyEx)
     py_getsetdescr.c_d_getset = w_obj.getset
@@ -922,7 +924,9 @@
         bases_w = []
     else:
         bases_w = [from_ref(space, base_pyo)]
-    pto.c_tp_bases = make_ref(space, space.newtuple(bases_w))
+    is_heaptype = bool(pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE)
+    pto.c_tp_bases = make_ref(space, space.newtuple(bases_w),
+                              immortal=not is_heaptype)

 def finish_type_2(space, pto, w_obj):
     """
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
@@ -562,3 +562,13 @@

         x = X()
         assert x.x == 0
+
+    def test_duplicate_names(self):
+        class S(Structure):
+            _fields_ = [('a', c_int),
+                        ('b', c_int),
+                        ('a', c_byte)]
+        s = S(260, -123)
+        assert sizeof(s) == 3 * sizeof(c_int)
+        assert s.a == 4        # 256 + 4
+        assert s.b == -123
diff --git a/pypy/objspace/std/specialisedtupleobject.py b/pypy/objspace/std/specialisedtupleobject.py
--- a/pypy/objspace/std/specialisedtupleobject.py
+++ b/pypy/objspace/std/specialisedtupleobject.py
@@ -74,15 +74,15 @@
             elif typetuple[i] == int:
                 # mimic cpythons behavior of a hash value of -2 for -1
                 y = value
-                if y == -1:
-                    y = -2
+                y -= (y == -1)  # No explicit condition, to avoid JIT bridges
             elif typetuple[i] == float:
                 # get the correct hash for float which is an
                 # integer & other less frequent cases
                 from pypy.objspace.std.floatobject import _hash_float
                 y = _hash_float(space, value)
+                y -= (y == -1)
             else:
-                y = compute_hash(value)
+                assert 0, "unreachable"
             x = (x ^ y) * mult
             z -= 1
             mult += 82520 + z + z
diff --git a/pypy/objspace/std/test/test_specialisedtupleobject.py b/pypy/objspace/std/test/test_specialisedtupleobject.py
--- a/pypy/objspace/std/test/test_specialisedtupleobject.py
+++ b/pypy/objspace/std/test/test_specialisedtupleobject.py
@@ -37,6 +37,8 @@
                 self.space.eq(self.space.hash(N_w_tuple),
                               self.space.hash(S_w_tuple)))

+        hash_test([-1, -1])
+        hash_test([-1.0, -1.0])
         hash_test([1, 2])
         hash_test([1.5, 2.8])
         hash_test([1.0, 2.0])
diff --git a/rpython/jit/metainterp/logger.py b/rpython/jit/metainterp/logger.py
--- a/rpython/jit/metainterp/logger.py
+++ b/rpython/jit/metainterp/logger.py
@@ -13,10 +13,11 @@
         self.guard_number = guard_number

     def log_loop_from_trace(self, trace, memo):
+        debug_start("jit-log-noopt")
         if not have_debug_prints():
+            debug_stop("jit-log-noopt")
             return
         inputargs, ops = self._unpack_trace(trace)
-        debug_start("jit-log-noopt")
         debug_print("# Traced loop or bridge with", len(ops), "ops")
         logops = self._log_operations(inputargs, ops, None, memo)
         debug_stop("jit-log-noopt")
diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
--- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
@@ -84,7 +84,6 @@

     # heap knowledge: we store triples of known heap fields in non-virtual
     # structs
-    # XXX could be extended to arrays
     if optimizer.optheap:
         triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes)
         # can only encode descrs that have a known index into
diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py
--- a/rpython/rtyper/lltypesystem/lltype.py
+++ b/rpython/rtyper/lltypesystem/lltype.py
@@ -2208,7 +2208,7 @@
     return _ptr(Ptr(T), o, solid)

 @analyzer_for(malloc)
-def ann_malloc(s_T, s_n=None, s_flavor=None, s_zero=None,
+def ann_malloc(s_T, s_n=None, s_flavor=None, s_immortal=None, s_zero=None,
                s_track_allocation=None, s_add_memory_pressure=None,
                s_nonmovable=None):
     assert (s_n is None or s_n.knowntype == int
diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py
--- a/rpython/rtyper/rbuiltin.py
+++ b/rpython/rtyper/rbuiltin.py
@@ -347,19 +347,20 @@
 # annotation of low-level types

 @typer_for(lltype.malloc)
-def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None,
-                 i_add_memory_pressure=None, i_nonmovable=None):
+def rtype_malloc(hop, i_flavor=None, i_immortal=None, i_zero=None,
+                 i_track_allocation=None, i_add_memory_pressure=None, i_nonmovable=None):
     assert hop.args_s[0].is_constant()
     vlist = [hop.inputarg(lltype.Void, arg=0)]
     opname = 'malloc'
     kwds_v = parse_kwds(
         hop,
         (i_flavor, lltype.Void),
+        (i_immortal, None),
         (i_zero, None),
         (i_track_allocation, None),
         (i_add_memory_pressure, None),
         (i_nonmovable, None))
-    (v_flavor, v_zero, v_track_allocation,
+    (v_flavor, v_immortal, v_zero, v_track_allocation,
      v_add_memory_pressure, v_nonmovable) = kwds_v
     flags = {'flavor': 'gc'}
     if v_flavor is not None:

From pypy.commits at gmail.com  Mon Aug  7 10:05:59 2017
From: pypy.commits at gmail.com (mattip)
Date: Mon, 07 Aug 2017 07:05:59 -0700 (PDT)
Subject: [pypy-commit] pypy default: add get_config_h_filename for
 cygwincompiler, which justifiably fails later anyway
Message-ID: <598873c7.57b61c0a.ae42a.288e@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92107:a85c3651973e
Date: 2017-08-07 17:05 +0300
http://bitbucket.org/pypy/pypy/changeset/a85c3651973e/

Log:	add get_config_h_filename for cygwincompiler, which justifiably
	fails later anyway

diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py
--- a/lib-python/2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/2.7/distutils/sysconfig_pypy.py
@@ -218,6 +218,10 @@
     compiler.shared_lib_extension = so_ext

+def get_config_h_filename():
+    """Returns the path of pyconfig.h."""
+    inc_dir = get_python_inc(plat_specific=1)
+    return os.path.join(inc_dir, 'pyconfig.h')

 from sysconfig_cpython import (
     parse_makefile, _variable_rx, expand_makefile_vars)

From pypy.commits at gmail.com  Mon Aug  7 10:28:28 2017
From: pypy.commits at gmail.com (Dodan)
Date: Mon, 07 Aug 2017 07:28:28 -0700 (PDT)
Subject: [pypy-commit] pypy py3.5-sendmsg-recvmsg: Corrected sendmsg &
 recvmsg. Thanks Ronan Lamy
Message-ID: <5988790c.28afdf0a.f8853.6d91@mx.google.com>

Author: Dodan Mihai
Branch: py3.5-sendmsg-recvmsg
Changeset: r92108:528a9fe09ebd
Date: 2017-08-07 17:27 +0300
http://bitbucket.org/pypy/pypy/changeset/528a9fe09ebd/

Log:	Corrected sendmsg & recvmsg. Thanks Ronan Lamy

diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -469,27 +469,27 @@
         :param flags: Receive flag. For more details, please check the Unix manual
         :return: a tuple consisting of the message, the ancillary data, return flag and the address.
""" - if (message_size < 0): + if message_size < 0: raise oefmt(space.w_ValueError, "negative buffer size in recvmsg()") if ancbufsize < 0: raise oefmt(space.w_ValueError, "invalid ancillary data buffer length") while True: try: - tuple = self.sock.recvmsg(message_size, ancbufsize, flags) - message = space.newbytes(tuple[0]) - list = [] - for l in tuple[1]: + recvtup = self.sock.recvmsg(message_size, ancbufsize, flags) + w_message = space.newbytes(recvtup[0]) + anclist = [] + for l in recvtup[1]: tup = space.newtuple([space.newint(l[0]), space.newint(l[1]), space.newbytes(l[2])]) - list.append(tup) + anclist.append(tup) - anc = space.newlist(list) + w_anc = space.newlist(anclist) - flag = space.newint(tuple[2]) - if (tuple[3] is not None): - address = addr_as_object(tuple[3], self.sock.fd, space) + w_flag = space.newint(recvtup[2]) + if (recvtup[3] is not None): + w_address = addr_as_object(recvtup[3], self.sock.fd, space) else: - address = space.w_None - rettup = space.newtuple([message, anc, flag, address]) + w_address = space.w_None + rettup = space.newtuple([w_message, w_anc, w_flag, w_address]) break except SocketError as e: converted_error(space, e, eintr_retry=True) @@ -550,7 +550,8 @@ converted_error(space, e, eintr_retry=True) return space.newint(count) - def sendmsg_w(self, space, w_data, w_ancillary=None, w_flags=None ,w_address=None): + @unwrap_spec(flags=int) + def sendmsg_w(self, space, w_data, w_ancillary=None, flags=0 ,w_address=None): """ sendmsg(data[,ancillary[,flags[,address]]]) -> bytes_sent Send normal and ancillary data to the socket, gathering the non-ancillary data @@ -568,101 +569,44 @@ # Get the flag and address from the object space while True: try: - flags = 0 - if space.is_none(w_flags) is False: - flags = space.int_w(w_flags) - address = None - if space.is_none(w_address) is False: + if not space.is_none(w_address): address = self.addr_from_object(space, w_address) # find data's type in the ObjectSpace and get a list of string out of 
it. data = [] - if (w_data.typedef.name == 'list'): - for i in w_data.getitems(): - if space.isinstance_w(i, space.w_bytes): - data.append(space.bytes_w(i)) - else: - if (i.typedef.name == 'array.array'): - data.append(space.bytes_w(i.descr_tobytes(space))) - else: - if (i.typedef.name == 'memoryview'): - data.append(space.bytes_w(i.descr_tobytes(space))) - else: - raise oefmt(space.w_TypeError, "a bytes-like object is required") - else: - while True: - try: - if (space.is_generator(w_data) is False): - raise oefmt(space.w_TypeError, "sendmsg(): argument 1 must be iterable") - i = space.next(w_data) - if space.isinstance_w(i, space.w_bytes): - data.append(space.bytes_w(i)) - else: - if (i.typedef.name == 'array.array'): - data.append(space.bytes_w(i.descr_tobytes(space))) - else: - if (i.typedef.name == 'memoryview'): - data.append(space.bytes_w(i.descr_tobytes(space))) - else: - raise oefmt(space.w_TypeError, "a bytes-like object is required") - except OperationError as e: - if not e.match(space, space.w_StopIteration): - raise - break + data_iter = space.unpackiterable(w_data) + for i in data_iter: + if space.isinstance_w(i, space.w_bytes): + data.append(space.bytes_w(i)) + elif (i.typedef.name == 'array.array'): + data.append(space.bytes_w(i.descr_tobytes(space))) + elif (i.typedef.name == 'memoryview'): + data.append(space.bytes_w(i.descr_tobytes(space))) + else: + raise oefmt(space.w_TypeError, "a bytes-like object is required") # find the ancillary's type in the ObjectSpace and get a list of tuples out of it. 
                 ancillary = []
                 if w_ancillary is not None:
-                    if (space.isinstance_w(w_ancillary, space.w_list)):
-                        for i in w_ancillary.getitems():
-                            if (space.isinstance_w(i, space.w_tuple) is False):
-                                raise oefmt(space.w_TypeError, "[sendmsg() ancillary data items]() argument must be sequence")
-                            if (space.len_w(i) == 3):
-                                level = space.int_w(space.getitem(i, space.newint(0)))
-                                type = space.int_w(space.getitem(i, space.newint(1)))
-                                if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'):
-                                    cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space))
-                                else:
-                                    if (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)):
-                                        cont = space.bytes_w(space.getitem(i, space.newint(2)))
-                                    else:
-                                        raise oefmt(space.w_TypeError, "a bytes-like object is required")
-                                tup = (level, type, cont)
-                                ancillary.append(tup)
-                            else:
-                                raise oefmt(space.w_TypeError,
-                                            "[sendmsg() ancillary data items]() argument must be sequence of length 3")
-
-                    else:
-                        while True:
-                            try:
-                                if (space.is_generator(w_ancillary) is False):
-                                    raise oefmt(space.w_TypeError,
-                                                "[sendmsg() ancillary data items]() argument must be sequence")
-                                i = space.next(w_ancillary)
-                                if (space.isinstance_w(i, space.w_tuple) is False):
-                                    raise oefmt(space.w_TypeError,
-                                                "[sendmsg() ancillary data items]() argument must be sequence of length 3")
-                                if (space.len_w(i) != 3):
-                                    raise oefmt(space.w_TypeError,
-                                                "[sendmsg() ancillary data items]() argument must be sequence of length 3")
-                            except OperationError as e:
-                                if not e.match(space, space.w_StopIteration):
-                                    raise
-                                break
+                    anc_iter = space.unpackiterable(w_ancillary)
+                    for i in anc_iter:
+                        if (not space.isinstance_w(i, space.w_tuple)):
+                            raise oefmt(space.w_TypeError, "[sendmsg() ancillary data items]() argument must be sequence")
+                        if (space.len_w(i) == 3):
                             level = space.int_w(space.getitem(i, space.newint(0)))
                             type = space.int_w(space.getitem(i, space.newint(1)))
                             if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'):
                                 cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space))
+                            elif (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)):
+                                cont = space.bytes_w(space.getitem(i, space.newint(2)))
                             else:
-                                if (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)):
-                                    cont = space.bytes_w(space.getitem(i, space.newint(2)))
-                                else:
-                                    raise oefmt(space.w_TypeError, "a bytes-like object is required")
+                                raise oefmt(space.w_TypeError, "a bytes-like object is required")
                             tup = (level, type, cont)
                             ancillary.append(tup)
-
+                        else:
+                            raise oefmt(space.w_TypeError,
+                                        "[sendmsg() ancillary data items]() argument must be sequence of length 3")
                 count = self.sock.sendmsg(data, ancillary, flags, address)
                 if count < 0:

From pypy.commits at gmail.com  Mon Aug  7 10:28:47 2017
From: pypy.commits at gmail.com (mattip)
Date: Mon, 07 Aug 2017 07:28:47 -0700 (PDT)
Subject: [pypy-commit] pypy default: typo
Message-ID: <5988791f.8c941c0a.2a8bd.ec92@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92109:8a997c17d016
Date: 2017-08-07 17:26 +0300
http://bitbucket.org/pypy/pypy/changeset/8a997c17d016/

Log:	typo

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -62,7 +62,7 @@
 throwing away information about them less eagerly.


-.. branch: getarrayitem-into-bridges:
+.. branch: getarrayitem-into-bridges

 More information is retained into a bridge: knowledge about the content of
 arrays (at fixed indices) is stored in guards (and thus available at the

From pypy.commits at gmail.com  Tue Aug  8 07:35:42 2017
From: pypy.commits at gmail.com (Dodan)
Date: Tue, 08 Aug 2017 04:35:42 -0700 (PDT)
Subject: [pypy-commit] pypy py3.5-sendmsg-recvmsg: Simplified the sendmsg_w
 function in interp_socket
Message-ID: <5989a20e.4bd51c0a.58f7e.7ef1@mx.google.com>

Author: Dodan Mihai
Branch: py3.5-sendmsg-recvmsg
Changeset: r92110:59c1cbda22f6
Date: 2017-08-08 14:34 +0300
http://bitbucket.org/pypy/pypy/changeset/59c1cbda22f6/

Log:	Simplified the sendmsg_w function in interp_socket

diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -577,31 +577,20 @@
                 data = []
                 data_iter = space.unpackiterable(w_data)
                 for i in data_iter:
-                    if space.isinstance_w(i, space.w_bytes):
-                        data.append(space.bytes_w(i))
-                    elif (i.typedef.name == 'array.array'):
-                        data.append(space.bytes_w(i.descr_tobytes(space)))
-                    elif (i.typedef.name == 'memoryview'):
-                        data.append(space.bytes_w(i.descr_tobytes(space)))
-                    else:
-                        raise oefmt(space.w_TypeError, "a bytes-like object is required")
+                    data.append(space.readbuf_w(i).as_str())

                 # find the ancillary's type in the ObjectSpace and get a list of tuples out of it.
                 ancillary = []
                 if w_ancillary is not None:
                     anc_iter = space.unpackiterable(w_ancillary)
-                    for i in anc_iter:
-                        if (not space.isinstance_w(i, space.w_tuple)):
+                    for w_i in anc_iter:
+                        if not space.isinstance_w(w_i, space.w_tuple):
                             raise oefmt(space.w_TypeError, "[sendmsg() ancillary data items]() argument must be sequence")
-                        if (space.len_w(i) == 3):
-                            level = space.int_w(space.getitem(i, space.newint(0)))
-                            type = space.int_w(space.getitem(i, space.newint(1)))
-                            if (space.getitem(i, space.newint(2)).typedef.name == 'array.array'):
-                                cont = space.bytes_w(space.getitem(i, space.newint(2)).descr_tobytes(space))
-                            elif (space.isinstance_w(space.getitem(i, space.newint(2)), space.w_bytes)):
-                                cont = space.bytes_w(space.getitem(i, space.newint(2)))
-                            else:
-                                raise oefmt(space.w_TypeError, "a bytes-like object is required")
+                        if space.len_w(w_i) == 3:
+                            intemtup = space.unpackiterable(w_i)
+                            level = space.int_w(intemtup[0])
+                            type = space.int_w(intemtup[1])
+                            cont = space.readbuf_w(intemtup[2]).as_str()
                             tup = (level, type, cont)
                             ancillary.append(tup)
                         else:

From pypy.commits at gmail.com  Tue Aug  8 11:33:56 2017
From: pypy.commits at gmail.com (rlamy)
Date: Tue, 08 Aug 2017 08:33:56 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: hg merge py3.5
Message-ID: <5989d9e4.6ea0df0a.d3eb2.c464@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92111:474e84c737d0
Date: 2017-08-08 17:32 +0200
http://bitbucket.org/pypy/pypy/changeset/474e84c737d0/

Log:	hg merge py3.5

diff too long, truncating to 2000 out of 23586 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -27,16 +27,17 @@
 ^pypy/module/cpyext/test/.+\.manifest$
 ^pypy/module/test_lib_pypy/ctypes_tests/.+\.o$
 ^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
-^pypy/module/cppyy/src/.+\.o$
-^pypy/module/cppyy/bench/.+\.so$
-^pypy/module/cppyy/bench/.+\.root$
-^pypy/module/cppyy/bench/.+\.d$
-^pypy/module/cppyy/src/.+\.errors$
-^pypy/module/cppyy/test/.+_rflx\.cpp$
-^pypy/module/cppyy/test/.+\.so$
-^pypy/module/cppyy/test/.+\.rootmap$
-^pypy/module/cppyy/test/.+\.exe$
-^pypy/module/cppyy/test/.+_cint.h$
+^pypy/module/_cppyy/src/.+\.o$
+^pypy/module/_cppyy/bench/.+\.so$
+^pypy/module/_cppyy/bench/.+\.root$
+^pypy/module/_cppyy/bench/.+\.d$
+^pypy/module/_cppyy/src/.+\.errors$
+^pypy/module/_cppyy/test/.+_rflx\.cpp$
+^pypy/module/_cppyy/test/.+\.so$
+^pypy/module/_cppyy/test/.+\.rootmap$
+^pypy/module/_cppyy/test/.+\.exe$
+^pypy/module/_cppyy/test/.+_cint.h$
+^pypy/module/_cppyy/.+/*\.pcm$
 ^pypy/module/test_lib_pypy/cffi_tests/__pycache__.+$
 ^pypy/doc/.+\.html$
 ^pypy/doc/config/.+\.rst$
@@ -93,6 +94,3 @@
 ^release/
 ^rpython/_cache$
-pypy/module/cppyy/.+/*\.pcm
-
-
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@
 RUNINTERP = $(PYPY_EXECUTABLE)
 endif

-.PHONY: cffi_imports
+.PHONY: pypy-c cffi_imports

 pypy-c:
 	@echo
@@ -32,7 +32,7 @@
 	@echo "===================================================================="
 	@echo
 	@sleep 5
-	$(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py
+	cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py

 # Note: the -jN option, or MAKEFLAGS=-jN, are not usable.  They are
 # replaced with an opaque --jobserver option by the time this Makefile
@@ -40,4 +40,4 @@
 # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html

 cffi_imports: pypy-c
-	PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true
+	PYTHONPATH=. pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true
diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py
--- a/lib-python/2.7/distutils/unixccompiler.py
+++ b/lib-python/2.7/distutils/unixccompiler.py
@@ -226,7 +226,19 @@
         return "-L" + dir

     def _is_gcc(self, compiler_name):
-        return "gcc" in compiler_name or "g++" in compiler_name
+        # XXX PyPy workaround, look at the big comment below for more
+        # context. On CPython, the hack below works fine because
+        # `compiler_name` contains the name of the actual compiler which was
+        # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine).
+        # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end
+        # result is that we pass the wrong option to the compiler.
+        #
+        # The workaround is to *always* pretend to be GCC if we are on Linux:
+        # this should cover the vast majority of real systems, including the
+        # ones which use clang (which understands the '-Wl,-rpath' syntax as
+        # well)
+        return (sys.platform == "linux2" or
+                "gcc" in compiler_name or "g++" in compiler_name)

     def runtime_library_dir_option(self, dir):
         # XXX Hackish, at the very least.  See Python bug #445902:
diff --git a/lib-python/3/stat.py b/lib-python/3/stat.py
--- a/lib-python/3/stat.py
+++ b/lib-python/3/stat.py
@@ -139,13 +139,21 @@
 def filemode(mode):
     """Convert a file's mode to a string of the form '-rwxrwxrwx'."""
     perm = []
+
+    # The first group gets a question mark if none of the bits match the mode.
+    empty = "?"
+
     for table in _filemode_table:
         for bit, char in table:
             if mode & bit == bit:
                 perm.append(char)
                 break
         else:
-            perm.append("-")
+            perm.append(empty)
+
+    # All the rest of the positions get a - if the bits don't match.
+    empty = "-"
+
     return "".join(perm)
diff --git a/lib-python/3/test/test_stat.py b/lib-python/3/test/test_stat.py
--- a/lib-python/3/test/test_stat.py
+++ b/lib-python/3/test/test_stat.py
@@ -138,6 +138,10 @@
             self.assertS_IS("REG", st_mode)
             self.assertEqual(modestr, '-r--r--r--')
             self.assertEqual(self.statmod.S_IMODE(st_mode), 0o444)
+
+            # If there are only permission bits, no type bytes, a question
+            # mark is rendered in the type field.
+            self.assertEqual(self.statmod.filemode(0o420), '?r---w----')
         else:
             os.chmod(TESTFN, 0o700)
             st_mode, modestr = self.get_mode()
diff --git a/lib_pypy/_cffi_ssl/_stdssl/certificate.py b/lib_pypy/_cffi_ssl/_stdssl/certificate.py
--- a/lib_pypy/_cffi_ssl/_stdssl/certificate.py
+++ b/lib_pypy/_cffi_ssl/_stdssl/certificate.py
@@ -173,14 +173,13 @@
     return tuple(dn)

-STATIC_BIO_BUF = ffi.new("char[]", 2048)
-
 def _bio_get_str(biobuf):
-    length = lib.BIO_gets(biobuf, STATIC_BIO_BUF, len(STATIC_BIO_BUF)-1)
+    bio_buf = ffi.new("char[]", 2048)
+    length = lib.BIO_gets(biobuf, bio_buf, len(bio_buf)-1)
     if length < 0:
         if biobuf: lib.BIO_free(biobuf)
         raise ssl_error(None)
-    return _str_with_len(STATIC_BIO_BUF, length)
+    return _str_with_len(bio_buf, length)

 def _decode_certificate(certificate):
     retval = {}
diff --git a/lib_pypy/_curses.py b/lib_pypy/_curses.py
--- a/lib_pypy/_curses.py
+++ b/lib_pypy/_curses.py
@@ -411,7 +411,7 @@
             val = lib.mvwget_wch(self._win, *args, wch)
         else:
             raise error("get_wch requires 0 or 2 arguments")
-        _check_ERR(val, "get_wch"):
+        _check_ERR(val, "get_wch")
         return wch[0]

     def getkey(self, *args):
diff --git a/lib_pypy/_tkinter/tklib_build.py b/lib_pypy/_tkinter/tklib_build.py
--- a/lib_pypy/_tkinter/tklib_build.py
+++ b/lib_pypy/_tkinter/tklib_build.py
@@ -22,12 +22,27 @@
     linklibs = ['tcl', 'tk']
     libdirs = []
 else:
-    for _ver in ['', '8.6', '8.5', '']:
+    # On some Linux distributions, the tcl and tk libraries are
+    # stored in /usr/include, so we must check this case also
+    libdirs = []
+    found = False
+    for _ver in ['', '8.6', '8.5']:
         incdirs = ['/usr/include/tcl' + _ver]
         linklibs = ['tcl' + _ver, 'tk' + _ver]
-        libdirs = []
         if os.path.isdir(incdirs[0]):
+            found = True
             break
+    if not found:
+        for _ver in ['8.6', '8.5', '']:
+            incdirs = []
+            linklibs = ['tcl' + _ver, 'tk' + _ver]
+            if os.path.isfile(''.join(['/usr/lib/lib', linklibs[1], '.so'])):
+                found = True
+                break
+    if not found:
+        sys.stderr.write("*** TCL libraries not found! Falling back...\n")
+        incdirs = []
+        linklibs = ['tcl', 'tk']

 config_ffi = FFI()
 config_ffi.cdef("""
diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h
--- a/lib_pypy/cffi/_cffi_include.h
+++ b/lib_pypy/cffi/_cffi_include.h
@@ -95,6 +95,7 @@
 #define _cffi_from_c_ulong PyLong_FromUnsignedLong
 #define _cffi_from_c_longlong PyLong_FromLongLong
 #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong
+#define _cffi_from_c__Bool PyBool_FromLong

 #define _cffi_to_c_double PyFloat_AsDouble
 #define _cffi_to_c_float PyFloat_AsDouble
diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h
--- a/lib_pypy/cffi/_embedding.h
+++ b/lib_pypy/cffi/_embedding.h
@@ -1,7 +1,12 @@
 /***** Support code for embedding *****/

-#if defined(_MSC_VER)
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#if defined(_WIN32)
 #  define CFFI_DLLEXPORT  __declspec(dllexport)
 #elif defined(__GNUC__)
 #  define CFFI_DLLEXPORT  __attribute__((visibility("default")))
@@ -525,3 +530,7 @@
 #undef cffi_compare_and_swap
 #undef cffi_write_barrier
 #undef cffi_read_barrier
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py
--- a/lib_pypy/cffi/api.py
+++ b/lib_pypy/cffi/api.py
@@ -394,12 +394,17 @@
             replace_with = ' ' + replace_with
         return self._backend.getcname(cdecl, replace_with)

-    def gc(self, cdata, destructor):
+    def gc(self, cdata, destructor, size=0):
         """Return a new cdata object that points to the same
         data.  Later, when this new cdata object is garbage-collected,
         'destructor(old_cdata_object)' will be called.
+
+        The optional 'size' gives an estimate of the size, used to
+        trigger the garbage collection more eagerly.  So far only used
+        on PyPy.  It tells the GC that the returned object keeps alive
+        roughly 'size' bytes of external memory.
""" - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py --- a/lib_pypy/cffi/recompiler.py +++ b/lib_pypy/cffi/recompiler.py @@ -412,6 +412,9 @@ prnt(' }') prnt(' p[0] = (const void *)0x%x;' % self._version) prnt(' p[1] = &_cffi_type_context;') + prnt('#if PY_MAJOR_VERSION >= 3') + prnt(' return NULL;') + prnt('#endif') prnt('}') # on Windows, distutils insists on putting init_cffi_xyz in # 'export_symbols', so instead of fighting it, just give up and @@ -578,7 +581,7 @@ def _convert_expr_from_c(self, tp, var, context): if isinstance(tp, model.BasePrimitiveType): - if tp.is_integer_type(): + if tp.is_integer_type() and tp.name != '_Bool': return '_cffi_from_c_int(%s, %s)' % (var, tp.name) elif isinstance(tp, model.UnknownFloatType): return '_cffi_from_c_double(%s)' % (var,) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -296,7 +296,7 @@ def _convert_expr_from_c(self, tp, var, context): if isinstance(tp, model.PrimitiveType): - if tp.is_integer_type(): + if tp.is_integer_type() and tp.name != '_Bool': return '_cffi_from_c_int(%s, %s)' % (var, tp.name) elif tp.name != 'long double': return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var) @@ -872,6 +872,7 @@ #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define _cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong 
+#define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -39,7 +39,7 @@ "thread", "itertools", "pyexpat", "cpyext", "array", "binascii", "_multiprocessing", '_warnings', "_collections", "_multibytecodec", "_continuation", "_cffi_backend", - "_csv", "_pypyjson", "_posixsubprocess", # "cppyy", "micronumpy" + "_csv", "_pypyjson", "_posixsubprocess", # "_cppyy", "micronumpy" "_jitlog", ]) @@ -71,8 +71,8 @@ if name in translation_modules: translation_modules.remove(name) - if "cppyy" in working_modules: - working_modules.remove("cppyy") # not tested on win32 + if "_cppyy" in working_modules: + working_modules.remove("_cppyy") # not tested on win32 # The _locale module is needed by site.py on Windows default_modules.add("_locale") @@ -81,8 +81,8 @@ working_modules.remove('fcntl') # LOCK_NB not defined working_modules.remove("_minimal_curses") working_modules.remove("termios") - if "cppyy" in working_modules: - working_modules.remove("cppyy") # depends on ctypes + if "_cppyy" in working_modules: + working_modules.remove("_cppyy") # depends on ctypes #if sys.platform.startswith("linux"): # _mach = os.popen('uname -m', 'r').read().strip() @@ -94,7 +94,7 @@ '_multiprocessing': [('objspace.usemodules.time', True), ('objspace.usemodules.thread', True)], 'cpyext': [('objspace.usemodules.array', True)], - 'cppyy': [('objspace.usemodules.cpyext', True)], + '_cppyy': [('objspace.usemodules.cpyext', True)], 'faulthandler': [('objspace.usemodules._vmprof', True)], } module_suggests = { @@ -227,11 +227,6 @@ "use specialised tuples", default=False), - BoolOption("withcelldict", - "use dictionaries that are optimized for being used as module dicts", - default=False, - requires=[("objspace.honor__builtins__", False)]), - BoolOption("withliststrategies", "enable optimized ways to store 
lists of primitives ", default=True), @@ -291,7 +286,7 @@ # extra optimizations with the JIT if level == 'jit': - config.objspace.std.suggest(withcelldict=True) + pass # none at the moment def enable_allworkingmodules(config): diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -10,6 +10,18 @@ minutes on a fast machine -- and RAM-hungry. You will need **at least** 2 GB of memory on a 32-bit machine and 4GB on a 64-bit machine. +Before you start +---------------- + +Our normal development workflow avoids a full translation by using test-driven +development. You can read more about how to develop PyPy here_, and the latest +translated (hopefully functional) binary packages are available on our +buildbot's `nightly builds`_. + +.. _here: getting-started-dev.html +.. _`nightly builds`: http://buildbot.pypy.org/nightly + +You will need the build dependencies below to run the tests. Clone the repository -------------------- @@ -140,22 +152,61 @@ Run the translation ------------------- +We usually translate in the ``pypy/goal`` directory, so all the following +commands assume your ``$pwd`` is there. + Translate with JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=jit Translate without JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=2 +Note that this translates pypy via the ``targetpypystandalone.py`` file, so these +are shorthand for:: + + pypy ../../rpython/bin/rpython targetpypystandalone.py + +More help is available via ``--help`` at either option position, and more info +can be found in the :doc:`config/index` section. + (You can use ``python`` instead of ``pypy`` here, which will take longer but works too.) -If everything works correctly this will create an executable ``pypy-c`` in the -current directory. The executable behaves mostly like a normal Python -interpreter (see :doc:`cpython_differences`). +If everything works correctly this will: + +1. 
Run the rpython `translation chain`_, producing a database of the + entire pypy interpreter. This step is currently single threaded, and RAM + hungry. As part of this step, the chain creates a large number of C code + files and a Makefile to compile them in a + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. +2. Create an executable ``pypy-c`` by running the Makefile. This step can + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. + + +The resulting executable behaves mostly like a normal Python +interpreter (see :doc:`cpython_differences`), and is ready for testing, for +use as a base interpreter for a new virtualenv, or for packaging into a binary +suitable for installation on another machine running the same OS as the build +machine. + +Note that step 4 is merely done as a convenience; any of the steps may be rerun +without rerunning the previous steps. + +.. _`translation chain`: https://rpython.readthedocs.io/en/latest/translation.html + + +Making a debug build of PyPy +---------------------------- + +If the Makefile is rerun with the ``lldebug`` or ``lldebug0`` target, appropriate +compilation flags are added to include debug info and reduce compiler optimizations +to ``-O0``, respectively. If you stop in a debugger, you will see the +very wordy machine-generated C code from the rpython translation step, which +takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ @@ -169,14 +220,6 @@ .. _`out-of-line API mode`: http://cffi.readthedocs.org/en/latest/overview.html#real-example-api-level-out-of-line -Translating with non-standard options -------------------------------------- - -It is possible to have non-standard features enabled for translation, -but they are not really tested any more. 
Look, for example, at the -:doc:`objspace proxies ` document. - - Packaging (preparing for installation) -------------------------------------- @@ -205,14 +248,16 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in `package.py`_. Users + commands at install time; the exact list is in + :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in `package.py`_). Users seeing a broken + see the exact list in :source:`pypy/tool/release/package.py `). + Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file called ``_gdbm_cffi.pypy-41.so`` locally, which is a C extension diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.std.withcelldict.txt +++ /dev/null @@ -1,2 +0,0 @@ -Enable cell-dicts. This optimization is not helpful without the JIT. In the -presence of the JIT, it greatly helps looking up globals. diff --git a/pypy/doc/configuration.rst b/pypy/doc/configuration.rst --- a/pypy/doc/configuration.rst +++ b/pypy/doc/configuration.rst @@ -188,4 +188,6 @@ can be found on the ``config`` attribute of all ``TranslationContext`` instances and are described in :source:`rpython/config/translationoption.py`. The interpreter options are attached to the object space, also under the name ``config`` and are -described in :source:`pypy/config/pypyoption.py`. 
+described in :source:`pypy/config/pypyoption.py`. Both sets of options are +documented in the :doc:`config/index` section. + diff --git a/pypy/doc/cppyy.rst b/pypy/doc/cppyy.rst deleted file mode 100644 --- a/pypy/doc/cppyy.rst +++ /dev/null @@ -1,672 +0,0 @@ -cppyy: C++ bindings for PyPy -============================ - -The cppyy module delivers dynamic Python-C++ bindings. -It is designed for automation, high performance, scale, interactivity, and -handling all of modern C++ (11, 14, etc.). -It is based on `Cling`_ which, through `LLVM`_/`clang`_, provides C++ -reflection and interactivity. -Reflection information is extracted from C++ header files. -Cppyy itself is built into PyPy (an alternative exists for CPython), but -it requires a `backend`_, installable through pip, to interface with Cling. - -.. _Cling: https://root.cern.ch/cling -.. _LLVM: http://llvm.org/ -.. _clang: http://clang.llvm.org/ -.. _backend: https://pypi.python.org/pypi/PyPy-cppyy-backend - - -Installation ------------- - -This assumes PyPy2.7 v5.7 or later; earlier versions use a Reflex-based cppyy -module, which is no longer supported. -Both the tooling and user-facing Python codes are very backwards compatible, -however. -Further dependencies are cmake (for general build), Python2.7 (for LLVM), and -a modern C++ compiler (one that supports at least C++11). - -Assuming you have a recent enough version of PyPy installed, use pip to -complete the installation of cppyy:: - - $ MAKE_NPROCS=4 pypy-c -m pip install --verbose PyPy-cppyy-backend - -Set the number of parallel builds ('4' in this example, through the MAKE_NPROCS -environment variable) to a number appropriate for your machine. -The building process may take quite some time as it includes a customized -version of LLVM as part of Cling, which is why --verbose is recommended so that -you can see the build progress. 
- -The default installation will be under -$PYTHONHOME/site-packages/cppyy_backend/lib, -which needs to be added to your dynamic loader path (LD_LIBRARY_PATH). -If you need the dictionary and class map generation tools (used in the examples -below), you need to add $PYTHONHOME/site-packages/cppyy_backend/bin to your -executable path (PATH). - - -Basic bindings example ----------------------- - -These examples assume that cppyy_backend is pointed to by the environment -variable CPPYYHOME, and that CPPYYHOME/lib is added to LD_LIBRARY_PATH and -CPPYYHOME/bin to PATH. - -Let's first test with a trivial example whether all packages are properly -installed and functional. -Create a C++ header file with some class in it (all functions are made inline -for convenience; if you have out-of-line code, link with it as appropriate):: - - $ cat MyClass.h - class MyClass { - public: - MyClass(int i = -99) : m_myint(i) {} - - int GetMyInt() { return m_myint; } - void SetMyInt(int i) { m_myint = i; } - - public: - int m_myint; - }; - -Then, generate the bindings using ``genreflex`` (installed under -cppyy_backend/bin in site_packages), and compile the code:: - - $ genreflex MyClass.h - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyClass_rflx.cpp -o libMyClassDict.so -L$CPPYYHOME/lib -lCling - -Next, make sure that the library can be found through the dynamic lookup path -(the ``LD_LIBRARY_PATH`` environment variable on Linux, ``PATH`` on Windows), -for example by adding ".". -Now you're ready to use the bindings. 
-Since the bindings are designed to look pythonistic, it should be -straightforward:: - - $ pypy-c - >>>> import cppyy - >>>> cppyy.load_reflection_info("libMyClassDict.so") - - >>>> myinst = cppyy.gbl.MyClass(42) - >>>> print myinst.GetMyInt() - 42 - >>>> myinst.SetMyInt(33) - >>>> print myinst.m_myint - 33 - >>>> myinst.m_myint = 77 - >>>> print myinst.GetMyInt() - 77 - >>>> help(cppyy.gbl.MyClass) # shows that normal python introspection works - -That's all there is to it! - - -Automatic class loader ----------------------- - -There is one big problem in the code above, that prevents its use in a (large -scale) production setting: the explicit loading of the reflection library. -Clearly, if explicit load statements such as these show up in code downstream -from the ``MyClass`` package, then that prevents the ``MyClass`` author from -repackaging or even simply renaming the dictionary library. - -The solution is to make use of an automatic class loader, so that downstream -code never has to call ``load_reflection_info()`` directly. -The class loader makes use of so-called rootmap files, which ``genreflex`` -can produce. -These files contain the list of available C++ classes and specify the library -that needs to be loaded for their use (as an aside, this listing allows for a -cross-check to see whether reflection info is generated for all classes that -you expect). -By convention, the rootmap files should be located next to the reflection info -libraries, so that they can be found through the normal shared library search -path. -They can be concatenated together, or consist of a single rootmap file per -library. 
-For example:: - - $ genreflex MyClass.h --rootmap=libMyClassDict.rootmap --rootmap-lib=libMyClassDict.so - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyClass_rflx.cpp -o libMyClassDict.so -L$CPPYYHOME/lib -lCling - -where the first option (``--rootmap``) specifies the output file name, and the -second option (``--rootmap-lib``) the name of the reflection library where -``MyClass`` will live. -It is necessary to provide that name explicitly, since it is only in the -separate linking step where this name is fixed. -If the second option is not given, the library is assumed to be libMyClass.so, -a name that is derived from the name of the header file. - -With the rootmap file in place, the above example can be rerun without explicit -loading of the reflection info library:: - - $ pypy-c - >>>> import cppyy - >>>> myinst = cppyy.gbl.MyClass(42) - >>>> print myinst.GetMyInt() - 42 - >>>> # etc. ... - -As a caveat, note that the class loader is currently limited to classes only. - - -Advanced example ----------------- - -The following snippet of C++ is very contrived, to allow showing that such -pathological code can be handled and to show how certain features play out in -practice:: - - $ cat MyAdvanced.h - #include - - class Base1 { - public: - Base1(int i) : m_i(i) {} - virtual ~Base1() {} - int m_i; - }; - - class Base2 { - public: - Base2(double d) : m_d(d) {} - virtual ~Base2() {} - double m_d; - }; - - class C; - - class Derived : public virtual Base1, public virtual Base2 { - public: - Derived(const std::string& name, int i, double d) : Base1(i), Base2(d), m_name(name) {} - virtual C* gimeC() { return (C*)0; } - std::string m_name; - }; - - Base2* BaseFactory(const std::string& name, int i, double d) { - return new Derived(name, i, d); - } - -This code is still only in a header file, with all functions inline, for -convenience of the example. 
-If the implementations live in a separate source file or shared library, the -only change needed is to link those in when building the reflection library. - -If you were to run ``genreflex`` like above in the basic example, you will -find that not all classes of interest will be reflected, nor will be the -global factory function. -In particular, ``std::string`` will be missing, since it is not defined in -this header file, but in a header file that is included. -In practical terms, general classes such as ``std::string`` should live in a -core reflection set, but for the moment assume we want to have it in the -reflection library that we are building for this example. - -The ``genreflex`` script can be steered using a so-called `selection file`_ -(see "Generating Reflex Dictionaries") -which is a simple XML file specifying, either explicitly or by using a -pattern, which classes, variables, namespaces, etc. to select from the given -header file. -With the aid of a selection file, a large project can be easily managed: -simply ``#include`` all relevant headers into a single header file that is -handed to ``genreflex``. -In fact, if you hand multiple header files to ``genreflex``, then a selection -file is almost obligatory: without it, only classes from the last header will -be selected. -Then, apply a selection file to pick up all the relevant classes. -For our purposes, the following rather straightforward selection will do -(the name ``lcgdict`` for the root is historical, but required):: - - $ cat MyAdvanced.xml - - - - - - - -.. 
_selection file: https://root.cern.ch/how/how-use-reflex - -Now the reflection info can be generated and compiled:: - - $ genreflex MyAdvanced.h --selection=MyAdvanced.xml - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyAdvanced_rflx.cpp -o libAdvExDict.so -L$CPPYYHOME/lib -lCling - -and subsequently be used from PyPy:: - - >>>> import cppyy - >>>> cppyy.load_reflection_info("libAdvExDict.so") - - >>>> d = cppyy.gbl.BaseFactory("name", 42, 3.14) - >>>> type(d) - - >>>> isinstance(d, cppyy.gbl.Base1) - True - >>>> isinstance(d, cppyy.gbl.Base2) - True - >>>> d.m_i, d.m_d - (42, 3.14) - >>>> d.m_name == "name" - True - >>>> - -Again, that's all there is to it! - -A couple of things to note, though. -If you look back at the C++ definition of the ``BaseFactory`` function, -you will see that it declares the return type to be a ``Base2``, yet the -bindings return an object of the actual type ``Derived``? -This choice is made for a couple of reasons. -First, it makes method dispatching easier: if bound objects are always their -most derived type, then it is easy to calculate any offsets, if necessary. -Second, it makes memory management easier: the combination of the type and -the memory address uniquely identifies an object. -That way, it can be recycled and object identity can be maintained if it is -entered as a function argument into C++ and comes back to PyPy as a return -value. -Last, but not least, casting is decidedly unpythonistic. -By always providing the most derived type known, casting becomes unnecessary. -For example, the data member of ``Base2`` is simply directly available. -Note also that the unreflected ``gimeC`` method of ``Derived`` does not -preclude its use. -It is only the ``gimeC`` method that is unusable as long as class ``C`` is -unknown to the system. - - -Features --------- - -The following is not meant to be an exhaustive list, since cppyy is still -under active development. 
-Furthermore, the intention is that every feature is as natural as possible on -the python side, so if you find something missing in the list below, simply -try it out. -It is not always possible to provide exact mapping between python and C++ -(active memory management is one such case), but by and large, if the use of a -feature does not strike you as obvious, it is more likely to simply be a bug. -That is a strong statement to make, but also a worthy goal. -For the C++ side of the examples, refer to this :doc:`example code `, which was -bound using:: - - $ genreflex example.h --deep --rootmap=libexampleDict.rootmap --rootmap-lib=libexampleDict.so - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include example_rflx.cpp -o libexampleDict.so -L$CPPYYHOME/lib -lCling - -* **abstract classes**: Are represented as python classes, since they are - needed to complete the inheritance hierarchies, but will raise an exception - if an attempt is made to instantiate from them. - Example:: - - >>>> from cppyy.gbl import AbstractClass, ConcreteClass - >>>> a = AbstractClass() - Traceback (most recent call last): - File "", line 1, in - TypeError: cannot instantiate abstract class 'AbstractClass' - >>>> issubclass(ConcreteClass, AbstractClass) - True - >>>> c = ConcreteClass() - >>>> isinstance(c, AbstractClass) - True - >>>> - -* **arrays**: Supported for builtin data types only, as used from module - ``array``. - Out-of-bounds checking is limited to those cases where the size is known at - compile time (and hence part of the reflection info). - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> from array import array - >>>> c = ConcreteClass() - >>>> c.array_method(array('d', [1., 2., 3., 4.]), 4) - 1 2 3 4 - >>>> - -* **builtin data types**: Map onto the expected equivalent python types, with - the caveat that there may be size differences, and thus it is possible that - exceptions are raised if an overflow is detected. 
- -* **casting**: Is supposed to be unnecessary. - Object pointer returns from functions provide the most derived class known - in the hierarchy of the object being returned. - This is important to preserve object identity as well as to make casting, - a pure C++ feature after all, superfluous. - Example:: - - >>>> from cppyy.gbl import AbstractClass, ConcreteClass - >>>> c = ConcreteClass() - >>>> ConcreteClass.show_autocast.__doc__ - 'AbstractClass* ConcreteClass::show_autocast()' - >>>> d = c.show_autocast() - >>>> type(d) - - >>>> - - However, if need be, you can perform C++-style reinterpret_casts (i.e. - without taking offsets into account), by taking and rebinding the address - of an object:: - - >>>> from cppyy import addressof, bind_object - >>>> e = bind_object(addressof(d), AbstractClass) - >>>> type(e) - - >>>> - -* **classes and structs**: Get mapped onto python classes, where they can be - instantiated as expected. - If classes are inner classes or live in a namespace, their naming and - location will reflect that. - Example:: - - >>>> from cppyy.gbl import ConcreteClass, Namespace - >>>> ConcreteClass == Namespace.ConcreteClass - False - >>>> n = Namespace.ConcreteClass.NestedClass() - >>>> type(n) - - >>>> - -* **data members**: Public data members are represented as python properties - and provide read and write access on instances as expected. - Private and protected data members are not accessible. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> c = ConcreteClass() - >>>> c.m_int - 42 - >>>> - -* **default arguments**: C++ default arguments work as expected, but python - keywords are not supported. - It is technically possible to support keywords, but for the C++ interface, - the formal argument names have no meaning and are not considered part of the - API, hence it is not a good idea to use keywords. 
- Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> c = ConcreteClass() # uses default argument - >>>> c.m_int - 42 - >>>> c = ConcreteClass(13) - >>>> c.m_int - 13 - >>>> - -* **doc strings**: The doc string of a method or function contains the C++ - arguments and return types of all overloads of that name, as applicable. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> print ConcreteClass.array_method.__doc__ - void ConcreteClass::array_method(int*, int) - void ConcreteClass::array_method(double*, int) - >>>> - -* **enums**: Are translated as ints with no further checking. - -* **functions**: Work as expected and live in their appropriate namespace - (which can be the global one, ``cppyy.gbl``). - -* **inheritance**: All combinations of inheritance on the C++ (single, - multiple, virtual) are supported in the binding. - However, new python classes can only use single inheritance from a bound C++ - class. - Multiple inheritance would introduce two "this" pointers in the binding. - This is a current, not a fundamental, limitation. - The C++ side will not see any overridden methods on the python side, as - cross-inheritance is planned but not yet supported. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> help(ConcreteClass) - Help on class ConcreteClass in module __main__: - - class ConcreteClass(AbstractClass) - | Method resolution order: - | ConcreteClass - | AbstractClass - | cppyy.CPPObject - | __builtin__.CPPInstance - | __builtin__.object - | - | Methods defined here: - | - | ConcreteClass(self, *args) - | ConcreteClass::ConcreteClass(const ConcreteClass&) - | ConcreteClass::ConcreteClass(int) - | ConcreteClass::ConcreteClass() - | - etc. .... - -* **memory**: C++ instances created by calling their constructor from python - are owned by python. - You can check/change the ownership with the _python_owns flag that every - bound instance carries. 
- Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> c = ConcreteClass() - >>>> c._python_owns # True: object created in Python - True - >>>> - -* **methods**: Are represented as python methods and work as expected. - They are first class objects and can be bound to an instance. - Virtual C++ methods work as expected. - To select a specific virtual method, do like with normal python classes - that override methods: select it from the class that you need, rather than - calling the method on the instance. - To select a specific overload, use the __dispatch__ special function, which - takes the name of the desired method and its signature (which can be - obtained from the doc string) as arguments. - -* **namespaces**: Are represented as python classes. - Namespaces are more open-ended than classes, so sometimes initial access may - result in updates as data and functions are looked up and constructed - lazily. - Thus the result of ``dir()`` on a namespace shows the classes available, - even if they may not have been created yet. - It does not show classes that could potentially be loaded by the class - loader. - Once created, namespaces are registered as modules, to allow importing from - them. - Namespace currently do not work with the class loader. - Fixing these bootstrap problems is on the TODO list. - The global namespace is ``cppyy.gbl``. - -* **NULL**: Is represented as ``cppyy.gbl.nullptr``. - In C++11, the keyword ``nullptr`` is used to represent ``NULL``. - For clarity of intent, it is recommended to use this instead of ``None`` - (or the integer ``0``, which can serve in some cases), as ``None`` is better - understood as ``void`` in C++. - -* **operator conversions**: If defined in the C++ class and a python - equivalent exists (i.e. all builtin integer and floating point types, as well - as ``bool``), it will map onto that python conversion. - Note that ``char*`` is mapped onto ``__str__``. 
- Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> print ConcreteClass() - Hello operator const char*! - >>>> - -* **operator overloads**: If defined in the C++ class and if a python - equivalent is available (not always the case, think e.g. of ``operator||``), - then they work as expected. - Special care needs to be taken for global operator overloads in C++: first, - make sure that they are actually reflected, especially for the global - overloads for ``operator==`` and ``operator!=`` of STL vector iterators in - the case of gcc (note that they are not needed to iterate over a vector). - Second, make sure that reflection info is loaded in the proper order. - I.e. that these global overloads are available before use. - -* **pointers**: For builtin data types, see arrays. - For objects, a pointer to an object and an object looks the same, unless - the pointer is a data member. - In that case, assigning to the data member will cause a copy of the pointer - and care should be taken about the object's life time. - If a pointer is a global variable, the C++ side can replace the underlying - object and the python side will immediately reflect that. - -* **PyObject***: Arguments and return types of ``PyObject*`` can be used, and - passed on to CPython API calls. - Since these CPython-like objects need to be created and tracked (this all - happens through ``cpyext``) this interface is not particularly fast. - -* **static data members**: Are represented as python property objects on the - class and the meta-class. - Both read and write access is as expected. - -* **static methods**: Are represented as python's ``staticmethod`` objects - and can be called both from the class as well as from instances. - -* **strings**: The std::string class is considered a builtin C++ type and - mixes quite well with python's str. - Python's str can be passed where a ``const char*`` is expected, and an str - will be returned if the return type is ``const char*``. 
- -* **templated classes**: Are represented in a meta-class style in python. - This may look a little bit confusing, but conceptually is rather natural. - For example, given the class ``std::vector``, the meta-class part would - be ``std.vector``. - Then, to get the instantiation on ``int``, do ``std.vector(int)`` and to - create an instance of that class, do ``std.vector(int)()``:: - - >>>> import cppyy - >>>> cppyy.load_reflection_info('libexampleDict.so') - >>>> cppyy.gbl.std.vector # template metatype - - >>>> cppyy.gbl.std.vector(int) # instantiates template -> class - '> - >>>> cppyy.gbl.std.vector(int)() # instantiates class -> object - <__main__.std::vector object at 0x00007fe480ba4bc0> - >>>> - - Note that templates can be build up by handing actual types to the class - instantiation (as done in this vector example), or by passing in the list of - template arguments as a string. - The former is a lot easier to work with if you have template instantiations - using classes that themselves are templates in the arguments (think e.g a - vector of vectors). - All template classes must already exist in the loaded reflection info, they - do not work (yet) with the class loader. - - For compatibility with other bindings generators, use of square brackets - instead of parenthesis to instantiate templates is supported as well. - -* **templated functions**: Automatically participate in overloading and are - used in the same way as other global functions. - -* **templated methods**: For now, require an explicit selection of the - template parameters. - This will be changed to allow them to participate in overloads as expected. - -* **typedefs**: Are simple python references to the actual classes to which - they refer. - -* **unary operators**: Are supported if a python equivalent exists, and if the - operator is defined in the C++ class. 
- -You can always find more detailed examples and see the full of supported -features by looking at the tests in pypy/module/cppyy/test. - -If a feature or reflection info is missing, this is supposed to be handled -gracefully. -In fact, there are unit tests explicitly for this purpose (even as their use -becomes less interesting over time, as the number of missing features -decreases). -Only when a missing feature is used, should there be an exception. -For example, if no reflection info is available for a return type, then a -class that has a method with that return type can still be used. -Only that one specific method can not be used. - - -Templates ---------- - -Templates can be automatically instantiated, assuming the appropriate header -files have been loaded or are accessible to the class loader. -This is the case for example for all of STL. -For example:: - - $ cat MyTemplate.h - #include - - class MyClass { - public: - MyClass(int i = -99) : m_i(i) {} - MyClass(const MyClass& s) : m_i(s.m_i) {} - MyClass& operator=(const MyClass& s) { m_i = s.m_i; return *this; } - ~MyClass() {} - int m_i; - }; - -Run the normal ``genreflex`` and compilation steps:: - - $ genreflex MyTemplate.h --selection=MyTemplate.xml - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyTemplate_rflx.cpp -o libTemplateDict.so -L$CPPYYHOME/lib -lCling - -Subsequent use should be as expected. -Note the meta-class style of "instantiating" the template:: - - >>>> import cppyy - >>>> cppyy.load_reflection_info("libTemplateDict.so") - >>>> std = cppyy.gbl.std - >>>> MyClass = cppyy.gbl.MyClass - >>>> v = std.vector(MyClass)() - >>>> v += [MyClass(1), MyClass(2), MyClass(3)] - >>>> for m in v: - .... print m.m_i, - .... - 1 2 3 - >>>> - -The arguments to the template instantiation can either be a string with the -full list of arguments, or the explicit classes. -The latter makes for easier code writing if the classes passed to the -instantiation are themselves templates. 
- - -The fast lane -------------- - -By default, cppyy will use direct function pointers through `CFFI`_ whenever -possible. If this causes problems for you, you can disable it by setting the -CPPYY_DISABLE_FASTPATH environment variable. - -.. _CFFI: https://cffi.readthedocs.io/en/latest/ - - -CPython -------- - -Most of the ideas in cppyy come originally from the `PyROOT`_ project, which -contains a CPython-based cppyy.py module (with similar dependencies as the -one that comes with PyPy). -A standalone pip-installable version is planned, but for now you can install -ROOT through your favorite distribution installer (available in the science -section). - -.. _PyROOT: https://root.cern.ch/pyroot - -There are a couple of minor differences between the two versions of cppyy -(the CPython version has a few more features). -Work is on-going to integrate the nightly tests of both to make sure their -feature sets are equalized. - - -Python3 -------- - -The CPython version of cppyy supports Python3, assuming your packager has -build the backend for it. -The cppyy module has not been tested with the `Py3k`_ version of PyPy. -Note that the generated reflection information (from ``genreflex``) is fully -independent of Python, and does not need to be rebuild when switching versions -or interpreters. - -.. _Py3k: https://bitbucket.org/pypy/pypy/src/py3k - - -.. 
toctree:: - :hidden: - - cppyy_example diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst deleted file mode 100644 --- a/pypy/doc/cppyy_example.rst +++ /dev/null @@ -1,59 +0,0 @@ -File example.h -============== - -:: - - #include <iostream> - #include <vector> - - class AbstractClass { - public: - virtual ~AbstractClass() {} - virtual void abstract_method() = 0; - }; - - class ConcreteClass : AbstractClass { - public: - ConcreteClass(int n=42) : m_int(n) {} - ~ConcreteClass() {} - - virtual void abstract_method() { - std::cout << "called concrete method" << std::endl; - } - - void array_method(int* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - void array_method(double* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - AbstractClass* show_autocast() { - return this; - } - - operator const char*() { - return "Hello operator const char*!"; - } - - public: - int m_int; - }; - - namespace Namespace { - - class ConcreteClass { - public: - class NestedClass { - public: - std::vector<int> m_v; - }; - - }; - - } // namespace Namespace diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -337,6 +337,8 @@ - ``frozenset`` (empty frozenset only) + - unbound method objects (for Python 2 only) + This change requires some changes to ``id`` as well. ``id`` fulfills the following condition: ``x is y <=> id(x) == id(y)``. Therefore ``id`` of the above types will return a value that is computed from the argument, and can diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -12,7 +12,7 @@ * Write them in pure Python and use ctypes_. -* Write them in C++ and bind them through :doc:`cppyy <cppyy>` using Cling. +* Write them in C++ and bind them through cppyy_ using Cling.
* Write them as `RPython mixed modules`_. @@ -61,29 +61,22 @@ .. _libffi: http://sourceware.org/libffi/ -Cling and cppyy ---------------- +cppyy +----- -The builtin :doc:`cppyy <cppyy>` module uses reflection information, provided by -`Cling`_ (which needs to be `installed separately`_), of C/C++ code to -automatically generate bindings at runtime. -In Python, classes and functions are always runtime structures, so when they -are generated matters not for performance. -However, if the backend itself is capable of dynamic behavior, it is a much -better functional match, allowing tighter integration and more natural -language mappings. +For C++, _cppyy_ is an automated bindings generator available for both +PyPy and CPython. +_cppyy_ relies on declarations from C++ header files to dynamically +construct Python equivalent classes, functions, variables, etc. +It is designed for use by large scale programs and supports modern C++. +With PyPy, it leverages the built-in ``_cppyy`` module, allowing the JIT to +remove most of the cross-language overhead. -The :doc:`cppyy <cppyy>` module is written in RPython, thus PyPy's JIT is able to remove -most cross-language call overhead. +To install, run ``pip install cppyy``. +Further details are available in the `full documentation`_. -:doc:Full details are `available here <cppyy>`. +.. _`full documentation`: https://cppyy.readthedocs.org/ -.. _installed separately: https://pypi.python.org/pypi/PyPy-cppyy-backend -.. _Cling: https://root.cern.ch/cling - -.. toctree:: - - cppyy RPython Mixed Modules --------------------- diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst --- a/pypy/doc/getting-started-dev.rst +++ b/pypy/doc/getting-started-dev.rst @@ -35,8 +35,8 @@ * Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Run tests! (See - the rest of this page.)
+ Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) * Commit regularly with ``hg commit``. A one-line commit message is fine. We love to have tons of commits; make one as soon as you have @@ -113,6 +113,10 @@ make sure you have the correct version installed which you can find out with the ``--version`` switch. +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter + Now on to running some tests. PyPy has many different test directories and you can use shell completion to point at directories or files:: @@ -141,7 +145,7 @@ .. _py.test testing tool: http://pytest.org .. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage - +.. _`build requirements`: build.html#install-build-time-dependencies Special Introspection Features of the Untranslated Python Interpreter --------------------------------------------------------------------- diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -40,6 +40,9 @@ sure things are ported back to the trunk and to the branch as necessary. +* Maybe bump the SOABI number in module/imp/importing. This has many + implications, so make sure the PyPy community agrees to the change. + * Update and write documentation * update pypy/doc/contributor.rst (and possibly LICENSE) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,6 +5,14 @@ .. this is a revision shortly after release-pypy2.7-v5.8.0 .. startrev: 558bd00b3dd8 +In previous versions of PyPy, ``instance.method`` would return always +the same bound method object, when gotten out of the same instance (as +far as ``is`` and ``id()`` can tell). CPython doesn't do that. 
Now +PyPy, like CPython, returns a different bound method object every time. +For ``type.method``, PyPy2 still returns always the same *unbound* +method object; CPython does it for built-in types but not for +user-defined types. + .. branch: cffi-complex .. branch: cffi-char16-char32 @@ -25,3 +33,43 @@ .. branch: cpyext-hash_notimpl If ``tp_hash`` is ``PyObject_HashNotImplemented``, set ``obj.__dict__['__hash__']`` to None + +.. branch: cppyy-packaging + +Renaming of ``cppyy`` to ``_cppyy``. +The former is now an external package installable with ``pip install cppyy``. + +.. branch: Enable_PGO_for_clang + +.. branch: nopax + +At the end of translation, run ``attr -q -s pax.flags -V m`` on +PAX-enabled systems on the produced binary. This seems necessary +because PyPy uses a JIT. + +.. branch: pypy_bytearray + +Improve ``bytearray`` performance (backported from py3.5) + +.. branch: gc-del-limit-growth + +Fix the bounds in the GC when allocating a lot of objects with finalizers, +fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. + + +.. branch: getarrayitem-into-bridges: + +More information is retained into a bridge: knowledge about the content of +arrays (at fixed indices) is stored in guards (and thus available at the +beginning of bridges). Also, some better feeding of information about known +fields of constant objects into bridges. + +.. branch: cpyext-leakchecking + +Add support for leakfinder in cpyext tests (disabled for now, due to too many +failures). diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -2,6 +2,7 @@ Arguments objects. 
""" from rpython.rlib.debug import make_sure_not_resized +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import jit from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rstring import StringBuilder @@ -48,8 +49,8 @@ # behaviour but produces better error messages self.methodcall = methodcall + @not_rpython def __repr__(self): - """ NOT_RPYTHON """ name = self.__class__.__name__ if not self.keywords: return '%s(%s)' % (name, self.arguments_w,) diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1,4 +1,5 @@ import sys +import py from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES @@ -1271,8 +1272,22 @@ self.setitem(w_globals, w_key, self.builtin) return statement.exec_code(self, w_globals, w_locals) + @not_rpython + def appdef(self, source): + '''Create interp-level function object from app-level source. + + The source should be in the same format as for space.appexec(): + """(foo, bar): return 'baz'""" + ''' + source = source.lstrip() + assert source.startswith('('), "incorrect header in:\n%s" % (source,) + source = py.code.Source("def anonymous%s\n" % source) + w_glob = self.newdict(module=True) + self.exec_(str(source), w_glob, w_glob) + return self.getitem(w_glob, self.newtext('anonymous')) + @specialize.arg(2) - def appexec(self, posargs_w, source): + def appexec(self, posargs_w, source, cache=True): """ return value from executing given source at applevel. The source must look like '''(x, y): @@ -1280,7 +1295,11 @@ return result ''' """ - w_func = self.fromcache(AppExecCache).getorbuild(source) + if cache: + w_func = self.fromcache(AppExecCache).getorbuild(source) + else: + # NB: since appdef() is not-RPython, using cache=False also is. 
+ w_func = self.appdef(source) args = Arguments(self, list(posargs_w)) return self.call_args(w_func, args) @@ -1817,15 +1836,7 @@ class AppExecCache(SpaceCache): @not_rpython def build(cache, source): - space = cache.space - # XXX will change once we have our own compiler - import py - source = source.lstrip() - assert source.startswith('('), "incorrect header in:\n%s" % (source,) - source = py.code.Source("def anonymous%s\n" % source) - w_glob = space.newdict(module=True) - space.exec_(str(source), w_glob, w_glob) - return space.getitem(w_glob, space.newtext('anonymous')) + return cache.space.appdef(source) # Table describing the regular part of the interface of object spaces, diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -7,7 +7,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, specialize -from rpython.rlib.objectmodel import dont_inline +from rpython.rlib.objectmodel import dont_inline, not_rpython from rpython.rlib import rstack, rstackovf from rpython.rlib import rwin32 from rpython.rlib import runicode @@ -65,8 +65,9 @@ self.match(space, space.w_KeyboardInterrupt)) # note: an extra case is added in OpErrFmtNoArgs + @not_rpython def __str__(self): - "NOT_RPYTHON: Convenience for tracebacks." + "Convenience for tracebacks." s = self._w_value space = getattr(self.w_type, 'space', None) if space is not None: @@ -119,15 +120,16 @@ if RECORD_INTERPLEVEL_TRACEBACK: self.debug_excs.append(sys.exc_info()) + @not_rpython def print_application_traceback(self, space, file=None): - "NOT_RPYTHON: Dump a standard application-level traceback." + "Dump a standard application-level traceback." 
if file is None: file = sys.stderr self.print_app_tb_only(file) print >> file, self.errorstr(space) + @not_rpython def print_app_tb_only(self, file): - "NOT_RPYTHON" tb = self._application_traceback if tb: import linecache @@ -154,8 +156,9 @@ print >> file, l tb = tb.next + @not_rpython def print_detailed_traceback(self, space=None, file=None): - """NOT_RPYTHON: Dump a nice detailed interpreter- and + """Dump a nice detailed interpreter- and application-level traceback, useful to debug the interpreter.""" if file is None: file = sys.stderr diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -1,6 +1,7 @@ import sys from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib import jit, rgc, objectmodel TICK_COUNTER_STEP = 100 @@ -410,8 +411,9 @@ # to run at the next possible bytecode self.reset_ticker(-1) + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): - """NOT_RPYTHON: + """ Register the PeriodicAsyncAction action to be called whenever the tick counter becomes smaller than 0. If 'use_bytecode_counter' is True, make sure that we decrease the tick counter at every bytecode. 
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -516,8 +516,9 @@ def __init__(self, space, w_function, w_instance): self.space = space + assert w_instance is not None # unbound methods only exist in Python 2 self.w_function = w_function - self.w_instance = w_instance # or None + self.w_instance = w_instance def descr_method__new__(space, w_subtype, w_function, w_instance): if space.is_w(w_instance, space.w_None): @@ -577,24 +578,6 @@ return space.w_False return space.newbool(space.eq_w(self.w_function, w_other.w_function)) - def is_w(self, space, other): - if not isinstance(other, Method): - return False - return (self.w_instance is other.w_instance and - self.w_function is other.w_function) - - def immutable_unique_id(self, space): - from pypy.objspace.std.util import IDTAG_METHOD as tag - from pypy.objspace.std.util import IDTAG_SHIFT - if self.w_instance is not None: - id = space.bigint_w(space.id(self.w_instance)) - id = id.lshift(LONG_BIT) - else: - id = rbigint.fromint(0) - id = id.or_(space.bigint_w(space.id(self.w_function))) - id = id.lshift(IDTAG_SHIFT).int_or_(tag) - return space.newlong_from_rbigint(id) - def descr_method_hash(self): space = self.space w_result = space.hash(self.w_function) @@ -606,7 +589,7 @@ from pypy.interpreter.gateway import BuiltinCode w_mod = space.getbuiltinmodule('_pickle_support') mod = space.interp_w(MixedModule, w_mod) - w_instance = self.w_instance or space.w_None + w_instance = self.w_instance w_function = self.w_function if (isinstance(w_function, Function) and isinstance(w_function.code, BuiltinCode)): diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from 
rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -75,8 +75,8 @@ def _freeze_(self): return True + @not_rpython def unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -399,8 +399,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): - """NOT_RPYTHON""" self.behavior = behavior def _run(self, space, scope_w): @@ -654,9 +654,9 @@ # When a BuiltinCode is stored in a Function object, # you get the functionality of CPython's built-in function type. + @not_rpython def __init__(self, func, unwrap_spec=None, self_type=None, descrmismatch=None, doc=None): - "NOT_RPYTHON" # 'implfunc' is the interpreter-level function. # Note that this uses a lot of (construction-time) introspection. Code.__init__(self, func.__name__) @@ -1004,10 +1004,10 @@ instancecache = {} + @not_rpython def __new__(cls, f, app_name=None, unwrap_spec=None, descrmismatch=None, as_classmethod=False, doc=None): - "NOT_RPYTHON" # f must be a function whose name does NOT start with 'app_' self_type = None if hasattr(f, 'im_func'): @@ -1047,8 +1047,8 @@ return self + @not_rpython def _getdefaults(self, space): - "NOT_RPYTHON" alldefs_w = {} assert len(self._code._argnames) == len(self._code._unwrap_spec) for name, spec in zip(self._code._argnames, self._code._unwrap_spec): @@ -1124,8 +1124,8 @@ class GatewayCache(SpaceCache): + @not_rpython def build(cache, gateway): - "NOT_RPYTHON" space = cache.space defs_w, kw_defs_w = gateway._getdefaults(space) code = gateway._code @@ -1196,8 +1196,8 @@ w_globals = self.getwdict(space) return space.getitem(w_globals, space.newtext(name)) + @not_rpython def interphook(self, name): - "NOT_RPYTHON" def appcaller(space, *args_w): if not isinstance(space, ObjSpace): raise TypeError("first 
argument must be a space instance.") @@ -1234,15 +1234,16 @@ """NOT_RPYTHON The cache mapping each applevel instance to its lazily built w_dict""" + @not_rpython def build(self, app): - "NOT_RPYTHON. Called indirectly by Applevel.getwdict()." + "Called indirectly by Applevel.getwdict()." return build_applevel_dict(app, self.space) # __________ pure applevel version __________ +@not_rpython def build_applevel_dict(self, space): - "NOT_RPYTHON" w_glob = space.newdict(module=True) space.setitem(w_glob, space.newtext('__name__'), space.newtext(self.modname)) space.exec_(self.source, w_glob, w_glob, @@ -1253,8 +1254,9 @@ # ____________________________________________________________ +@not_rpython def appdef(source, applevel=ApplevelClass, filename=None): - """ NOT_RPYTHON: build an app-level helper function, like for example: + """ build an app-level helper function, like for example: myfunc = appdef('''myfunc(x, y): return x+y ''') @@ -1300,6 +1302,6 @@ # app2interp_temp is used for testing mainly +@not_rpython def app2interp_temp(func, applevel_temp=applevel_temp, filename=None): - """ NOT_RPYTHON """ return appdef(func, applevel_temp, filename=filename) diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -3,6 +3,7 @@ """ from rpython.rlib.listsort import make_timsort_class +from rpython.rlib.objectmodel import not_rpython class ThreadLocals: @@ -41,9 +42,8 @@ # but in some corner cases it is not...
unsure why self._value = None - +@not_rpython def make_weak_value_dictionary(space, keytype, valuetype): - "NOT_RPYTHON" if space.config.translation.rweakref: from rpython.rlib.rweakref import RWeakValueDictionary return RWeakValueDictionary(keytype, valuetype) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -3,6 +3,9 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.interpreter.baseobjspace import W_Root + +from rpython.rlib.objectmodel import not_rpython + import sys class MixedModule(Module): @@ -15,8 +18,8 @@ lazy = False submodule_name = None + @not_rpython def __init__(self, space, w_name): - """ NOT_RPYTHON """ Module.__init__(self, space, w_name) init_extra_module_attrs(space, self) self.lazy = True @@ -25,8 +28,9 @@ self.loaders = self.loaders.copy() # copy from the class to the inst self.submodules_w = [] + @not_rpython def install(self): - """NOT_RPYTHON: install this module, and it's submodules into + """install this module, and it's submodules into space.builtin_modules""" Module.install(self) if hasattr(self, "submodules"): @@ -66,8 +70,8 @@ self.w_initialdict = self.space.call_method(w_dict, 'copy') @classmethod + @not_rpython def get_applevel_name(cls): - """ NOT_RPYTHON """ if cls.applevel_name is not None: return cls.applevel_name else: @@ -163,8 +167,8 @@ self._frozen = True @classmethod + @not_rpython def buildloaders(cls): - """ NOT_RPYTHON """ if not hasattr(cls, 'loaders'): # build a constant dictionary out of # applevel/interplevel definitions @@ -194,8 +198,8 @@ return space.newtext_or_none(cls.__doc__) +@not_rpython def getinterpevalloader(pkgroot, spec): - """ NOT_RPYTHON """ def ifileloader(space): d = {'space': space} # EVIL HACK (but it works, and this is not RPython :-) @@ -235,8 +239,8 @@ return ifileloader applevelcache = {} +@not_rpython def
getappfileloader(pkgroot, appname, spec): - """ NOT_RPYTHON """ # hum, it's a bit more involved, because we usually # want the import at applevel modname, attrname = spec.split('.') diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -4,7 +4,7 @@ from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython class Module(W_Root): @@ -35,8 +35,9 @@ except OperationError: pass + @not_rpython def install(self): - """NOT_RPYTHON: installs this module into space.builtin_modules""" + """installs this module into space.builtin_modules""" modulename = self.space.text0_w(self.w_name) if modulename in self.space.builtin_modules: raise ValueError( @@ -44,8 +45,9 @@ "app-level module %r" % (modulename,)) self.space.builtin_modules[modulename] = self + @not_rpython def setup_after_space_initialization(self): - """NOT_RPYTHON: to allow built-in modules to do some more setup + """to allow built-in modules to do some more setup after the space is fully initialized.""" def init(self, space): diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -7,6 +7,7 @@ from rpython.rlib.debug import ll_assert_not_none from rpython.rlib.jit import hint from rpython.rlib.objectmodel import instantiate, specialize, we_are_translated +from rpython.rlib.objectmodel import not_rpython from rpython.rlib.rarithmetic import intmask, r_uint from rpython.tool.pairtype import extendabletype @@ -146,8 +147,9 @@ return None return d.w_locals + @not_rpython def __repr__(self): - # NOT_RPYTHON: useful in tracebacks + # useful in tracebacks return "<%s.%s executing %s at line %s" % ( self.__class__.__module__, self.__class__.__name__, self.pycode, self.get_last_lineno()) 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -7,7 +7,7 @@ from rpython.rlib import jit, rstackovf, rstring from rpython.rlib.debug import check_nonneg from rpython.rlib.objectmodel import ( - we_are_translated, always_inline, dont_inline) + we_are_translated, always_inline, dont_inline, not_rpython) from rpython.rlib.rarithmetic import r_uint, intmask from rpython.tool.sourcetools import func_with_new_name @@ -23,8 +23,8 @@ CANNOT_CATCH_MSG = ("catching classes that don't inherit from BaseException " "is not allowed in 3.x") +@not_rpython def unaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_1 = self.popvalue() @@ -34,8 +34,8 @@ return func_with_new_name(opimpl, "opcode_impl_for_%s" % operationname) +@not_rpython def binaryoperation(operationname): - """NOT_RPYTHON""" def opimpl(self, *ignored): operation = getattr(self.space, operationname) w_2 = self.popvalue() diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -1,5 +1,5 @@ # encoding: utf-8 -import pytest +import pytest, sys from pypy.interpreter import eval from pypy.interpreter.function import Function, Method, descr_function_get from pypy.interpreter.pycode import PyCode @@ -416,6 +416,11 @@ raises(ValueError, FunctionType.__setstate__, f, (1, 2, 3)) class AppTestMethod: + def setup_class(cls): + cls.w_runappdirect_on_cpython = cls.space.wrap( + cls.runappdirect and + '__pypy__' not in sys.builtin_module_names) + def test_simple_call(self): class A(object): def func(self, arg2): @@ -586,7 +591,6 @@ assert meth == meth assert meth == MethodType(func, object) - @pytest.mark.skipif("config.option.runappdirect") def test_method_identity(self): class A(object): def m(self): @@ -603,19 +607,24 @@ a =
A() a2 = A() - assert a.m is a.m - assert id(a.m) == id(a.m) - assert a.m is not a.n - assert id(a.m) != id(a.n) - assert a.m is not a2.m - assert id(a.m) != id(a2.m) + x = a.m; y = a.m + assert x is not y + assert id(x) != id(y) + assert x == y + assert x is not a.n + assert id(x) != id(a.n) + assert x is not a2.m + assert id(x) != id(a2.m) - assert A.m is A.m - assert id(A.m) == id(A.m) - assert A.m is not A.n - assert id(A.m) != id(A.n) - assert A.m is B.m - assert id(A.m) == id(B.m) + if not self.runappdirect_on_cpython: + assert A.m is A.m From pypy.commits at gmail.com Tue Aug 8 12:46:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 08 Aug 2017 09:46:06 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: uncomment disabled code in multiphase2.c Message-ID: <5989eace.0e951c0a.f71e3.7460@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92112:ac2fc4b50086 Date: 2017-08-08 18:45 +0200 http://bitbucket.org/pypy/pypy/changeset/ac2fc4b50086/ Log: uncomment disabled code in multiphase2.c diff --git a/pypy/module/cpyext/test/multiphase2.c b/pypy/module/cpyext/test/multiphase2.c --- a/pypy/module/cpyext/test/multiphase2.c +++ b/pypy/module/cpyext/test/multiphase2.c @@ -249,51 +249,51 @@ /**** Importing a non-module object ****/ -//static PyModuleDef def_nonmodule; -//static PyModuleDef def_nonmodule_with_methods; -// -///* Create a SimpleNamespace(three=3) */ -//static PyObject* -//createfunc_nonmodule(PyObject *spec, PyModuleDef *def) -//{ -// PyObject *dct, *ns, *three; -// -// if (def != &def_nonmodule && def != &def_nonmodule_with_methods) { -// PyErr_SetString(PyExc_SystemError, "def does not match"); -// return NULL; -// } -// -// dct = PyDict_New(); -// if (dct == NULL) -// return NULL; -// -// three = PyLong_FromLong(3); -// if (three == NULL) { -// Py_DECREF(dct); -// return NULL; -// } -// PyDict_SetItemString(dct, "three", three); -// Py_DECREF(three); -// -// ns = _PyNamespace_New(dct); -// Py_DECREF(dct); -// return ns; -//} -// 
-//static PyModuleDef_Slot slots_create_nonmodule[] = { -// {Py_mod_create, createfunc_nonmodule}, -// {0, NULL}, -//}; -// -//static PyModuleDef def_nonmodule = TEST_MODULE_DEF( -// "_testmultiphase_nonmodule", slots_create_nonmodule, NULL); -// -//PyMODINIT_FUNC -//PyInit__testmultiphase_nonmodule(PyObject *spec) -//{ -// return PyModuleDef_Init(&def_nonmodule); -//} -/* +static PyModuleDef def_nonmodule; +static PyModuleDef def_nonmodule_with_methods; + +/* Create a SimpleNamespace(three=3) */ +static PyObject* +createfunc_nonmodule(PyObject *spec, PyModuleDef *def) +{ + PyObject *dct, *ns, *three; + + if (def != &def_nonmodule && def != &def_nonmodule_with_methods) { + PyErr_SetString(PyExc_SystemError, "def does not match"); + return NULL; + } + + dct = PyDict_New(); + if (dct == NULL) + return NULL; + + three = PyLong_FromLong(3); + if (three == NULL) { + Py_DECREF(dct); + return NULL; + } + PyDict_SetItemString(dct, "three", three); + Py_DECREF(three); + + ns = _PyNamespace_New(dct); + Py_DECREF(dct); + return ns; +} + +static PyModuleDef_Slot slots_create_nonmodule[] = { + {Py_mod_create, createfunc_nonmodule}, + {0, NULL}, +}; + +static PyModuleDef def_nonmodule = TEST_MODULE_DEF( + "_testmultiphase_nonmodule", slots_create_nonmodule, NULL); + +PyMODINIT_FUNC +PyInit__testmultiphase_nonmodule(PyObject *spec) +{ + return PyModuleDef_Init(&def_nonmodule); +} + PyDoc_STRVAR(nonmodule_bar_doc, "bar(i,j)\n\ \n\ @@ -309,20 +309,20 @@ res = i - j; return PyLong_FromLong(res); } -*/ -//static PyMethodDef nonmodule_methods[] = { -// {"bar", nonmodule_bar, METH_VARARGS, nonmodule_bar_doc}, -// {NULL, NULL} /* sentinel */ -//}; -// -//static PyModuleDef def_nonmodule_with_methods = TEST_MODULE_DEF( -// "_testmultiphase_nonmodule_with_methods", slots_create_nonmodule, nonmodule_methods); -// -//PyMODINIT_FUNC -//PyInit__testmultiphase_nonmodule_with_methods(PyObject *spec) -//{ -// return PyModuleDef_Init(&def_nonmodule_with_methods); -//} + +static PyMethodDef 
nonmodule_methods[] = { + {"bar", nonmodule_bar, METH_VARARGS, nonmodule_bar_doc}, + {NULL, NULL} /* sentinel */ +}; + +static PyModuleDef def_nonmodule_with_methods = TEST_MODULE_DEF( + "_testmultiphase_nonmodule_with_methods", slots_create_nonmodule, nonmodule_methods); + +PyMODINIT_FUNC +PyInit__testmultiphase_nonmodule_with_methods(PyObject *spec) +{ + return PyModuleDef_Init(&def_nonmodule_with_methods); +} /**** Non-ASCII-named modules ****/ @@ -539,20 +539,20 @@ return PyModuleDef_Init(&def_create_unreported_exception); } -//static PyModuleDef_Slot slots_nonmodule_with_exec_slots[] = { -// {Py_mod_create, createfunc_nonmodule}, -// {Py_mod_exec, execfunc}, -// {0, NULL}, -//}; -// -//static PyModuleDef def_nonmodule_with_exec_slots = TEST_MODULE_DEF( -// "_testmultiphase_nonmodule_with_exec_slots", slots_nonmodule_with_exec_slots, NULL); -// -//PyMODINIT_FUNC -//PyInit__testmultiphase_nonmodule_with_exec_slots(PyObject *spec) -//{ -// return PyModuleDef_Init(&def_nonmodule_with_exec_slots); -//} +static PyModuleDef_Slot slots_nonmodule_with_exec_slots[] = { + {Py_mod_create, createfunc_nonmodule}, + {Py_mod_exec, execfunc}, + {0, NULL}, +}; + +static PyModuleDef def_nonmodule_with_exec_slots = TEST_MODULE_DEF( + "_testmultiphase_nonmodule_with_exec_slots", slots_nonmodule_with_exec_slots, NULL); + +PyMODINIT_FUNC +PyInit__testmultiphase_nonmodule_with_exec_slots(PyObject *spec) +{ + return PyModuleDef_Init(&def_nonmodule_with_exec_slots); +} static int execfunc_err(PyObject *mod) From pypy.commits at gmail.com Tue Aug 8 21:41:10 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 08 Aug 2017 18:41:10 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: Add test Message-ID: <598a6836.4f8f1c0a.91e7.9a63@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92113:c0c2ccfe883f Date: 2017-08-09 03:40 +0200 http://bitbucket.org/pypy/pypy/changeset/c0c2ccfe883f/ Log: Add test diff --git a/pypy/module/cpyext/test/multiphase2.c 
b/pypy/module/cpyext/test/multiphase2.c --- a/pypy/module/cpyext/test/multiphase2.c +++ b/pypy/module/cpyext/test/multiphase2.c @@ -417,7 +417,7 @@ PyDoc_STR("Not a PyModuleObject object, but requests per-module state"), 10, /* m_size */ NULL, /* m_methods */ - //slots_create_nonmodule, /* m_slots */ + slots_create_nonmodule, /* m_slots */ NULL, /* m_traverse */ NULL, /* m_clear */ NULL, /* m_free */ @@ -436,7 +436,7 @@ PyDoc_STR("PyModuleDef with negative m_size"), -1, /* m_size */ NULL, /* m_methods */ - NULL, //slots_create_nonmodule, /* m_slots */ + slots_create_nonmodule, /* m_slots */ NULL, /* m_traverse */ NULL, /* m_clear */ NULL, /* m_free */ diff --git a/pypy/module/cpyext/test/test_module.py b/pypy/module/cpyext/test/test_module.py --- a/pypy/module/cpyext/test/test_module.py +++ b/pypy/module/cpyext/test/test_module.py @@ -175,3 +175,38 @@ ex_class = module.Example importlib.reload(module) assert ex_class is module.Example + + def w_load_from_spec(self, loader, spec): + from importlib import util + module = util.module_from_spec(spec) + loader.exec_module(module) + return module + + def test_bad_modules(self): + # XXX: not a very good test, since most internal issues in cpyext + # cause SystemErrors. 
+ from importlib import machinery, util + NAME = 'multiphase2' + module = self.import_module(name=NAME) + origin = module.__loader__.path + for name_base in [ + 'bad_slot_large', + 'bad_slot_negative', + 'create_int_with_state', + 'negative_size', + 'export_null', + 'export_uninitialized', + 'export_raise', + 'export_unreported_exception', + 'create_null', + 'create_raise', + 'create_unreported_exception', + 'nonmodule_with_exec_slots', + 'exec_err', + 'exec_raise', + 'exec_unreported_exception', + ]: + name = '_testmultiphase_' + name_base + loader = machinery.ExtensionFileLoader(name, origin) + spec = util.spec_from_loader(name, loader) + raises(SystemError, self.load_from_spec, loader, spec) From pypy.commits at gmail.com Wed Aug 9 06:27:48 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 09 Aug 2017 03:27:48 -0700 (PDT) Subject: [pypy-commit] pypy default: test, fix calling unbound object method with no args like np.int32.__array__() Message-ID: <598ae3a4.6596df0a.b8ed8.f40a@mx.google.com> Author: Matti Picus Branch: Changeset: r92114:dcea72970920 Date: 2017-08-09 13:26 +0300 http://bitbucket.org/pypy/pypy/changeset/dcea72970920/ Log: test, fix calling unbound object method with no args like np.int32.__array__() diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -210,6 +210,10 @@ def cmethod_descr_call(space, w_self, __args__): self = space.interp_w(W_PyCFunctionObject, w_self) args_w, kw_w = __args__.unpack() + if len(args_w) < 1: + raise oefmt(space.w_TypeError, + "descriptor '%s' of '%s' object needs an argument", + self.name, self.w_objclass.getname(space)) w_instance = args_w[0] # XXX typecheck missing w_args = space.newtuple(args_w[1:]) w_kw = space.newdict() diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ 
b/pypy/module/cpyext/test/test_arraymodule.py @@ -21,6 +21,9 @@ assert arr.itemsize == 4 assert arr[2] == 3 assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") arr.append(4) assert arr.tolist() == [1, 2, 3, 4] assert len(arr) == 4 From pypy.commits at gmail.com Wed Aug 9 07:20:55 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 09 Aug 2017 04:20:55 -0700 (PDT) Subject: [pypy-commit] pypy default: warn only if mercurial explodes Message-ID: <598af017.6b99df0a.e9f8d.b3b3@mx.google.com> Author: fijal Branch: Changeset: r92115:1a9ef5428d4e Date: 2017-08-09 13:19 +0200 http://bitbucket.org/pypy/pypy/changeset/1a9ef5428d4e/ Log: warn only if mercurial explodes diff --git a/rpython/tool/version.py b/rpython/tool/version.py --- a/rpython/tool/version.py +++ b/rpython/tool/version.py @@ -59,8 +59,8 @@ p = Popen([str(hgexe), 'id', '-i', root], stdout=PIPE, stderr=PIPE, env=env) hgid = p.stdout.read().strip() - maywarn(p.stderr.read()) if p.wait() != 0: + maywarn(p.stderr.read()) hgid = '?' 
p = Popen([str(hgexe), 'id', '-t', root], From pypy.commits at gmail.com Wed Aug 9 07:20:57 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 09 Aug 2017 04:20:57 -0700 (PDT) Subject: [pypy-commit] pypy default: merge Message-ID: <598af019.e3afdf0a.fc2eb.208b@mx.google.com> Author: fijal Branch: Changeset: r92116:e3868e494742 Date: 2017-08-09 13:20 +0200 http://bitbucket.org/pypy/pypy/changeset/e3868e494742/ Log: merge diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py --- a/lib-python/2.7/distutils/sysconfig_pypy.py +++ b/lib-python/2.7/distutils/sysconfig_pypy.py @@ -218,6 +218,10 @@ compiler.shared_lib_extension = so_ext +def get_config_h_filename(): + """Returns the path of pyconfig.h.""" + inc_dir = get_python_inc(plat_specific=1) + return os.path.join(inc_dir, 'pyconfig.h') from sysconfig_cpython import ( parse_makefile, _variable_rx, expand_makefile_vars) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -62,7 +62,7 @@ throwing away information about them less eagerly. -.. branch: getarrayitem-into-bridges: +.. 
branch: getarrayitem-into-bridges More information is retained into a bridge: knowledge about the content of arrays (at fixed indices) is stored in guards (and thus available at the diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -210,6 +210,10 @@ def cmethod_descr_call(space, w_self, __args__): self = space.interp_w(W_PyCFunctionObject, w_self) args_w, kw_w = __args__.unpack() + if len(args_w) < 1: + raise oefmt(space.w_TypeError, + "descriptor '%s' of '%s' object needs an argument", + self.name, self.w_objclass.getname(space)) w_instance = args_w[0] # XXX typecheck missing w_args = space.newtuple(args_w[1:]) w_kw = space.newdict() diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -21,6 +21,9 @@ assert arr.itemsize == 4 assert arr[2] == 3 assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") arr.append(4) assert arr.tolist() == [1, 2, 3, 4] assert len(arr) == 4 From pypy.commits at gmail.com Wed Aug 9 07:25:11 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 09 Aug 2017 04:25:11 -0700 (PDT) Subject: [pypy-commit] pypy default: more of the same Message-ID: <598af117.0591df0a.45edf.1176@mx.google.com> Author: fijal Branch: Changeset: r92117:85d7ced5c2a8 Date: 2017-08-09 13:24 +0200 http://bitbucket.org/pypy/pypy/changeset/85d7ced5c2a8/ Log: more of the same diff --git a/rpython/tool/version.py b/rpython/tool/version.py --- a/rpython/tool/version.py +++ b/rpython/tool/version.py @@ -66,8 +66,8 @@ p = Popen([str(hgexe), 'id', '-t', root], stdout=PIPE, stderr=PIPE, env=env) hgtags = [t for t in p.stdout.read().strip().split() if t != 'tip'] - maywarn(p.stderr.read()) if p.wait() != 0: 
+        maywarn(p.stderr.read())
         hgtags = ['?']
 
     if hgtags:
@@ -77,7 +77,8 @@
     p = Popen([str(hgexe), 'id', '-b', root],
               stdout=PIPE, stderr=PIPE, env=env)
     hgbranch = p.stdout.read().strip()
-    maywarn(p.stderr.read())
+    if p.wait() != 0:
+        maywarn(p.stderr.read())
 
     return hgbranch, hgid

From pypy.commits at gmail.com  Wed Aug 9 11:27:24 2017
From: pypy.commits at gmail.com (mattip)
Date: Wed, 09 Aug 2017 08:27:24 -0700 (PDT)
Subject: [pypy-commit] pypy default: test, fix for cpython compatibility when PyObject_RichCompareBool(a, a, ...)
Message-ID: <598b29dc.09de1c0a.33d61.5cf2@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92118:d6b37b7c15ee
Date: 2017-08-09 18:25 +0300
http://bitbucket.org/pypy/pypy/changeset/d6b37b7c15ee/

Log: test, fix for cpython compatibility when PyObject_RichCompareBool(a, a, ...)

diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py
--- a/pypy/module/cpyext/object.py
+++ b/pypy/module/cpyext/object.py
@@ -305,7 +305,7 @@
         PyErr_BadInternalCall(space)
 
 @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1)
-def PyObject_RichCompareBool(space, ref1, ref2, opid):
+def PyObject_RichCompareBool(space, ref1, ref2, opid_int):
     """Compare the values of o1 and o2 using the operation specified by opid,
     which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE,
     corresponding to <,
@@ -313,7 +313,15 @@
     0 if the result is false, 1 otherwise. This is the equivalent of the Python
     expression o1 op o2, where op is the operator corresponding to opid."""
-    w_res = PyObject_RichCompare(space, ref1, ref2, opid)
+    # Quick result when objects are the same.
+    # Guarantees that identity implies equality.
+ if ref1 is ref2: + opid = rffi.cast(lltype.Signed, opid_int) + if opid == Py_EQ: + return 1 + if opid == Py_NE: + return 0 + w_res = PyObject_RichCompare(space, ref1, ref2, opid_int) return int(space.is_true(w_res)) @cpython_api([PyObject], PyObject, result_is_ll=True) diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, PyObject_RichCompareBool, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -136,7 +136,18 @@ w_i = space.wrap(1) with raises_w(space, SystemError): - PyObject_RichCompareBool(space, w_i, w_i, 123456) + api.PyObject_RichCompareBool(w_i, w_i, 123456) + + def test_RichCompareNanlike(self, space,api): + w_obj = space.appexec([], """(): + class Nanlike(object): + def __eq__(self, other): + raise RuntimeError('unreachable') + return Nanlike()""") + res = api.PyObject_RichCompareBool(w_obj, w_obj, Py_EQ) + assert res == 1 + res = api.PyObject_RichCompareBool(w_obj, w_obj, Py_NE) + assert res == 0 def test_IsInstance(self, space, api): assert api.PyObject_IsInstance(space.wrap(1), space.w_int) == 1 From pypy.commits at gmail.com Thu Aug 10 16:37:16 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 10 Aug 2017 13:37:16 -0700 (PDT) Subject: [pypy-commit] pypy default: char * -> const char *, fixes issue #2626 Message-ID: <598cc3fc.e484df0a.a3223.7951@mx.google.com> Author: Matti Picus Branch: Changeset: r92120:4b21dcb59a60 Date: 2017-08-10 23:36 +0300 http://bitbucket.org/pypy/pypy/changeset/4b21dcb59a60/ Log: char * -> const char *, fixes issue #2626 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- 
a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -520,7 +520,7 @@ if sys.platform == 'win32': make_conversion_functions('MBCS', 'mbcs') - at cpython_api([rffi.CCHARP, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) + at cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder): """Decode length bytes from a UTF-16 encoded buffer string and return the corresponding Unicode object. errors (if non-NULL) defines the error @@ -574,7 +574,7 @@ return space.newunicode(result) - at cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject) + at cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): """Decode length bytes from a UTF-32 encoded buffer string and return the corresponding Unicode object. errors (if non-NULL) @@ -630,7 +630,7 @@ return space.newunicode(result) - at cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, rffi.CCHARP], + at cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING], rffi.INT_real, error=-1) def PyUnicode_EncodeDecimal(space, s, length, output, llerrors): """Takes a Unicode string holding a decimal value and writes it From pypy.commits at gmail.com Thu Aug 10 16:37:14 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 10 Aug 2017 13:37:14 -0700 (PDT) Subject: [pypy-commit] pypy default: quiet a gcc warning by adding RPY_UNUSED __attribute__ ((__unused__)) Message-ID: <598cc3fa.5ba4df0a.eb3e2.922a@mx.google.com> Author: Matti Picus Branch: Changeset: r92119:ad807f62258a Date: 2017-08-10 23:35 +0300 http://bitbucket.org/pypy/pypy/changeset/ad807f62258a/ Log: quiet a gcc warning by adding RPY_UNUSED __attribute__ ((__unused__)) diff --git a/rpython/translator/c/src/precommondefs.h b/rpython/translator/c/src/precommondefs.h --- a/rpython/translator/c/src/precommondefs.h +++ b/rpython/translator/c/src/precommondefs.h 
@@ -63,9 +63,11 @@ #ifdef __GNUC__ # define RPY_EXPORTED extern __attribute__((visibility("default"))) # define _RPY_HIDDEN __attribute__((visibility("hidden"))) +# define RPY_UNUSED __attribute__ ((__unused__)) #else # define RPY_EXPORTED extern __declspec(dllexport) # define _RPY_HIDDEN /* nothing */ +# define RPY_UNUSED /*nothing */ #endif #ifndef RPY_EXTERN # define RPY_EXTERN extern _RPY_HIDDEN diff --git a/rpython/translator/c/src/signals.c b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -102,9 +102,10 @@ static void write_str(int fd, const char *p) { int i = 0; + int res RPY_UNUSED; while (p[i] != '\x00') i++; - (void)write(fd, p, i); + res = write(fd, p, i); } static void signal_setflag_handler(int signum) From pypy.commits at gmail.com Fri Aug 11 06:14:27 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 11 Aug 2017 03:14:27 -0700 (PDT) Subject: [pypy-commit] pypy default: do not change preexisting error when calling c-api functions Message-ID: <598d8383.d48bdf0a.a1a69.3aac@mx.google.com> Author: Matti Picus Branch: Changeset: r92121:9ddefd44f80d Date: 2017-08-11 12:37 +0300 http://bitbucket.org/pypy/pypy/changeset/9ddefd44f80d/ Log: do not change preexisting error when calling c-api functions unsuccessfully tried to write a test, since testing does not go through this path diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1575,6 +1575,7 @@ assert cpyext_glob_tid_ptr[0] == 0 cpyext_glob_tid_ptr[0] = tid + preexist_error = PyErr_Occurred(space) is not None try: # Call the function result = call_external_function(func, *boxed_args) @@ -1598,15 +1599,15 @@ # Check for exception consistency has_error = PyErr_Occurred(space) is not None has_result = ret is not None - if has_error and has_result: - raise oefmt(space.w_SystemError, - "An exception was set, but function returned a " - "value") - elif 
not expect_null and not has_error and not has_result: - raise oefmt(space.w_SystemError, - "Function returned a NULL result without setting " - "an exception") - + if not preexist_error: + if has_error and has_result: + raise oefmt(space.w_SystemError, + "An exception was set, but function returned a " + "value") + elif not expect_null and not has_error and not has_result: + raise oefmt(space.w_SystemError, + "Function returned a NULL result without setting " + "an exception") if has_error: state = space.fromcache(State) state.check_and_raise_exception() From pypy.commits at gmail.com Fri Aug 11 08:48:50 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 11 Aug 2017 05:48:50 -0700 (PDT) Subject: [pypy-commit] pypy default: document 'default' argument to sys.getsizeof Message-ID: <598da7b2.cf371c0a.5da21.61be@mx.google.com> Author: Matti Picus Branch: Changeset: r92122:37e8eeb5775a Date: 2017-08-11 15:47 +0300 http://bitbucket.org/pypy/pypy/changeset/37e8eeb5775a/ Log: document 'default' argument to sys.getsizeof diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -253,7 +253,13 @@ from rpython.rtyper.lltypesystem import lltype, rffi return space.newint(rffi.cast(lltype.Signed, handle)) -getsizeof_missing = """sys.getsizeof() is not implemented on PyPy. +getsizeof_missing = """getsizeof(...) + getsizeof(object, default) -> int + + Return the size of object in bytes. + +sys.getsizeof(object, default) will always return default on PyPy, and +raise a TypeError if default is not provided. 
First note that the CPython documentation says that this function may
raise a TypeError, so if you are seeing it, it means that the program

From pypy.commits at gmail.com  Fri Aug 11 11:30:30 2017
From: pypy.commits at gmail.com (fijal)
Date: Fri, 11 Aug 2017 08:30:30 -0700 (PDT)
Subject: [pypy-commit] extradoc extradoc: work on paragraphs
Message-ID: <598dcd96.8ba61c0a.abdb0.8bc1@mx.google.com>

Author: fijal
Branch: extradoc
Changeset: r5826:3cd0a7835f19
Date: 2017-08-11 17:30 +0200
http://bitbucket.org/pypy/extradoc/changeset/3cd0a7835f19/

Log: work on paragraphs

diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst
--- a/blog/draft/remove-gil.rst
+++ b/blog/draft/remove-gil.rst
@@ -17,16 +17,14 @@
 places so PyPy does not segfault when you try to do a concurrent access to a
 data structure.
 
-.. antocuni: I'd simply remove the following paragraph. It's redundant, IMHO
-..
-    Since such work would complicate the code base and our day to day work,
-    we would like to judge the interest on the community and the commercial
-    PyPy users.
-
-We are looking for commercial partners to make it happen (individual donations
+Since such work would complicate the code base and our day to day work,
+we would like to judge the interest on the community and the commercial
+partners to make it happen (individual donations
 did not work very well for us in the past). We estimate a total cost of $50k,
-out of which we already have backing for about 1/3. If we can get a $100k
-contract, we would make it our priority to deliver before the end of the year.
+out of which we already have backing for about 1/3. This would give us a good
+shot at delivering a good proof of concept of working PyPy no-GIL. If we can get a $100k
+contract, we will deliver fully working PyPy interpreter with no GIL as a release,
+possibly separate from the default PyPy release.
 
 People asked several questions, so I'll try to answer the technical parts here.
 
@@ -54,9 +52,20 @@
 Python is a very mutable language - there is tons of mutable state and basic
 objects that are compile time in other languages, like classes and functions
 are mutable at runtime. That means that sharing things between subinterpreters would
-be restricted to basic immutable data structures, which defeats the point compared
-to multi-processing approach. We don't believe it's a viable approach without
+be restricted to basic immutable data structures, which defeats the point,
+because it has the same problems as the approach of having multiple processes and
+no additional benefits.
+We don't believe it's a viable approach without
 seriously impacting the semantics of the language.
+* Why is it easier to do in PyPy than CPython?
+
+Removing the GIL in CPython has two problems - how do we guard access to mutable
+data structures with locks and what do we do with reference counting that needs
+to be guarded. PyPy only has the former problem, with latter being non-existance
+due to a different garbage collector approach. We believe that fixing reference
+counting would be quite a bit bigger undertaking than "just" providing locks around
+mutable data structures.
+
 Best regards,
 Maciej Fijalkowski

From pypy.commits at gmail.com  Fri Aug 11 11:35:15 2017
From: pypy.commits at gmail.com (arigo)
Date: Fri, 11 Aug 2017 08:35:15 -0700 (PDT)
Subject: [pypy-commit] extradoc extradoc: Tweaks
Message-ID: <598dceb3.85961c0a.a6b32.8baa@mx.google.com>

Author: Armin Rigo
Branch: extradoc
Changeset: r5827:9e0ef6516b6f
Date: 2017-08-11 17:35 +0200
http://bitbucket.org/pypy/extradoc/changeset/9e0ef6516b6f/

Log: Tweaks

diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst
--- a/blog/draft/remove-gil.rst
+++ b/blog/draft/remove-gil.rst
@@ -6,12 +6,12 @@
 Discussions about the infamous Global Interpreter Lock have been around for a
 while in the Python community. There has been various attempts at removing
 it: some were successful, like e.g. in Jython or IronPython with the help of
 the platform, and some yet to bear fruit, like `gilectomy`_. Since our
 `February sprint`_ in Leysin,
-we've been on-and-off tackling the topic of GIL removal in the PyPy project.
+we've been on-and-off tackling directly the topic of GIL removal in the PyPy project.
 
 .. _`February sprint`: https://morepypy.blogspot.it/2017/03/leysin-winter-sprint-summary.html
 
 As we announced at EuroPython, what we have got so far is a GIL-less PyPy
-which can to run **very simple** multi-threaded programs which are nicely
+which can run **very simple** multi-threaded programs which are nicely
 parallelized. At the moment, non-simple programs most likely segfaults: the
 remaining 90% (and another 90%) of work is with putting locks in strategic
 places so PyPy does not segfault when you try to do a concurrent access to a
@@ -62,10 +62,9 @@
 Removing the GIL in CPython has two problems - how do we guard access to mutable
 data structures with locks and what do we do with reference counting that needs
-to be guarded. PyPy only has the former problem, with latter being non-existance
-due to a different garbage collector approach. We believe that fixing reference
-counting would be quite a bit bigger undertaking than "just" providing locks around
-mutable data structures.
+to be guarded. PyPy only has the former problem; the latter doesn't exist,
+due to a different garbage collector approach. Of course the first problem
+is a mess too, but at least we are already half-way there.
Best regards, Maciej Fijalkowski From pypy.commits at gmail.com Fri Aug 11 11:42:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 11 Aug 2017 08:42:42 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <598dd072.15b81c0a.9bd01.8040@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92123:ca42f6a90f18 Date: 2017-08-11 17:36 +0200 http://bitbucket.org/pypy/pypy/changeset/ca42f6a90f18/ Log: hg merge default diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py --- a/lib-python/2.7/distutils/sysconfig_pypy.py +++ b/lib-python/2.7/distutils/sysconfig_pypy.py @@ -218,6 +218,10 @@ compiler.shared_lib_extension = so_ext +def get_config_h_filename(): + """Returns the path of pyconfig.h.""" + inc_dir = get_python_inc(plat_specific=1) + return os.path.join(inc_dir, 'pyconfig.h') from sysconfig_cpython import ( parse_makefile, _variable_rx, expand_makefile_vars) diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -62,7 +62,7 @@ throwing away information about them less eagerly. -.. branch: getarrayitem-into-bridges: +.. 
branch: getarrayitem-into-bridges More information is retained into a bridge: knowledge about the content of arrays (at fixed indices) is stored in guards (and thus available at the diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1632,6 +1632,7 @@ assert cpyext_glob_tid_ptr[0] == 0 cpyext_glob_tid_ptr[0] = tid + preexist_error = PyErr_Occurred(space) is not None try: # Call the function result = call_external_function(func, *boxed_args) @@ -1656,15 +1657,15 @@ # Check for exception consistency has_error = PyErr_Occurred(space) is not None - if has_error and has_result: - raise oefmt(space.w_SystemError, - "An exception was set, but function returned a " - "value") - elif not expect_null and not has_error and not has_result: - raise oefmt(space.w_SystemError, - "Function returned a NULL result without setting " - "an exception") - + if not preexist_error: + if has_error and has_result: + raise oefmt(space.w_SystemError, + "An exception was set, but function returned a " + "value") + elif not expect_null and not has_error and not has_result: + raise oefmt(space.w_SystemError, + "Function returned a NULL result without setting " + "an exception") if has_error: state = space.fromcache(State) state.check_and_raise_exception() diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -203,6 +203,10 @@ def cmethod_descr_call(space, w_self, __args__): self = space.interp_w(W_PyCFunctionObject, w_self) args_w, kw_w = __args__.unpack() + if len(args_w) < 1: + raise oefmt(space.w_TypeError, + "descriptor '%s' of '%s' object needs an argument", + self.name, self.w_objclass.getname(space)) w_instance = args_w[0] # XXX typecheck missing w_args = space.newtuple(args_w[1:]) w_kw = space.newdict() diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- 
a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -311,7 +311,7 @@ PyErr_BadInternalCall(space) @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1) -def PyObject_RichCompareBool(space, ref1, ref2, opid): +def PyObject_RichCompareBool(space, ref1, ref2, opid_int): """Compare the values of o1 and o2 using the operation specified by opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE, corresponding to <, @@ -319,7 +319,15 @@ 0 if the result is false, 1 otherwise. This is the equivalent of the Python expression o1 op o2, where op is the operator corresponding to opid.""" - w_res = PyObject_RichCompare(space, ref1, ref2, opid) + # Quick result when objects are the same. + # Guarantees that identity implies equality. + if ref1 is ref2: + opid = rffi.cast(lltype.Signed, opid_int) + if opid == Py_EQ: + return 1 + if opid == Py_NE: + return 0 + w_res = PyObject_RichCompare(space, ref1, ref2, opid_int) return int(space.is_true(w_res)) @cpython_api([PyObject], PyObject, result_is_ll=True) diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -12,6 +12,9 @@ assert arr.itemsize == 4 assert arr[2] == 3 assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") arr.append(4) assert arr.tolist() == [1, 2, 3, 4] assert len(arr) == 4 diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, PyObject_RichCompareBool, + PyObject_GetItem, PyObject_IsInstance, 
PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash) @@ -135,7 +135,18 @@ w_i = space.wrap(1) with raises_w(space, SystemError): - PyObject_RichCompareBool(space, w_i, w_i, 123456) + api.PyObject_RichCompareBool(w_i, w_i, 123456) + + def test_RichCompareNanlike(self, space,api): + w_obj = space.appexec([], """(): + class Nanlike(object): + def __eq__(self, other): + raise RuntimeError('unreachable') + return Nanlike()""") + res = api.PyObject_RichCompareBool(w_obj, w_obj, Py_EQ) + assert res == 1 + res = api.PyObject_RichCompareBool(w_obj, w_obj, Py_NE) + assert res == 0 def test_IsInstance(self, space, api): assert api.PyObject_IsInstance(space.wrap(1), space.w_int) == 1 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -730,7 +730,7 @@ if sys.platform == 'win32': make_conversion_functions('MBCS', 'mbcs') - at cpython_api([rffi.CCHARP, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) + at cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder): """Decode length bytes from a UTF-16 encoded buffer string and return the corresponding Unicode object. errors (if non-NULL) defines the error @@ -784,7 +784,7 @@ return space.newunicode(result) - at cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject) + at cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): """Decode length bytes from a UTF-32 encoded buffer string and return the corresponding Unicode object. 
errors (if non-NULL) @@ -840,7 +840,7 @@ return space.newunicode(result) - at cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, rffi.CCHARP], + at cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING], rffi.INT_real, error=-1) def PyUnicode_EncodeDecimal(space, s, length, output, llerrors): """Takes a Unicode string holding a decimal value and writes it diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -279,7 +279,13 @@ from rpython.rtyper.lltypesystem import lltype, rffi return space.newint(rffi.cast(lltype.Signed, handle)) -getsizeof_missing = """sys.getsizeof() is not implemented on PyPy. +getsizeof_missing = """getsizeof(...) + getsizeof(object, default) -> int + + Return the size of object in bytes. + +sys.getsizeof(object, default) will always return default on PyPy, and +raise a TypeError if default is not provided. First note that the CPython documentation says that this function may raise a TypeError, so if you are seeing it, it means that the program diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -556,3 +556,13 @@ x = X() assert x.x == 0 + + def test_duplicate_names(self): + class S(Structure): + _fields_ = [('a', c_int), + ('b', c_int), + ('a', c_byte)] + s = S(260, -123) + assert sizeof(s) == 3 * sizeof(c_int) + assert s.a == 4 # 256 + 4 + assert s.b == -123 diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -529,6 +529,8 @@ assert dic1.__class__ == dic2.__class__ return dic1.__class__(dic1.dictdef.union(dic2.dictdef)) + def ne((dic1, dic2)): + raise AnnotatorError("dict != dict not implemented") def _dict_can_only_throw_keyerror(s_dct, *ignore): if 
s_dct.dictdef.dictkey.custom_eq_hash: diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -84,7 +84,6 @@ # heap knowledge: we store triples of known heap fields in non-virtual # structs - # XXX could be extended to arrays if optimizer.optheap: triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into diff --git a/rpython/tool/version.py b/rpython/tool/version.py --- a/rpython/tool/version.py +++ b/rpython/tool/version.py @@ -59,15 +59,15 @@ p = Popen([str(hgexe), 'id', '-i', root], stdout=PIPE, stderr=PIPE, env=env) hgid = p.stdout.read().strip() - maywarn(p.stderr.read()) if p.wait() != 0: + maywarn(p.stderr.read()) hgid = '?' p = Popen([str(hgexe), 'id', '-t', root], stdout=PIPE, stderr=PIPE, env=env) hgtags = [t for t in p.stdout.read().strip().split() if t != 'tip'] - maywarn(p.stderr.read()) if p.wait() != 0: + maywarn(p.stderr.read()) hgtags = ['?'] if hgtags: @@ -77,7 +77,8 @@ p = Popen([str(hgexe), 'id', '-b', root], stdout=PIPE, stderr=PIPE, env=env) hgbranch = p.stdout.read().strip() - maywarn(p.stderr.read()) + if p.wait() != 0: + maywarn(p.stderr.read()) return hgbranch, hgid diff --git a/rpython/translator/c/src/precommondefs.h b/rpython/translator/c/src/precommondefs.h --- a/rpython/translator/c/src/precommondefs.h +++ b/rpython/translator/c/src/precommondefs.h @@ -63,9 +63,11 @@ #ifdef __GNUC__ # define RPY_EXPORTED extern __attribute__((visibility("default"))) # define _RPY_HIDDEN __attribute__((visibility("hidden"))) +# define RPY_UNUSED __attribute__ ((__unused__)) #else # define RPY_EXPORTED extern __declspec(dllexport) # define _RPY_HIDDEN /* nothing */ +# define RPY_UNUSED /*nothing */ #endif #ifndef RPY_EXTERN # define RPY_EXTERN extern _RPY_HIDDEN diff --git a/rpython/translator/c/src/signals.c 
b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -102,9 +102,10 @@ static void write_str(int fd, const char *p) { int i = 0; + int res RPY_UNUSED; while (p[i] != '\x00') i++; - (void)write(fd, p, i); + res = write(fd, p, i); } static void signal_setflag_handler(int signum) From pypy.commits at gmail.com Fri Aug 11 11:44:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 11 Aug 2017 08:44:39 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: Check the result of module init functions more carefully; improve tests for this Message-ID: <598dd0e7.85961c0a.a6b32.8cff@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92124:fe04b833579b Date: 2017-08-11 17:41 +0200 http://bitbucket.org/pypy/pypy/changeset/fe04b833579b/ Log: Check the result of module init functions more carefully; improve tests for this diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1545,7 +1545,17 @@ try: initfunc = rffi.cast(initfunctype, initptr) initret = generic_cpy_call_dont_convert_result(space, initfunc) - state.check_and_raise_exception() + if not initret: + if state.operror: + state.check_and_raise_exception() + raise oefmt(space.w_SystemError, + "initialization of %s failed without raising an exception", + name) + else: + if state.clear_exception(): + raise oefmt(space.w_SystemError, + "initialization of %s raised unreported exception", + name) if not initret.c_ob_type: raise oefmt(space.w_SystemError, "init function of %s returned uninitialized object", @@ -1588,7 +1598,7 @@ @specialize.ll() def generic_cpy_call_dont_convert_result(space, func, *args): FT = lltype.typeOf(func).TO - return make_generic_cpy_call(FT, False, False)(space, func, *args) + return make_generic_cpy_call(FT, True, False)(space, func, *args) @specialize.memo() def make_generic_cpy_call(FT, expect_null, convert_result): diff --git 
a/pypy/module/cpyext/test/test_module.py b/pypy/module/cpyext/test/test_module.py --- a/pypy/module/cpyext/test/test_module.py +++ b/pypy/module/cpyext/test/test_module.py @@ -135,25 +135,28 @@ raises(SystemError, self.import_module, name='multiphase', body=body, init=init) +class AppTestMultiPhase2(AppTestCpythonExtensionBase): + def setup_class(cls): + cls.w_name = cls.space.wrap('multiphase2') + AppTestCpythonExtensionBase.setup_class.im_func(cls) + def test_multiphase2(self): import sys from importlib import machinery, util - NAME = 'multiphase2' - module = self.import_module(name=NAME) + module = self.import_module(name=self.name) finder = machinery.FileFinder(None) - spec = util.find_spec(NAME) + spec = util.find_spec(self.name) assert spec - assert module.__name__ == NAME + assert module.__name__ == self.name #assert module.__file__ == spec.origin assert module.__package__ == '' raises(AttributeError, 'module.__path__') - assert module is sys.modules[NAME] + assert module is sys.modules[self.name] assert isinstance(module.__loader__, machinery.ExtensionFileLoader) def test_functionality(self): import types - NAME = 'multiphase2' - module = self.import_module(name=NAME) + module = self.import_module(name=self.name) assert isinstance(module, types.ModuleType) ex = module.Example() assert ex.demo('abcd') == 'abcd' @@ -170,14 +173,19 @@ def test_reload(self): import importlib - NAME = 'multiphase2' - module = self.import_module(name=NAME) + module = self.import_module(name=self.name) ex_class = module.Example importlib.reload(module) assert ex_class is module.Example - def w_load_from_spec(self, loader, spec): - from importlib import util + def w_load_from_name(self, name, origin=None): + from importlib import machinery, util + if not origin: + module = self.import_module(name=self.name) + origin = module.__loader__.path + name = '_testmultiphase_' + name + loader = machinery.ExtensionFileLoader(name, origin) + spec = util.spec_from_loader(name, loader) module = 
util.module_from_spec(spec) loader.exec_module(module) return module @@ -185,19 +193,13 @@ def test_bad_modules(self): # XXX: not a very good test, since most internal issues in cpyext # cause SystemErrors. - from importlib import machinery, util - NAME = 'multiphase2' - module = self.import_module(name=NAME) + module = self.import_module(name=self.name) origin = module.__loader__.path - for name_base in [ + for name in [ 'bad_slot_large', 'bad_slot_negative', 'create_int_with_state', 'negative_size', - 'export_null', - 'export_uninitialized', - 'export_raise', - 'export_unreported_exception', 'create_null', 'create_raise', 'create_unreported_exception', @@ -206,7 +208,23 @@ 'exec_raise', 'exec_unreported_exception', ]: - name = '_testmultiphase_' + name_base - loader = machinery.ExtensionFileLoader(name, origin) - spec = util.spec_from_loader(name, loader) - raises(SystemError, self.load_from_spec, loader, spec) + raises(SystemError, self.load_from_name, name, origin) + + def test_export_null(self): + excinfo = raises(SystemError, self.load_from_name, 'export_null') + assert "initialization" in excinfo.value.args[0] + assert "without raising" in excinfo.value.args[0] + + def test_export_uninit(self): + excinfo = raises(SystemError, self.load_from_name, 'export_uninitialized') + assert "init function" in excinfo.value.args[0] + assert "uninitialized object" in excinfo.value.args[0] + + def test_export_raise(self): + excinfo = raises(SystemError, self.load_from_name, 'export_raise') + assert "bad export function" == excinfo.value.args[0] + + def test_export_unreported(self): + excinfo = raises(SystemError, self.load_from_name, 'export_unreported_exception') + assert "initialization" in excinfo.value.args[0] + assert "unreported exception" in excinfo.value.args[0] From pypy.commits at gmail.com Fri Aug 11 12:02:58 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 11 Aug 2017 09:02:58 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: expand Message-ID: 
<598dd532.4588df0a.dd3fd.996d@mx.google.com> Author: fijal Branch: extradoc Changeset: r5828:a37e8e51e1bc Date: 2017-08-11 18:02 +0200 http://bitbucket.org/pypy/extradoc/changeset/a37e8e51e1bc/ Log: expand diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst --- a/blog/draft/remove-gil.rst +++ b/blog/draft/remove-gil.rst @@ -7,6 +7,10 @@ in the Python community. There has been various attempts at removing it: some were successful, like e.g. in Jython or IronPython with the help of the platform, and some yet to bear fruit, like `gilectomy`_. Since our `February sprint`_ in Leysin, we've been on-and-off tackling directly the topic of GIL removal in the PyPy project. +We believe that the work done in IronPython or Jython can be reproduced with +only a bit more effort. Compared to that, removing the GIL in CPython is a much +harder topic, since it requires tackling the problem of multi-threaded reference +counting. See the section below for further discussions. .. _`February sprint`: https://morepypy.blogspot.it/2017/03/leysin-winter-sprint-summary.html @@ -64,7 +68,11 @@ data structures with locks and what do we do with reference counting that needs to be guarded. PyPy only has the former problem; the latter doesn't exist, due to a different garbage collector approach. Of course the first problem -is a mess too, but at least we are already half-way there. +is a mess too, but at least we are already half-way there. Compare to Jython +or IronPython, PyPy lacks some data structures that are provided by JVM or .NET, +which we would need to implement, hence the problem is a little harder +than on an existing multithreaded platform. However, there is good research +and we know how the problem can be solved. 
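[Editor's note: the "locks in strategic places" plan in the draft above can be illustrated with a toy standard-library sketch. It is not part of any changeset in this digest; it only shows the kind of guard every shared mutable structure would need on a runtime without a GIL.]

```python
import threading

counter = 0                      # stands in for a shared mutable structure
counter_lock = threading.Lock()  # the "strategic" lock guarding it

def work(n):
    global counter
    for _ in range(n):
        # Each increment is a read-modify-write sequence; on a truly
        # parallel runtime, unguarded updates could interleave and be lost.
        with counter_lock:
            counter += 1

threads = [threading.Thread(target=work, args=(10000,)) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert counter == 40000
```

On today's GIL-based CPython and PyPy the increments are serialized anyway; the lock is what would keep this deterministic once the GIL is removed.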
Best regards, Maciej Fijalkowski From pypy.commits at gmail.com Fri Aug 11 12:06:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 11 Aug 2017 09:06:47 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: hg merge py3.5 Message-ID: <598dd617.935c1c0a.98700.bc12@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92125:dce989c3370d Date: 2017-08-11 17:47 +0200 http://bitbucket.org/pypy/pypy/changeset/dce989c3370d/ Log: hg merge py3.5 diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py --- a/lib-python/2.7/distutils/sysconfig_pypy.py +++ b/lib-python/2.7/distutils/sysconfig_pypy.py @@ -218,6 +218,10 @@ compiler.shared_lib_extension = so_ext +def get_config_h_filename(): + """Returns the path of pyconfig.h.""" + inc_dir = get_python_inc(plat_specific=1) + return os.path.join(inc_dir, 'pyconfig.h') from sysconfig_cpython import ( parse_makefile, _variable_rx, expand_makefile_vars) diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -62,7 +62,7 @@ throwing away information about them less eagerly. -.. branch: getarrayitem-into-bridges: +.. 
branch: getarrayitem-into-bridges More information is retained into a bridge: knowledge about the content of arrays (at fixed indices) is stored in guards (and thus available at the diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1645,6 +1645,7 @@ assert cpyext_glob_tid_ptr[0] == 0 cpyext_glob_tid_ptr[0] = tid + preexist_error = PyErr_Occurred(space) is not None try: # Call the function result = call_external_function(func, *boxed_args) @@ -1669,15 +1670,15 @@ # Check for exception consistency has_error = PyErr_Occurred(space) is not None - if has_error and has_result: - raise oefmt(space.w_SystemError, - "An exception was set, but function returned a " - "value") - elif not expect_null and not has_error and not has_result: - raise oefmt(space.w_SystemError, - "Function returned a NULL result without setting " - "an exception") - + if not preexist_error: + if has_error and has_result: + raise oefmt(space.w_SystemError, + "An exception was set, but function returned a " + "value") + elif not expect_null and not has_error and not has_result: + raise oefmt(space.w_SystemError, + "Function returned a NULL result without setting " + "an exception") if has_error: state = space.fromcache(State) state.check_and_raise_exception() diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -203,6 +203,10 @@ def cmethod_descr_call(space, w_self, __args__): self = space.interp_w(W_PyCFunctionObject, w_self) args_w, kw_w = __args__.unpack() + if len(args_w) < 1: + raise oefmt(space.w_TypeError, + "descriptor '%s' of '%s' object needs an argument", + self.name, self.w_objclass.getname(space)) w_instance = args_w[0] # XXX typecheck missing w_args = space.newtuple(args_w[1:]) w_kw = space.newdict() diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- 
a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -311,7 +311,7 @@ PyErr_BadInternalCall(space) @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1) -def PyObject_RichCompareBool(space, ref1, ref2, opid): +def PyObject_RichCompareBool(space, ref1, ref2, opid_int): """Compare the values of o1 and o2 using the operation specified by opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE, corresponding to <, @@ -319,7 +319,15 @@ <=, ==, !=, >, or >= respectively. Returns -1 on error, 0 if the result is false, 1 otherwise. This is the equivalent of the Python expression o1 op o2, where op is the operator corresponding to opid.""" - w_res = PyObject_RichCompare(space, ref1, ref2, opid) + # Quick result when objects are the same. + # Guarantees that identity implies equality. + if ref1 is ref2: + opid = rffi.cast(lltype.Signed, opid_int) + if opid == Py_EQ: + return 1 + if opid == Py_NE: + return 0 + w_res = PyObject_RichCompare(space, ref1, ref2, opid_int) return int(space.is_true(w_res)) @cpython_api([PyObject], PyObject, result_is_ll=True) diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py --- a/pypy/module/cpyext/test/test_arraymodule.py +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -12,6 +12,9 @@ assert arr.itemsize == 4 assert arr[2] == 3 assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") arr.append(4) assert arr.tolist() == [1, 2, 3, 4] assert len(arr) == 4 diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, PyObject_RichCompareBool, + PyObject_GetItem, PyObject_IsInstance,
PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash) @@ -135,7 +135,18 @@ w_i = space.wrap(1) with raises_w(space, SystemError): - PyObject_RichCompareBool(space, w_i, w_i, 123456) + api.PyObject_RichCompareBool(w_i, w_i, 123456) + + def test_RichCompareNanlike(self, space,api): + w_obj = space.appexec([], """(): + class Nanlike(object): + def __eq__(self, other): + raise RuntimeError('unreachable') + return Nanlike()""") + res = api.PyObject_RichCompareBool(w_obj, w_obj, Py_EQ) + assert res == 1 + res = api.PyObject_RichCompareBool(w_obj, w_obj, Py_NE) + assert res == 0 def test_IsInstance(self, space, api): assert api.PyObject_IsInstance(space.wrap(1), space.w_int) == 1 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -730,7 +730,7 @@ if sys.platform == 'win32': make_conversion_functions('MBCS', 'mbcs') - at cpython_api([rffi.CCHARP, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) + at cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder): """Decode length bytes from a UTF-16 encoded buffer string and return the corresponding Unicode object. errors (if non-NULL) defines the error @@ -784,7 +784,7 @@ return space.newunicode(result) - at cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject) + at cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): """Decode length bytes from a UTF-32 encoded buffer string and return the corresponding Unicode object. 
errors (if non-NULL) @@ -840,7 +840,7 @@ return space.newunicode(result) - at cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, rffi.CCHARP], + at cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING], rffi.INT_real, error=-1) def PyUnicode_EncodeDecimal(space, s, length, output, llerrors): """Takes a Unicode string holding a decimal value and writes it diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -279,7 +279,13 @@ from rpython.rtyper.lltypesystem import lltype, rffi return space.newint(rffi.cast(lltype.Signed, handle)) -getsizeof_missing = """sys.getsizeof() is not implemented on PyPy. +getsizeof_missing = """getsizeof(...) + getsizeof(object, default) -> int + + Return the size of object in bytes. + +sys.getsizeof(object, default) will always return default on PyPy, and +raise a TypeError if default is not provided. First note that the CPython documentation says that this function may raise a TypeError, so if you are seeing it, it means that the program diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -556,3 +556,13 @@ x = X() assert x.x == 0 + + def test_duplicate_names(self): + class S(Structure): + _fields_ = [('a', c_int), + ('b', c_int), + ('a', c_byte)] + s = S(260, -123) + assert sizeof(s) == 3 * sizeof(c_int) + assert s.a == 4 # 256 + 4 + assert s.b == -123 diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -529,6 +529,8 @@ assert dic1.__class__ == dic2.__class__ return dic1.__class__(dic1.dictdef.union(dic2.dictdef)) + def ne((dic1, dic2)): + raise AnnotatorError("dict != dict not implemented") def _dict_can_only_throw_keyerror(s_dct, *ignore): if 
s_dct.dictdef.dictkey.custom_eq_hash: diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -84,7 +84,6 @@ # heap knowledge: we store triples of known heap fields in non-virtual # structs - # XXX could be extended to arrays if optimizer.optheap: triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into diff --git a/rpython/tool/version.py b/rpython/tool/version.py --- a/rpython/tool/version.py +++ b/rpython/tool/version.py @@ -59,15 +59,15 @@ p = Popen([str(hgexe), 'id', '-i', root], stdout=PIPE, stderr=PIPE, env=env) hgid = p.stdout.read().strip() - maywarn(p.stderr.read()) if p.wait() != 0: + maywarn(p.stderr.read()) hgid = '?' p = Popen([str(hgexe), 'id', '-t', root], stdout=PIPE, stderr=PIPE, env=env) hgtags = [t for t in p.stdout.read().strip().split() if t != 'tip'] - maywarn(p.stderr.read()) if p.wait() != 0: + maywarn(p.stderr.read()) hgtags = ['?'] if hgtags: @@ -77,7 +77,8 @@ p = Popen([str(hgexe), 'id', '-b', root], stdout=PIPE, stderr=PIPE, env=env) hgbranch = p.stdout.read().strip() - maywarn(p.stderr.read()) + if p.wait() != 0: + maywarn(p.stderr.read()) return hgbranch, hgid diff --git a/rpython/translator/c/src/precommondefs.h b/rpython/translator/c/src/precommondefs.h --- a/rpython/translator/c/src/precommondefs.h +++ b/rpython/translator/c/src/precommondefs.h @@ -63,9 +63,11 @@ #ifdef __GNUC__ # define RPY_EXPORTED extern __attribute__((visibility("default"))) # define _RPY_HIDDEN __attribute__((visibility("hidden"))) +# define RPY_UNUSED __attribute__ ((__unused__)) #else # define RPY_EXPORTED extern __declspec(dllexport) # define _RPY_HIDDEN /* nothing */ +# define RPY_UNUSED /*nothing */ #endif #ifndef RPY_EXTERN # define RPY_EXTERN extern _RPY_HIDDEN diff --git a/rpython/translator/c/src/signals.c 
b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -102,9 +102,10 @@ static void write_str(int fd, const char *p) { int i = 0; + int res RPY_UNUSED; while (p[i] != '\x00') i++; - (void)write(fd, p, i); + res = write(fd, p, i); } static void signal_setflag_handler(int signum) From pypy.commits at gmail.com Fri Aug 11 12:15:25 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 11 Aug 2017 09:15:25 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: More tweaks Message-ID: <598dd81d.8fb91c0a.caff.b193@mx.google.com> Author: Armin Rigo Branch: extradoc Changeset: r5829:20b8371b4615 Date: 2017-08-11 18:15 +0200 http://bitbucket.org/pypy/extradoc/changeset/20b8371b4615/ Log: More tweaks diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst --- a/blog/draft/remove-gil.rst +++ b/blog/draft/remove-gil.rst @@ -9,7 +9,7 @@ we've been on-and-off tackling directly the topic of GIL removal in the PyPy project. We believe that the work done in IronPython or Jython can be reproduced with only a bit more effort. Compared to that, removing the GIL in CPython is a much -harder topic, since it requires tackling the problem of multi-threaded reference +harder topic, since it also requires tackling the problem of multi-threaded reference counting. See the section below for further discussions. .. _`February sprint`: https://morepypy.blogspot.it/2017/03/leysin-winter-sprint-summary.html @@ -18,16 +18,17 @@ which can run **very simple** multi-threaded programs which are nicely parallelized. At the moment, non-simple programs most likely segfaults: the remaining 90% (and another 90%) of work is with putting locks in strategic -places so PyPy does not segfault when you try to do a concurrent access to a -data structure. +places so PyPy does not segfault when you try to do concurrent accesses to +data structures. 
-Since such work would complicate the code base and our day to day work, +Since such work would complicate the code base and our day-to-day work, we would like to judge the interest on the community and the commercial - partners to make it happen (individual donations -did not work very well for us in the past). We estimate a total cost of $50k, -out of which we already have backing for about 1/3. This would give us a good +partners to make it happen (we are not looking for individual +donations at this point). We estimate a total cost of $50k, +out of which we already have backing for about 1/3 (with a possible 1/3 +extra from the STM money, see below). This would give us a good shot at delivering a good proof of concept of working PyPy no-GIL. If we can get a $100k -contract, we will deliver fully working PyPy interpreter with no GIL as a release, +contract, we will deliver a fully working PyPy interpreter with no GIL as a release, possibly separate from the default PyPy release. People asked several questions, so I'll try to answer the technical parts @@ -35,32 +36,36 @@ * What would the plan entail? -We've already done the work on Garbage Collector to allow doing multi -threaded programs in RPython. "all" that's left is adding locks on mutable -data structures everywhere in PyPy codebase. Since it'll significantly complicated +We've already done the work on Garbage Collector to allow doing multi- +threaded programs in RPython. "All" that is left is adding locks on mutable +data structures everywhere in the PyPy codebase. Since it'll significantly complicate our workflow, we need to see real interest in that topic, backed up by commercial contracts, otherwise we're not going to do it. * Why the STM effort did not work out? STM was a research project that proved that the idea is possible. 
However, -the amount of user effort that's required to make programs run nicely -parallelizable is both significant and we never managed to develop tools -that would help nicely. At the present time we're not sure if more manpower -spent on tooling would improve the situation or idea is doomed. The whole -approach also ended up being a significant overhead on single threaded programs, -which means that it's very easy to make your programs slower. +the amount of user effort that is required to make programs run in a +parallelizable way is significant, and we never managed to develop tools +that would help in doing so. At the moment we're not sure if more manpower +spent on tooling would improve the situation or if the whole idea is really doomed. +The approach also ended up being a significant overhead on single threaded programs, +so in the end it is very easy to make your programs slower. (We have some money +left in the donation pot for STM which we are not using; according to the rules, we +could declare the STM attempt failed and channel that money towards the present +GIL removal proposal.) * Would subinterpreters not be a better idea? -Python is a very mutable language - there is tons of mutable state and -basic objects that are compile time in other languages, like classes and functions -are mutable at runtime. That means that sharing things between subinterpreters would -be restricted to basic immutable data structures, which defeats the point, -because it has the same problems as the approach of having multiple processes and -no additional benefits. -We don't believe it's a viable approach without -seriously impacting the semantics of the language. +Python is a very mutable language - there are tons of mutable state and +basic objects (classes, functions,...) that are compile-time in other +language but runtime and fully mutable in Python. 
In the end, sharing +things between subinterpreters would be restricted to basic immutable +data structures, which defeats the point: it has the same problems as +the approach of having multiple processes and no additional benefits. +We believe that this is not viable without seriously impacting the +semantics of the language (a conclusion which applies to many other +approaches too). * Why is it easier to do in PyPy than CPython? @@ -68,11 +73,11 @@ data structures with locks and what do we do with reference counting that needs to be guarded. PyPy only has the former problem; the latter doesn't exist, due to a different garbage collector approach. Of course the first problem -is a mess too, but at least we are already half-way there. Compare to Jython +is a mess too, but at least we are already half-way there. Compared to Jython or IronPython, PyPy lacks some data structures that are provided by JVM or .NET, which we would need to implement, hence the problem is a little harder than on an existing multithreaded platform. However, there is good research -and we know how the problem can be solved. +and we know how that problem can be solved. Best regards, Maciej Fijalkowski From pypy.commits at gmail.com Fri Aug 11 16:18:18 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 11 Aug 2017 13:18:18 -0700 (PDT) Subject: [pypy-commit] pypy default: Mirror CPython classmethod __reduce__ Message-ID: <598e110a.2f98df0a.61c54.3cac@mx.google.com> Author: Jean-Paul Calderone Branch: Changeset: r92126:c38befdc824e Date: 2017-08-11 16:17 -0400 http://bitbucket.org/pypy/pypy/changeset/c38befdc824e/ Log: Mirror CPython classmethod __reduce__ This makes classmethods pickleable and should fix lib-python/3/test_pickle.py.
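[Editor's note: the CPython behaviour this changeset mirrors can be checked in plain Python 3. The class ``X`` below is illustrative only; on CPython 3 a bound classmethod reduces to ``getattr`` plus an attribute name, which is what makes it pickleable.]

```python
class X:
    @classmethod
    def y(cls):
        return cls.__name__

f, args = X.y.__reduce__()   # on CPython 3 this is (getattr, (X, 'y'))
assert f(*args) == X.y       # getattr(X, 'y') rebuilds an equal bound method
assert f(*args)() == 'X'
```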
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -596,8 +596,9 @@ new_inst = mod.get('builtin_method_new') tup = [w_instance, space.newtext(w_function.name)] else: - new_inst = mod.get('method_new') - tup = [self.w_function, w_instance] + w_builtins = space.getbuiltinmodule('builtins') + new_inst = space.getattr(w_builtins, space.newtext('getattr')) + tup = [w_instance, space.newtext(self.w_function.name)] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -49,6 +49,15 @@ assert f().__qualname__ == 'inner_global' assert f()().__qualname__ == 'inner_global.<locals>.inner_function2' + def test_classmethod_reduce(self): + class X(object): + @classmethod + def y(cls): + pass + + f, args = X.y.__reduce__() + assert f(*args) == X.y + def test_annotations(self): def f(): pass ann = f.__annotations__ From pypy.commits at gmail.com Fri Aug 11 16:29:36 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 11 Aug 2017 13:29:36 -0700 (PDT) Subject: [pypy-commit] pypy default: meant for py3.5 branch Message-ID: <598e13b0.c8a1df0a.84fad.b68d@mx.google.com> Author: Jean-Paul Calderone Branch: Changeset: r92127:4578b8104495 Date: 2017-08-11 16:28 -0400 http://bitbucket.org/pypy/pypy/changeset/4578b8104495/ Log: meant for py3.5 branch diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -596,9 +596,8 @@ new_inst = mod.get('builtin_method_new') tup = [w_instance, space.newtext(w_function.name)] else: - w_builtins = space.getbuiltinmodule('builtins') - new_inst = space.getattr(w_builtins, space.newtext('getattr')) - tup = [w_instance, space.newtext(self.w_function.name)] + new_inst = mod.get('method_new') + tup =
[self.w_function, w_instance] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -49,15 +49,6 @@ assert f().__qualname__ == 'inner_global' assert f()().__qualname__ == 'inner_global.<locals>.inner_function2' - def test_classmethod_reduce(self): - class X(object): - @classmethod - def y(cls): - pass - - f, args = X.y.__reduce__() - assert f(*args) == X.y - def test_annotations(self): def f(): pass ann = f.__annotations__ From pypy.commits at gmail.com Fri Aug 11 16:42:23 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 11 Aug 2017 13:42:23 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Mirror CPython classmethod __reduce__ Message-ID: <598e16af.8f371c0a.2e0b3.05c8@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92128:d5f42df20932 Date: 2017-08-11 16:17 -0400 http://bitbucket.org/pypy/pypy/changeset/d5f42df20932/ Log: Mirror CPython classmethod __reduce__ This makes classmethods pickleable and should fix lib-python/3/test_pickle.py.
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -596,8 +596,9 @@ new_inst = mod.get('builtin_method_new') tup = [w_instance, space.newtext(w_function.name)] else: - new_inst = mod.get('method_new') - tup = [self.w_function, w_instance] + w_builtins = space.getbuiltinmodule('builtins') + new_inst = space.getattr(w_builtins, space.newtext('getattr')) + tup = [w_instance, space.newtext(self.w_function.name)] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -49,6 +49,15 @@ assert f().__qualname__ == 'inner_global' assert f()().__qualname__ == 'inner_global.<locals>.inner_function2' + def test_classmethod_reduce(self): + class X(object): + @classmethod + def y(cls): + pass + + f, args = X.y.__reduce__() + assert f(*args) == X.y + def test_annotations(self): def f(): pass ann = f.__annotations__ From pypy.commits at gmail.com Fri Aug 11 16:42:25 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 11 Aug 2017 13:42:25 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: A more precise assertion. Message-ID: <598e16b1.47e51c0a.d7d98.04af@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92129:2586878e1a3c Date: 2017-08-11 16:40 -0400 http://bitbucket.org/pypy/pypy/changeset/2586878e1a3c/ Log: A more precise assertion. diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -57,6 +57,10 @@ f, args = X.y.__reduce__() assert f(*args) == X.y + # This is perhaps overly specific. It's an attempt to be certain that + # pickle will actually work with this implementation.
+ assert f == getattr + assert args == (X, "y") def test_annotations(self): def f(): pass From pypy.commits at gmail.com Sat Aug 12 05:45:40 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 02:45:40 -0700 (PDT) Subject: [pypy-commit] pypy default: Close wrongly created branch head Message-ID: <598ece44.2f98df0a.61c54.8daf@mx.google.com> Author: Armin Rigo Branch: Changeset: r92130:328306bbabd7 Date: 2017-08-12 11:45 +0200 http://bitbucket.org/pypy/pypy/changeset/328306bbabd7/ Log: Close wrongly created branch head From pypy.commits at gmail.com Sat Aug 12 05:58:03 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 02:58:03 -0700 (PDT) Subject: [pypy-commit] pypy default: Add missing header Message-ID: <598ed12b.09de1c0a.caf3b.3fe9@mx.google.com> Author: Armin Rigo Branch: Changeset: r92131:e55de3a1ccc9 Date: 2017-08-12 11:57 +0200 http://bitbucket.org/pypy/pypy/changeset/e55de3a1ccc9/ Log: Add missing header diff --git a/rpython/translator/c/src/signals.c b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -74,6 +74,7 @@ } #ifdef _WIN32 +#include #define atomic_cas(ptr, oldv, newv) (InterlockedCompareExchange(ptr, \ newv, oldv) == (oldv)) #else From pypy.commits at gmail.com Sat Aug 12 06:15:24 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 03:15:24 -0700 (PDT) Subject: [pypy-commit] pypy default: Add a FIXME for _vmprof on Windows Message-ID: <598ed53c.a799df0a.ad850.5c09@mx.google.com> Author: Armin Rigo Branch: Changeset: r92132:351e1a58f5ac Date: 2017-08-12 12:14 +0200 http://bitbucket.org/pypy/pypy/changeset/351e1a58f5ac/ Log: Add a FIXME for _vmprof on Windows diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -71,6 +71,8 @@ working_modules.remove("_cppyy") # not tested on win32 if "faulthandler" in working_modules: 
working_modules.remove("faulthandler") # missing details + if "_vmprof" in working_modules: + working_modules.remove("_vmprof") # FIXME: missing details # The _locale module is needed by site.py on Windows default_modules.add("_locale") From pypy.commits at gmail.com Sat Aug 12 09:12:42 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 06:12:42 -0700 (PDT) Subject: [pypy-commit] cffi default: Mention that ffi.dlopen(None) does not work on Python 3 on Windows. Message-ID: <598efeca.4588df0a.dd3fd.b89e@mx.google.com> Author: Armin Rigo Branch: Changeset: r3000:50aa734652c8 Date: 2017-08-12 15:12 +0200 http://bitbucket.org/cffi/cffi/changeset/50aa734652c8/ Log: Mention that ffi.dlopen(None) does not work on Python 3 on Windows. diff --git a/doc/source/overview.rst b/doc/source/overview.rst --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -44,6 +44,10 @@ arguments. In the above example it would be ``b"world"`` and ``b"hi there, %s!\n"``. In general it is ``somestring.encode(myencoding)``. +*Python 3 on Windows:* ``ffi.dlopen(None)`` does not work. This problem +is messy and not really fixable. The example above could be fixed by +calling another function from a specific DLL that exists on your system. + *This example does not call any C compiler. 
It works in the so-called ABI mode, which means that it will crash if you call some function or access some fields of a structure that was slightly misdeclared in the From pypy.commits at gmail.com Sat Aug 12 09:23:01 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 06:23:01 -0700 (PDT) Subject: [pypy-commit] cffi default: Another note about performance Message-ID: <598f0135.e484df0a.f8bd.47a9@mx.google.com> Author: Armin Rigo Branch: Changeset: r3001:281124d85f28 Date: 2017-08-12 15:22 +0200 http://bitbucket.org/cffi/cffi/changeset/281124d85f28/ Log: Another note about performance diff --git a/doc/source/cdef.rst b/doc/source/cdef.rst --- a/doc/source/cdef.rst +++ b/doc/source/cdef.rst @@ -314,7 +314,9 @@ Let me state it again: this gives ABI-level access to the library, so you need to have all types declared manually exactly as they were while the library was made. No checking is done. Mismatches can -cause random crashes. +cause random crashes. API-level access, on the other hand, is safer. +Speed-wise, API-level access is much faster (it is common to have +the opposite misconception about performance). Note that only functions and global variables live in library objects; the types exist in the ``ffi`` instance independently of library objects. 
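The two cffi notes above (the ``ffi.dlopen(None)`` limitation on Python 3 under Windows, and the fact that ABI-level access does no checking) can be illustrated without cffi itself. Below is a minimal sketch of the same ABI-style lookup expressed with the stdlib ``ctypes`` module instead of cffi; the choice of ``"c"`` as the library name and of ``abs`` as the called function are illustrative assumptions, not part of the commits recorded here.

```python
import ctypes
import ctypes.util

# ABI-style access: no compile step, just open a shared library by name
# and call into it.  Passing None (the analogue of ffi.dlopen(None)) asks
# the dynamic linker for the already-loaded global namespace; on Python 3
# under Windows that does not work, which is why looking up a concrete
# library name is the suggested workaround.
name = ctypes.util.find_library("c")  # e.g. "libc.so.6" on Linux
libc = ctypes.CDLL(name) if name else ctypes.CDLL(None)

# abs() lives in the C library; nothing verifies the signature we assume
# here, which is exactly the "no checking is done" property of ABI mode.
assert libc.abs(-5) == 5
```

The sketch only shows the lookup mechanics; it says nothing about the relative call speed of cffi's ABI and API modes discussed in the changeset above.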
From pypy.commits at gmail.com Sat Aug 12 09:30:50 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 06:30:50 -0700 (PDT) Subject: [pypy-commit] cffi default: Another note about performance, this time in whatsnew.rst Message-ID: <598f030a.2e9ddf0a.15d48.f415@mx.google.com> Author: Armin Rigo Branch: Changeset: r3002:8a874e1b7a32 Date: 2017-08-12 15:30 +0200 http://bitbucket.org/cffi/cffi/changeset/8a874e1b7a32/ Log: Another note about performance, this time in whatsnew.rst diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -49,6 +49,10 @@ In the future, this might have an effect on CPython too (provided the CPython `issue 31105`__ is addressed). +* Add a note to the documentation: the ABI mode gives function objects + that are *slower* to call than the API mode does. For some reason it + is often thought to be faster. It is not! + .. __: https://bitbucket.org/cffi/cffi/issues/321/cffi-191-segmentation-fault-during-self .. __: ref.html#ffi-gc .. 
__: https://bitbucket.org/cffi/cffi/issues/320/improve-memory_pressure-management From pypy.commits at gmail.com Sat Aug 12 10:39:27 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 12 Aug 2017 07:39:27 -0700 (PDT) Subject: [pypy-commit] pypy reverse-debugger: Update the ASLR failure message Message-ID: <598f131f.2e9ddf0a.15d48.000e@mx.google.com> Author: Armin Rigo Branch: reverse-debugger Changeset: r92133:136c8d30b2d7 Date: 2017-08-12 16:38 +0200 http://bitbucket.org/pypy/pypy/changeset/136c8d30b2d7/ Log: Update the ASLR failure message diff --git a/rpython/translator/revdb/src-revdb/revdb.c b/rpython/translator/revdb/src-revdb/revdb.c --- a/rpython/translator/revdb/src-revdb/revdb.c +++ b/rpython/translator/revdb/src-revdb/revdb.c @@ -1031,6 +1031,12 @@ "command disables it manually:\n" "\n" " echo 0 | sudo tee /proc/sys/kernel/randomize_va_space\n" + "\n" + "It has been reported that on Linux kernel 4.12.4-1-ARCH,\n" + "ASLR cannot be disabled at all for libpypy-c.so. For now\n" + "there is no good solution. 
Either you downgrade the\n" + "kernel, or you translate with --no-shared (and you loose\n" + "PyPy's cpyext ability).\n" "\n", argv[0]); exit(1); } From pypy.commits at gmail.com Sat Aug 12 12:50:05 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 12 Aug 2017 09:50:05 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: Allow non-ascii extension names (PEP 489) Message-ID: <598f31bd.1aa4df0a.b6169.9793@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92134:9adc44d5f032 Date: 2017-08-12 18:49 +0200 http://bitbucket.org/pypy/pypy/changeset/9adc44d5f032/ Log: Allow non-ascii extension names (PEP 489) diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -26,6 +26,7 @@ from pypy.interpreter.module import Module from pypy.interpreter.function import StaticMethod from pypy.objspace.std.sliceobject import W_SliceObject +from pypy.objspace.std.unicodeobject import encode_object from pypy.module.__builtin__.descriptor import W_Property #from pypy.module.micronumpy.base import W_NDimArray from rpython.rlib.entrypoint import entrypoint_lowlevel @@ -1476,12 +1477,11 @@ # order of things here. 
from rpython.rlib import rdynload - name = space.text_w(space.getattr(w_spec, space.newtext("name"))) + w_name = space.getattr(w_spec, space.newtext("name")) path = space.text_w(space.getattr(w_spec, space.newtext("origin"))) if os.sep not in path: path = os.curdir + os.sep + path # force a '/' in the path - basename = name.split('.')[-1] try: ll_libname = rffi.str2charp(path) try: @@ -1489,13 +1489,14 @@ finally: lltype.free(ll_libname, flavor='raw') except rdynload.DLOpenError as e: - w_name = space.newunicode(name.decode('ascii')) w_path = space.newfilename(path) raise raise_import_error(space, space.newfilename(e.msg), w_name, w_path) look_for = None + name = space.text_w(w_name) # if space.config.objspace.usemodules._cffi_backend: + basename = name.split('.')[-1] look_for = '_cffi_pypyinit_%s' % (basename,) try: initptr = rdynload.dlsym(dll, look_for) @@ -1510,7 +1511,7 @@ raise # if space.config.objspace.usemodules.cpyext: - also_look_for = 'PyInit_%s' % (basename,) + also_look_for = get_init_name(space, w_name) try: initptr = rdynload.dlsym(dll, also_look_for) except KeyError: @@ -1523,10 +1524,21 @@ look_for = also_look_for msg = u"function %s not found in library %s" % ( unicode(look_for), space.unicode_w(space.newfilename(path))) - w_name = space.newunicode(name.decode('ascii')) w_path = space.newfilename(path) raise_import_error(space, space.newunicode(msg), w_name, w_path) +def get_init_name(space, w_name): + name_u = space.unicode_w(w_name) + basename_u = name_u.split(u'.')[-1] + try: + basename = basename_u.encode('ascii') + return 'PyInit_%s' % (basename,) + except UnicodeEncodeError: + basename = space.bytes_w(encode_object( + space, space.newunicode(basename_u), 'punycode', None)) + basename = basename.replace('-', '_') + return 'PyInitU_%s' % (basename,) + initfunctype = lltype.Ptr(lltype.FuncType([], PyObject)) @@ -1570,6 +1582,7 @@ name) finally: state.package_context = old_context + # XXX: should disable single-step init for non-ascii module 
names w_mod = get_w_obj_and_decref(space, initret) state.fixup_extension(w_mod, name, path) return w_mod diff --git a/pypy/module/cpyext/test/test_module.py b/pypy/module/cpyext/test/test_module.py --- a/pypy/module/cpyext/test/test_module.py +++ b/pypy/module/cpyext/test/test_module.py @@ -178,12 +178,13 @@ importlib.reload(module) assert ex_class is module.Example - def w_load_from_name(self, name, origin=None): + def w_load_from_name(self, name, origin=None, use_prefix=True): from importlib import machinery, util if not origin: module = self.import_module(name=self.name) origin = module.__loader__.path - name = '_testmultiphase_' + name + if use_prefix: + name = '_testmultiphase_' + name loader = machinery.ExtensionFileLoader(name, origin) spec = util.spec_from_loader(name, loader) module = util.module_from_spec(spec) @@ -228,3 +229,21 @@ excinfo = raises(SystemError, self.load_from_name, 'export_unreported_exception') assert "initialization" in excinfo.value.args[0] assert "unreported exception" in excinfo.value.args[0] + + def test_unloadable_nonascii(self): + name = u"fo\xf3" + excinfo = raises(ImportError, self.load_from_name, name) + assert excinfo.value.name == '_testmultiphase_' + name + + def test_nonascii(self): + module = self.import_module(name=self.name) + origin = module.__loader__.path + cases = [ + ('_testmultiphase_zkou\u0161ka_na\u010dten\xed', 'Czech'), + ('\uff3f\u30a4\u30f3\u30dd\u30fc\u30c8\u30c6\u30b9\u30c8', + 'Japanese'), + ] + for name, lang in cases: + module = self.load_from_name(name, origin=origin, use_prefix=False) + assert module.__name__ == name + assert module.__doc__ == "Module named in %s" % lang From pypy.commits at gmail.com Sat Aug 12 14:23:50 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 12 Aug 2017 11:23:50 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: Ensure that reloading an extension is a no-op Message-ID: <598f47b6.41931c0a.90df6.f384@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: 
r92135:b086a8161ab6 Date: 2017-08-12 20:23 +0200 http://bitbucket.org/pypy/pypy/changeset/b086a8161ab6/ Log: Ensure that reloading an extension is a no-op diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1595,6 +1595,8 @@ return space.getbuiltinmodule("cpyext") mod_as_pyobj = rawrefcount.from_obj(PyObject, w_mod) + if cts.cast('PyModuleObject*', mod_as_pyobj).c_md_state: + return if mod_as_pyobj: return exec_def(space, w_mod, mod_as_pyobj) diff --git a/pypy/module/cpyext/modsupport.py b/pypy/module/cpyext/modsupport.py --- a/pypy/module/cpyext/modsupport.py +++ b/pypy/module/cpyext/modsupport.py @@ -142,6 +142,11 @@ mod = rffi.cast(PyModuleObject, mod_as_pyobj) moddef = mod.c_md_def cur_slot = rffi.cast(rffi.CArrayPtr(PyModuleDef_Slot), moddef.c_m_slots) + if moddef.c_m_size >= 0 and not mod.c_md_state: + # Always set md_state, to use as marker for exec_extension_module() + # (cf. CPython's PyModule_ExecDef) + mod.c_md_state = lltype.malloc( + rffi.VOIDP.TO, moddef.c_m_size, flavor='raw', zero=True) while cur_slot and rffi.cast(lltype.Signed, cur_slot[0].c_slot): if rffi.cast(lltype.Signed, cur_slot[0].c_slot) == 2: execf = rffi.cast(execfunctype, cur_slot[0].c_value) diff --git a/pypy/module/cpyext/test/test_module.py b/pypy/module/cpyext/test/test_module.py --- a/pypy/module/cpyext/test/test_module.py +++ b/pypy/module/cpyext/test/test_module.py @@ -175,7 +175,8 @@ import importlib module = self.import_module(name=self.name) ex_class = module.Example - importlib.reload(module) + # Simulate what importlib.reload() does, without recomputing the spec + module.__spec__.loader.exec_module(module) assert ex_class is module.Example def w_load_from_name(self, name, origin=None, use_prefix=True): From pypy.commits at gmail.com Sat Aug 12 14:39:35 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 12 Aug 2017 11:39:35 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default 
Message-ID: <598f4b67.15b81c0a.9bd01.05b3@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92136:02ab6e8e42e8 Date: 2017-08-12 20:35 +0200 http://bitbucket.org/pypy/pypy/changeset/02ab6e8e42e8/ Log: hg merge default diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -73,6 +73,8 @@ if "_cppyy" in working_modules: working_modules.remove("_cppyy") # not tested on win32 + if "_vmprof" in working_modules: + working_modules.remove("_vmprof") # FIXME: missing details # The _locale module is needed by site.py on Windows default_modules.add("_locale") diff --git a/rpython/translator/c/src/signals.c b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -74,6 +74,7 @@ } #ifdef _WIN32 +#include <windows.h> #define atomic_cas(ptr, oldv, newv) (InterlockedCompareExchange(ptr, \ newv, oldv) == (oldv)) #else From pypy.commits at gmail.com Sat Aug 12 14:39:37 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 12 Aug 2017 11:39:37 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: hg merge py3.5 Message-ID: <598f4b69.5ba4df0a.f8c11.d2e0@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92137:154db7d0b56e Date: 2017-08-12 20:37 +0200 http://bitbucket.org/pypy/pypy/changeset/154db7d0b56e/ Log: hg merge py3.5 diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -73,6 +73,8 @@ if "_cppyy" in working_modules: working_modules.remove("_cppyy") # not tested on win32 + if "_vmprof" in working_modules: + working_modules.remove("_vmprof") # FIXME: missing details # The _locale module is needed by site.py on Windows default_modules.add("_locale") diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -596,8 +596,9 @@ new_inst = mod.get('builtin_method_new') 
tup = [w_instance, space.newtext(w_function.name)] else: - new_inst = mod.get('method_new') - tup = [self.w_function, w_instance] + w_builtins = space.getbuiltinmodule('builtins') + new_inst = space.getattr(w_builtins, space.newtext('getattr')) + tup = [w_instance, space.newtext(self.w_function.name)] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py --- a/pypy/interpreter/test/test_function.py +++ b/pypy/interpreter/test/test_function.py @@ -49,6 +49,19 @@ assert f().__qualname__ == 'inner_global' assert f()().__qualname__ == 'inner_global.<locals>.inner_function2' + def test_classmethod_reduce(self): + class X(object): + @classmethod + def y(cls): + pass + + f, args = X.y.__reduce__() + assert f(*args) == X.y + # This is perhaps overly specific. It's an attempt to be certain that + # pickle will actually work with this implementation. + assert f == getattr + assert args == (X, "y") + def test_annotations(self): def f(): pass ann = f.__annotations__ diff --git a/rpython/translator/c/src/signals.c b/rpython/translator/c/src/signals.c --- a/rpython/translator/c/src/signals.c +++ b/rpython/translator/c/src/signals.c @@ -74,6 +74,7 @@ } #ifdef _WIN32 +#include <windows.h> #define atomic_cas(ptr, oldv, newv) (InterlockedCompareExchange(ptr, \ newv, oldv) == (oldv)) #else From pypy.commits at gmail.com Sat Aug 12 15:35:01 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 12 Aug 2017 12:35:01 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: suggest some wording/editorial changes Message-ID: <598f5865.1aa4df0a.b6169.bc82@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r5830:c40625c68b87 Date: 2017-08-12 22:34 +0300 http://bitbucket.org/pypy/extradoc/changeset/c40625c68b87/ Log: suggest some wording/editorial changes diff --git a/blog/draft/remove-gil.rst b/blog/draft/remove-gil.rst --- a/blog/draft/remove-gil.rst +++ b/blog/draft/remove-gil.rst @@ -3,31 +3,33 @@ Hello 
everyone. -Discussions about the infamous Global Interpreter Lock have been around for a while -in the Python community. There has been various attempts at removing it: -some were successful, like e.g. in Jython or IronPython with the help of the platform, and some yet to bear fruit, like `gilectomy`_. Since our `February sprint`_ in Leysin, -we've been on-and-off tackling directly the topic of GIL removal in the PyPy project. +The Python community has been discussing removing the Global Interpreter Lock for +a long time. +There have been various attempts at removing it: +Jython or IronPython successfully removed it with the help of the underlying +platform, and some have yet to bear fruit, like `gilectomy`_. Since our `February sprint`_ in Leysin, +we have experimented with the topic of GIL removal in the PyPy project. We believe that the work done in IronPython or Jython can be reproduced with -only a bit more effort. Compared to that, removing the GIL in CPython is a much +only a bit more effort in PyPy. Compared to that, removing the GIL in CPython is a much harder topic, since it also requires tackling the problem of multi-threaded reference -counting. See the section below for further discussions. +counting. See the section below for further details. .. _`February sprint`: https://morepypy.blogspot.it/2017/03/leysin-winter-sprint-summary.html -As we announced at EuroPython, what we have got so far is a GIL-less PyPy -which can run **very simple** multi-threaded programs which are nicely -parallelized. At the moment, non-simple programs most likely segfaults: the +As we announced at EuroPython, what we have so far is a GIL-less PyPy +which can run **very simple** multi-threaded, nicely parallelized, programs. +At the moment, more complicated programs probably segfault. 
The remaining 90% (and another 90%) of work is with putting locks in strategic -places so PyPy does not segfault when you try to do concurrent accesses to +places so PyPy does not segfault during concurrent accesses to data structures. -Since such work would complicate the code base and our day-to-day work, -we would like to judge the interest on the community and the commercial +Since such work would complicate the PyPy code base and our day-to-day work, +we would like to judge the interest of the community and the commercial partners to make it happen (we are not looking for individual donations at this point). We estimate a total cost of $50k, out of which we already have backing for about 1/3 (with a possible 1/3 extra from the STM money, see below). This would give us a good -shot at delivering a good proof of concept of working PyPy no-GIL. If we can get a $100k +shot at delivering a good proof-of-concept working PyPy with no GIL. If we can get a $100k contract, we will deliver a fully working PyPy interpreter with no GIL as a release, possibly separate from the default PyPy release. @@ -36,42 +38,45 @@ * What would the plan entail? -We've already done the work on Garbage Collector to allow doing multi- +We've already done the work on the Garbage Collector to allow doing multi- threaded programs in RPython. "All" that is left is adding locks on mutable -data structures everywhere in the PyPy codebase. Since it'll significantly complicate -our workflow, we need to see real interest in that topic, backed up by -commercial contracts, otherwise we're not going to do it. +data structures everywhere in the PyPy codebase. Since it would significantly complicate +our workflow, we require real interest in that topic, backed up by +commercial contracts in order to justify the added maintenance burden. -* Why the STM effort did not work out? +* Why did the STM effort not work out? STM was a research project that proved that the idea is possible. 
However, the amount of user effort that is required to make programs run in a parallelizable way is significant, and we never managed to develop tools -that would help in doing so. At the moment we're not sure if more manpower +that would help in doing so. At the moment we're not sure if more work spent on tooling would improve the situation or if the whole idea is really doomed. -The approach also ended up being a significant overhead on single threaded programs, +The approach also ended up adding significant overhead on single threaded programs, so in the end it is very easy to make your programs slower. (We have some money left in the donation pot for STM which we are not using; according to the rules, we could declare the STM attempt failed and channel that money towards the present GIL removal proposal.) -* Would subinterpreters not be a better idea? +* Wouldn't subinterpreters be a better idea? Python is a very mutable language - there are tons of mutable state and basic objects (classes, functions,...) that are compile-time in other language but runtime and fully mutable in Python. In the end, sharing things between subinterpreters would be restricted to basic immutable -data structures, which defeats the point: it has the same problems as -the approach of having multiple processes and no additional benefits. -We believe that this is not viable without seriously impacting the +data structures, which defeats the point. Subinterpreters suffers from the same problems as +multiprocessing with no additional benefits. +We believe that reducing mutability to implement subinterpreters is not viable without seriously impacting the semantics of the language (a conclusion which applies to many other approaches too). * Why is it easier to do in PyPy than CPython? -Removing the GIL in CPython has two problems - how do we guard access to mutable -data structures with locks and what do we do with reference counting that needs -to be guarded. 
PyPy only has the former problem; the latter doesn't exist, +Removing the GIL in CPython has two problems: + +- how do we guard access to mutable data structures with locks and +- what to do with reference counting that needs to be guarded. + +PyPy only has the former problem; the latter doesn't exist, due to a different garbage collector approach. Of course the first problem is a mess too, but at least we are already half-way there. Compared to Jython or IronPython, PyPy lacks some data structures that are provided by JVM or .NET, From pypy.commits at gmail.com Sat Aug 12 18:01:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 12 Aug 2017 15:01:59 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: fix translation Message-ID: <598f7ad7.aa8adf0a.b0db5.aa25@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92138:4c0d6aa834a4 Date: 2017-08-13 00:01 +0200 http://bitbucket.org/pypy/pypy/changeset/4c0d6aa834a4/ Log: fix translation diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -598,7 +598,7 @@ else: w_builtins = space.getbuiltinmodule('builtins') new_inst = space.getattr(w_builtins, space.newtext('getattr')) - tup = [w_instance, space.newtext(self.w_function.name)] + tup = [w_instance, space.newunicode(w_function.getname(space))] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -46,7 +46,7 @@ class W_PyCFunctionObject(W_Root): def __init__(self, space, ml, w_self, w_module=None): self.ml = ml - self.name = rffi.charp2str(rffi.cast(rffi.CCHARP,self.ml.c_ml_name)) + self.name = rffi.charp2str(rffi.cast(rffi.CCHARP, self.ml.c_ml_name)) self.w_self = w_self self.w_module = w_module @@ -205,8 +205,8 @@ args_w, kw_w = __args__.unpack() if len(args_w) < 1: raise oefmt(space.w_TypeError, - 
"descriptor '%s' of '%s' object needs an argument", - self.name, self.w_objclass.getname(space)) + "descriptor '%8' of '%N' object needs an argument", + self.name, self.w_objclass) w_instance = args_w[0] # XXX typecheck missing w_args = space.newtuple(args_w[1:]) w_kw = space.newdict() @@ -328,8 +328,8 @@ if not method.c_ml_name: break if name == "__methods__": - method_list_w.append( - space.newtext(rffi.charp2str(rffi.cast(rffi.CCHARP, method.c_ml_name)))) + method_list_w.append(space.newtext(rffi.charp2str( + rffi.cast(rffi.CCHARP, method.c_ml_name)))) elif rffi.charp2str(rffi.cast(rffi.CCHARP, method.c_ml_name)) == name: # XXX expensive copy return W_PyCFunctionObject(space, method, w_obj) if name == "__methods__": From pypy.commits at gmail.com Sat Aug 12 18:03:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 12 Aug 2017 15:03:26 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: hg merge py3.5 Message-ID: <598f7b2e.9398df0a.48c3d.f9cc@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92139:3ddef501ba8e Date: 2017-08-13 00:02 +0200 http://bitbucket.org/pypy/pypy/changeset/3ddef501ba8e/ Log: hg merge py3.5 diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -598,7 +598,7 @@ else: w_builtins = space.getbuiltinmodule('builtins') new_inst = space.getattr(w_builtins, space.newtext('getattr')) - tup = [w_instance, space.newtext(self.w_function.name)] + tup = [w_instance, space.newunicode(w_function.getname(space))] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -46,7 +46,7 @@ class W_PyCFunctionObject(W_Root): def __init__(self, space, ml, w_self, w_module=None): self.ml = ml - self.name = rffi.charp2str(rffi.cast(rffi.CCHARP,self.ml.c_ml_name)) + self.name = 
rffi.charp2str(rffi.cast(rffi.CCHARP, self.ml.c_ml_name)) self.w_self = w_self self.w_module = w_module @@ -205,8 +205,8 @@ args_w, kw_w = __args__.unpack() if len(args_w) < 1: raise oefmt(space.w_TypeError, - "descriptor '%s' of '%s' object needs an argument", - self.name, self.w_objclass.getname(space)) + "descriptor '%8' of '%N' object needs an argument", + self.name, self.w_objclass) w_instance = args_w[0] # XXX typecheck missing w_args = space.newtuple(args_w[1:]) w_kw = space.newdict() @@ -328,8 +328,8 @@ if not method.c_ml_name: break if name == "__methods__": - method_list_w.append( - space.newtext(rffi.charp2str(rffi.cast(rffi.CCHARP, method.c_ml_name)))) + method_list_w.append(space.newtext(rffi.charp2str( + rffi.cast(rffi.CCHARP, method.c_ml_name)))) elif rffi.charp2str(rffi.cast(rffi.CCHARP, method.c_ml_name)) == name: # XXX expensive copy return W_PyCFunctionObject(space, method, w_obj) if name == "__methods__": From pypy.commits at gmail.com Sun Aug 13 10:07:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 13 Aug 2017 07:07:22 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: fix exec_extension_module() Message-ID: <59905d1a.a799df0a.ad850.e7e2@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92140:0385d595a7df Date: 2017-08-13 16:06 +0200 http://bitbucket.org/pypy/pypy/changeset/0385d595a7df/ Log: fix exec_extension_module() diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1595,9 +1595,10 @@ return space.getbuiltinmodule("cpyext") mod_as_pyobj = rawrefcount.from_obj(PyObject, w_mod) - if cts.cast('PyModuleObject*', mod_as_pyobj).c_md_state: - return if mod_as_pyobj: + if cts.cast('PyModuleObject*', mod_as_pyobj).c_md_state: + # already initialised + return return exec_def(space, w_mod, mod_as_pyobj) @specialize.ll() From pypy.commits at gmail.com Sun Aug 13 14:46:46 2017 From: pypy.commits at gmail.com (minrk) Date: Sun, 13 Aug 2017 
11:46:46 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: add darwin patches to sysconfig Message-ID: <59909e96.b785df0a.3b8b7.8c94@mx.google.com> Author: Min RK Branch: py3.5 Changeset: r92141:92ede31b23c6 Date: 2017-08-13 12:11 +0000 http://bitbucket.org/pypy/pypy/changeset/92ede31b23c6/ Log: add darwin patches to sysconfig copied patch from py2.7 adds flags such as -arch and -undefined dynamic_lookup diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -81,6 +81,19 @@ g['LIBDIR'] = os.path.join(sys.prefix, 'lib') g['VERSION'] = get_python_version() + if sys.platform[:6] == "darwin": + import platform + if platform.machine() == 'i386': + if platform.architecture()[0] == '32bit': + arch = 'i386' + else: + arch = 'x86_64' + else: + # just a guess + arch = platform.machine() + g['LDSHARED'] += ' -undefined dynamic_lookup' + g['CC'] += ' -arch %s' % (arch,) + global _config_vars _config_vars = g From pypy.commits at gmail.com Mon Aug 14 08:17:02 2017 From: pypy.commits at gmail.com (exarkun) Date: Mon, 14 Aug 2017 05:17:02 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Replace fragile upstream expat exception test with a more reasonable one Message-ID: <599194be.5597df0a.e9f1e.4ca6@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92142:e2a71b4f385a Date: 2017-08-14 08:16 -0400 http://bitbucket.org/pypy/pypy/changeset/e2a71b4f385a/ Log: Replace fragile upstream expat exception test with a more reasonable one diff --git a/lib-python/3/test/test_pyexpat.py b/lib-python/3/test/test_pyexpat.py --- a/lib-python/3/test/test_pyexpat.py +++ b/lib-python/3/test/test_pyexpat.py @@ -11,7 +11,7 @@ from xml.parsers import expat from xml.parsers.expat import errors -from test.support import sortdict +from test.support import sortdict, impl_detail class SetAttributeTest(unittest.TestCase): @@ -446,6 +446,7 @@ 
self.assertEqual(os.path.basename(entry[0]), filename) self.assertEqual(entry[2], funcname) + @impl_detail("PyPy does not have pyexpat.c", pypy=False) def test_exception(self): parser = expat.ParserCreate() parser.StartElementHandler = self.StartElementHandler @@ -458,6 +459,7 @@ " found %r" % e.args[0]) # Check that the traceback contains the relevant line in pyexpat.c entries = traceback.extract_tb(e.__traceback__) + print(entries) self.assertEqual(len(entries), 3) self.check_traceback_entry(entries[0], "test_pyexpat.py", "test_exception") diff --git a/pypy/module/pyexpat/test/test_parser.py b/pypy/module/pyexpat/test/test_parser.py --- a/pypy/module/pyexpat/test/test_parser.py +++ b/pypy/module/pyexpat/test/test_parser.py @@ -210,6 +210,30 @@ p.ParseFile(fake_reader) assert fake_reader.read_count == 4 + + def test_exception(self): + """ + lib-python/3/test_pyexpat.py:HandlerExceptionTest.test_exception port + without the fragile traceback inspection. + """ + import pyexpat as expat + + def StartElementHandler(name, attrs): + raise RuntimeError(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = StartElementHandler + + try: + parser.Parse(b"", 1) + self.fail() + except RuntimeError as e: + assert e.args[0] == 'a', ( + "Expected RuntimeError for element 'a', but" + \ + " found %r" % e.args[0] + ) + + class AppTestPyexpat2: spaceconfig = dict(usemodules=['_rawffi', 'pyexpat', 'itertools', '_socket', 'time', 'struct', 'binascii', From pypy.commits at gmail.com Mon Aug 14 08:18:33 2017 From: pypy.commits at gmail.com (exarkun) Date: Mon, 14 Aug 2017 05:18:33 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Remove debug print Message-ID: <59919519.97a0df0a.e200a.1e17@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92143:dc0a896dbbd6 Date: 2017-08-14 08:17 -0400 http://bitbucket.org/pypy/pypy/changeset/dc0a896dbbd6/ Log: Remove debug print diff --git a/lib-python/3/test/test_pyexpat.py b/lib-python/3/test/test_pyexpat.py --- 
a/lib-python/3/test/test_pyexpat.py +++ b/lib-python/3/test/test_pyexpat.py @@ -459,7 +459,6 @@ " found %r" % e.args[0]) # Check that the traceback contains the relevant line in pyexpat.c entries = traceback.extract_tb(e.__traceback__) - print(entries) self.assertEqual(len(entries), 3) self.check_traceback_entry(entries[0], "test_pyexpat.py", "test_exception") From pypy.commits at gmail.com Mon Aug 14 10:39:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 14 Aug 2017 07:39:56 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: fix crash when non-ascii extension cannot be found Message-ID: <5991b63c.4bd51c0a.6fc82.724d@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92144:8c278da05b4c Date: 2017-08-14 16:39 +0200 http://bitbucket.org/pypy/pypy/changeset/8c278da05b4c/ Log: fix crash when non-ascii extension cannot be found diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1523,7 +1523,7 @@ else: look_for = also_look_for msg = u"function %s not found in library %s" % ( - unicode(look_for), space.unicode_w(space.newfilename(path))) + look_for.decode('utf-8'), space.unicode_w(space.newfilename(path))) w_path = space.newfilename(path) raise_import_error(space, space.newunicode(msg), w_name, w_path) From pypy.commits at gmail.com Mon Aug 14 11:24:48 2017 From: pypy.commits at gmail.com (exarkun) Date: Mon, 14 Aug 2017 08:24:48 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Use utf-8 for readline history file. Message-ID: <5991c0c0.43491c0a.4c481.ddc0@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92145:759e514cd5ab Date: 2017-08-14 11:23 -0400 http://bitbucket.org/pypy/pypy/changeset/759e514cd5ab/ Log: Use utf-8 for readline history file. 
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py --- a/lib_pypy/pyrepl/readline.py +++ b/lib_pypy/pyrepl/readline.py @@ -297,10 +297,7 @@ line = line.rstrip('\n') if isinstance(line, unicode): return line # on py3k - try: - return unicode(line, ENCODING) - except UnicodeDecodeError: # bah, silently fall back... - return unicode(line, 'utf-8', 'replace') + return unicode(line, 'utf-8', 'replace') def get_history_length(self): return self.saved_history_length @@ -317,7 +314,7 @@ # history item: we use \r\n instead of just \n. If the history # file is passed to GNU readline, the extra \r are just ignored. history = self.get_reader().history - f = open(os.path.expanduser(filename), 'r') + f = open(os.path.expanduser(filename), 'r', encoding='utf-8') buffer = [] for line in f: if line.endswith('\r\n'): @@ -334,15 +331,12 @@ def write_history_file(self, filename='~/.history'): maxlength = self.saved_history_length history = self.get_reader().get_trimmed_history(maxlength) - f = open(os.path.expanduser(filename), 'w') + f = open(os.path.expanduser(filename), 'w', encoding='utf-8') for entry in history: # if we are on py3k, we don't need to encode strings before # writing it to a file if isinstance(entry, unicode) and sys.version_info < (3,): - try: - entry = entry.encode(ENCODING) - except UnicodeEncodeError: # bah, silently fall back... 
- entry = entry.encode('utf-8') + entry = entry.encode('utf-8') entry = entry.replace('\n', '\r\n') # multiline history support f.write(entry + '\n') f.close() diff --git a/pypy/module/readline/test/test_readline.py b/pypy/module/readline/test/test_readline.py new file mode 100644 --- /dev/null +++ b/pypy/module/readline/test/test_readline.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +class AppTestReadline: + spaceconfig = dict(usemodules={ + 'unicodedata', 'termios', 'select', 'signal', 'fcntl', + '_minimal_curses', 'faulthandler', '_socket', 'binascii', + '_posixsubprocess', + }) + + def test_nonascii_history(self): + import os, readline + TESTFN = "{}_{}_tmp".format("@test", os.getpid()) + + is_editline = readline.__doc__ and "libedit" in readline.__doc__ + + readline.clear_history() + try: + readline.add_history("entrée 1") + except UnicodeEncodeError as err: + skip("Locale cannot encode test data: " + format(err)) + readline.add_history("entrée 2") + readline.replace_history_item(1, "entrée 22") + readline.write_history_file(TESTFN) + readline.clear_history() + readline.read_history_file(TESTFN) + if is_editline: + # An add_history() call seems to be required for get_history_ + # item() to register items from the file + readline.add_history("dummy") + assert readline.get_history_item(1) == "entrée 1" + assert readline.get_history_item(2) == "entrée 22" From pypy.commits at gmail.com Mon Aug 14 13:15:44 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 14 Aug 2017 10:15:44 -0700 (PDT) Subject: [pypy-commit] pypy default: A corner case for super.__get__ Message-ID: <5991dac0.2cacdf0a.e83e2.419b@mx.google.com> Author: Armin Rigo Branch: Changeset: r92146:35cb17be8aae Date: 2017-08-14 19:12 +0200 http://bitbucket.org/pypy/pypy/changeset/35cb17be8aae/ Log: A corner case for super.__get__ diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py --- a/pypy/module/__builtin__/descriptor.py +++ 
b/pypy/module/__builtin__/descriptor.py @@ -16,12 +16,12 @@ def descr_init(self, space, w_starttype, w_obj_or_type=None): if space.is_none(w_obj_or_type): w_type = None # unbound super object - w_obj_or_type = space.w_None + w_obj_or_type = None else: w_type = _super_check(space, w_starttype, w_obj_or_type) self.w_starttype = w_starttype self.w_objtype = w_type - self.w_self = w_obj_or_type + self.w_self = w_obj_or_type # may be None def descr_repr(self, space): if self.w_objtype is not None: objtype_name = "<%s object>" % self.w_objtype.getname(space) else: objtype_name = 'NULL' @@ -32,7 +32,7 @@ self.w_starttype.getname(space), objtype_name)) def get(self, space, w_obj, w_type=None): - if self.w_self is None or space.is_w(w_obj, space.w_None): + if self.w_self is not None or space.is_w(w_obj, space.w_None): return self else: # if type(self) is W_Super: @@ -54,10 +54,9 @@ return w_value # Only pass 'obj' param if this is instance-mode super # (see CPython sourceforge id #743627) - if self.w_self is self.w_objtype: + w_obj = self.w_self + if w_obj is None or w_obj is self.w_objtype: w_obj = space.w_None - else: - w_obj = self.w_self return space.get_and_call_function(w_get, w_value, w_obj, self.w_objtype) # fallback to object.__getattribute__() diff --git a/pypy/module/__builtin__/test/test_descriptor.py b/pypy/module/__builtin__/test/test_descriptor.py --- a/pypy/module/__builtin__/test/test_descriptor.py +++ b/pypy/module/__builtin__/test/test_descriptor.py @@ -280,6 +280,14 @@ assert repr(A()).endswith('>!') assert repr(super(A, A())) == "<super: <class 'A'>, <A object>>" + def test_super_get_corner_case(self): + class A(object): + pass + s = super(A, A()) + assert s.__get__(42) is s + s = super(A) + assert s.__get__(None, "anything") is s + def test_property_docstring(self): assert property.__doc__.startswith('property') From pypy.commits at gmail.com Mon Aug 14 13:15:46 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 14 Aug 2017 10:15:46 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2629 Message-ID: <5991dac2.e681df0a.246b.7c79@mx.google.com> Author: Armin 
Rigo Branch: Changeset: r92147:8775f1e1bf8b Date: 2017-08-14 19:14 +0200 http://bitbucket.org/pypy/pypy/changeset/8775f1e1bf8b/ Log: Issue #2629 Uninitialized instances of 'property' or 'super' should not cause segfaults diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py --- a/pypy/module/__builtin__/descriptor.py +++ b/pypy/module/__builtin__/descriptor.py @@ -28,12 +28,19 @@ objtype_name = "<%s object>" % self.w_objtype.getname(space) else: objtype_name = 'NULL' + if self.w_starttype is not None: + starttype_name = self.w_starttype.getname(space) + else: + starttype_name = 'NULL' return space.newtext("<super: <class '%s'>, %s>" % ( - self.w_starttype.getname(space), objtype_name)) + starttype_name, objtype_name)) def get(self, space, w_obj, w_type=None): if self.w_self is not None or space.is_w(w_obj, space.w_None): return self + if self.w_starttype is None: + raise oefmt(space.w_TypeError, + "__get__(x) is invalid on an uninitialized instance of 'super'") else: # if type(self) is W_Super: # XXX write a fast path for this common case @@ -45,6 +52,7 @@ # only use a special logic for bound super objects and not for # getting the __class__ of the super object itself. 
if self.w_objtype is not None and name != '__class__': + assert self.w_starttype is not None w_value = space.lookup_in_type_starting_at(self.w_objtype, self.w_starttype, name) @@ -114,7 +122,11 @@ _immutable_fields_ = ["w_fget", "w_fset", "w_fdel"] def __init__(self, space): - pass + self.w_fget = space.w_None + self.w_fset = space.w_None + self.w_fdel = space.w_None + self.w_doc = space.w_None + self.getter_doc = False @unwrap_spec(w_fget=WrappedDefault(None), w_fset=WrappedDefault(None), diff --git a/pypy/module/__builtin__/test/test_descriptor.py b/pypy/module/__builtin__/test/test_descriptor.py --- a/pypy/module/__builtin__/test/test_descriptor.py +++ b/pypy/module/__builtin__/test/test_descriptor.py @@ -419,3 +419,35 @@ assert x.y == 42 del x.x assert x.z == 42 + + def test_uninitialized_property(self): + p = property.__new__(property) + raises(AttributeError, p.__get__, 42) + raises(AttributeError, p.__set__, 42, None) + raises(AttributeError, p.__delete__, 42) + assert repr(p).startswith("<property object at ") + + def test_uninitialized_super(self): + s = super.__new__(super) + assert repr(s) == "<super: <class 'NULL'>, NULL>" + assert s.__thisclass__ is s.__self__ is s.__self_class__ is None + assert s.__get__(None, "anything") is s + raises(TypeError, s.__get__, 42) + raises(TypeError, s.__get__, int) + raises(TypeError, s.__get__, type(None)) + raises(AttributeError, "s.abcde") + raises(AttributeError, "s.abcde = 42") + raises(AttributeError, "del s.abcde") From pypy.commits at gmail.com Mon Aug 14 13:21:58 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 14 Aug 2017 10:21:58 -0700 (PDT) Subject: [pypy-commit] pypy default: More tests Message-ID: <5991dc36.50131c0a.8570b.f81f@mx.google.com> Author: Armin Rigo Branch: Changeset: r92148:bffaeddee9f5 Date: 2017-08-14 19:21 +0200 http://bitbucket.org/pypy/pypy/changeset/bffaeddee9f5/ Log: More tests diff --git a/pypy/module/__builtin__/test/test_descriptor.py b/pypy/module/__builtin__/test/test_descriptor.py --- a/pypy/module/__builtin__/test/test_descriptor.py +++ b/pypy/module/__builtin__/test/test_descriptor.py @@ -283,10 
+283,18 @@ def test_super_get_corner_case(self): class A(object): pass - s = super(A, A()) - assert s.__get__(42) is s - s = super(A) - assert s.__get__(None, "anything") is s + s1 = super(A, A()) + assert s1.__get__(42) is s1 + assert s1.__get__(42, int) is s1 + s2 = super(A) + assert s2.__get__(None, "anything") is s2 + # + assert s1.__get__(None, "anything") is s1 + raises(TypeError, s2.__get__, 42) + raises(TypeError, s2.__get__, 42, int) + a = A() + assert s2.__get__(a).__self__ is a + assert s1.__get__(a) is s1 def test_property_docstring(self): assert property.__doc__.startswith('property') From pypy.commits at gmail.com Mon Aug 14 13:24:08 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 14 Aug 2017 10:24:08 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5991dcb8.0e951c0a.449d1.5944@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92149:90985eb27be1 Date: 2017-08-14 19:23 +0200 http://bitbucket.org/pypy/pypy/changeset/90985eb27be1/ Log: hg merge default diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py --- a/pypy/module/__builtin__/descriptor.py +++ b/pypy/module/__builtin__/descriptor.py @@ -21,24 +21,31 @@ if space.is_none(w_obj_or_type): w_type = None # unbound super object - w_obj_or_type = space.w_None + w_obj_or_type = None else: w_type = _super_check(space, w_starttype, w_obj_or_type) self.w_starttype = w_starttype self.w_objtype = w_type - self.w_self = w_obj_or_type + self.w_self = w_obj_or_type # may be None def descr_repr(self, space): if self.w_objtype is not None: objtype_name = u"<%s object>" % self.w_objtype.getname(space) else: objtype_name = u'NULL' + if self.w_starttype is not None: + starttype_name = self.w_starttype.getname(space) + else: + starttype_name = u'NULL' return space.newunicode(u"<super: <class '%s'>, %s>" % ( - self.w_starttype.getname(space), objtype_name)) + starttype_name, objtype_name)) def get(self, space, w_obj, w_type=None): - if self.w_self is None 
or space.is_w(w_obj, space.w_None): + if self.w_self is not None or space.is_w(w_obj, space.w_None): return self + if self.w_starttype is None: + raise oefmt(space.w_TypeError, + "__get__(x) is invalid on an uninitialized instance of 'super'") else: # if type(self) is W_Super: # XXX write a fast path for this common case @@ -50,6 +57,7 @@ # only use a special logic for bound super objects and not for # getting the __class__ of the super object itself. if self.w_objtype is not None and name != '__class__': + assert self.w_starttype is not None w_value = space.lookup_in_type_starting_at(self.w_objtype, self.w_starttype, name) @@ -59,10 +67,9 @@ return w_value # Only pass 'obj' param if this is instance-mode super # (see CPython sourceforge id #743627) - if self.w_self is self.w_objtype: + w_obj = self.w_self + if w_obj is None or w_obj is self.w_objtype: w_obj = space.w_None - else: - w_obj = self.w_self return space.get_and_call_function(w_get, w_value, w_obj, self.w_objtype) # fallback to object.__getattribute__() @@ -160,7 +167,11 @@ _immutable_fields_ = ["w_fget", "w_fset", "w_fdel"] def __init__(self, space): - pass + self.w_fget = space.w_None + self.w_fset = space.w_None + self.w_fdel = space.w_None + self.w_doc = space.w_None + self.getter_doc = False @unwrap_spec(w_fget=WrappedDefault(None), w_fset=WrappedDefault(None), diff --git a/pypy/module/__builtin__/test/test_descriptor.py b/pypy/module/__builtin__/test/test_descriptor.py --- a/pypy/module/__builtin__/test/test_descriptor.py +++ b/pypy/module/__builtin__/test/test_descriptor.py @@ -333,6 +333,22 @@ assert repr(A()).endswith('>!') assert repr(super(A, A())) == "<super: <class 'A'>, <A object>>" + def test_super_get_corner_case(self): + class A(object): + pass + s1 = super(A, A()) + assert s1.__get__(42) is s1 + assert s1.__get__(42, int) is s1 + s2 = super(A) + assert s2.__get__(None, "anything") is s2 + # + assert s1.__get__(None, "anything") is s1 + raises(TypeError, s2.__get__, 42) + raises(TypeError, s2.__get__, 42, int) + a = 
A() + assert s2.__get__(a).__self__ is a + assert s1.__get__(a) is s1 + def test_property_docstring(self): assert property.__doc__.startswith('property') @@ -520,3 +536,35 @@ super() raises(RuntimeError, X().f) """ + + def test_uninitialized_property(self): + p = property.__new__(property) + raises(AttributeError, p.__get__, 42) + raises(AttributeError, p.__set__, 42, None) + raises(AttributeError, p.__delete__, 42) + assert repr(p).startswith("<property object at ") + + def test_uninitialized_super(self): + s = super.__new__(super) + assert repr(s) == "<super: <class 'NULL'>, NULL>" + assert s.__thisclass__ is s.__self__ is s.__self_class__ is None + assert s.__get__(None, "anything") is s + raises(TypeError, s.__get__, 42) + raises(TypeError, s.__get__, int) + raises(TypeError, s.__get__, type(None)) + raises(AttributeError, "s.abcde") + raises(AttributeError, "s.abcde = 42") + raises(AttributeError, "del s.abcde") From pypy.commits at gmail.com Mon Aug 14 14:50:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 14 Aug 2017 11:50:34 -0700 (PDT) Subject: [pypy-commit] pypy default: Do not leak reference when using PyModule_AddObject (reverts 5e783074ef82) Message-ID: <5991f0fa.b785df0a.3b8b7.d4db@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92150:9ac0b00b959a Date: 2017-08-14 20:49 +0200 http://bitbucket.org/pypy/pypy/changeset/9ac0b00b959a/ Log: Do not leak reference when using PyModule_AddObject (reverts 5e783074ef82) diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c --- a/pypy/module/cpyext/src/modsupport.c +++ b/pypy/module/cpyext/src/modsupport.c @@ -516,12 +516,10 @@ return res; } -/* returns -1 in case of error, 0 if a new key was added, 1 if the key - was already there (and replaced) */ -static int -_PyModule_AddObject_NoConsumeRef(PyObject *m, const char *name, PyObject *o) +int +PyModule_AddObject(PyObject *m, const char *name, PyObject *o) { - PyObject *dict, *prev; + PyObject *dict; if (!PyModule_Check(m)) { PyErr_SetString(PyExc_TypeError, "PyModule_AddObject() needs module as first arg"); @@ -541,47 +539,32 @@ 
PyModule_GetName(m)); return -1; } - prev = PyDict_GetItemString(dict, name); if (PyDict_SetItemString(dict, name, o)) return -1; - return prev != NULL; -} - -int -PyModule_AddObject(PyObject *m, const char *name, PyObject *o) -{ - int result = _PyModule_AddObject_NoConsumeRef(m, name, o); - /* XXX WORKAROUND for a common misusage of PyModule_AddObject: - for the common case of adding a new key, we don't consume a - reference, but instead just leak it away. The issue is that - people generally don't realize that this function consumes a - reference, because on CPython the reference is still stored - on the dictionary. */ - if (result != 0) - Py_DECREF(o); - return result < 0 ? -1 : 0; + Py_DECREF(o); + return 0; } int PyModule_AddIntConstant(PyObject *m, const char *name, long value) { - int result; PyObject *o = PyInt_FromLong(value); if (!o) return -1; - result = _PyModule_AddObject_NoConsumeRef(m, name, o); + if (PyModule_AddObject(m, name, o) == 0) + return 0; Py_DECREF(o); - return result < 0 ? -1 : 0; + return -1; } int PyModule_AddStringConstant(PyObject *m, const char *name, const char *value) { - int result; PyObject *o = PyString_FromString(value); if (!o) return -1; - result = _PyModule_AddObject_NoConsumeRef(m, name, o); + if (PyModule_AddObject(m, name, o) == 0) + return 0; Py_DECREF(o); - return result < 0 ? 
-1 : 0; + return -1; } diff --git a/pypy/module/cpyext/test/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py --- a/pypy/module/cpyext/test/test_capsule.py +++ b/pypy/module/cpyext/test/test_capsule.py @@ -12,9 +12,6 @@ if (PyErr_Occurred()) return NULL; module = PyImport_ImportModule("foo"); PyModule_AddObject(module, "_ptr", capsule); - #ifdef PYPY_VERSION - Py_DECREF(capsule); /* XXX <--- anti-workaround */ - #endif Py_DECREF(module); if (PyErr_Occurred()) return NULL; Py_RETURN_NONE; diff --git a/pypy/module/cpyext/test/test_pycobject.py b/pypy/module/cpyext/test/test_pycobject.py --- a/pypy/module/cpyext/test/test_pycobject.py +++ b/pypy/module/cpyext/test/test_pycobject.py @@ -3,8 +3,6 @@ class AppTestStringObject(AppTestCpythonExtensionBase): def test_pycobject_import(self): - if self.runappdirect: - py.test.xfail('segfault') module = self.import_extension('foo', [ ("set_ptr", "METH_O", """ @@ -15,7 +13,6 @@ if (PyErr_Occurred()) return NULL; module = PyImport_ImportModule("foo"); PyModule_AddObject(module, "_ptr", pointer); - Py_DECREF(pointer); /* XXX <--- anti-workaround */ Py_DECREF(module); if (PyErr_Occurred()) return NULL; Py_RETURN_NONE; From pypy.commits at gmail.com Mon Aug 14 14:54:05 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 14 Aug 2017 11:54:05 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5991f1cd.9398df0a.48c3d.a401@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92151:fee5f0e18b17 Date: 2017-08-14 20:52 +0200 http://bitbucket.org/pypy/pypy/changeset/fee5f0e18b17/ Log: hg merge default diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c --- a/pypy/module/cpyext/src/modsupport.c +++ b/pypy/module/cpyext/src/modsupport.c @@ -523,12 +523,10 @@ return res; } -/* returns -1 in case of error, 0 if a new key was added, 1 if the key - was already there (and replaced) */ -static int -_PyModule_AddObject_NoConsumeRef(PyObject *m, const char *name, 
PyObject *o) +int +PyModule_AddObject(PyObject *m, const char *name, PyObject *o) { - PyObject *dict, *prev; + PyObject *dict; if (!PyModule_Check(m)) { PyErr_SetString(PyExc_TypeError, "PyModule_AddObject() needs module as first arg"); @@ -548,49 +546,34 @@ PyModule_GetName(m)); return -1; } - prev = PyDict_GetItemString(dict, name); if (PyDict_SetItemString(dict, name, o)) return -1; - return prev != NULL; -} - -int -PyModule_AddObject(PyObject *m, const char *name, PyObject *o) -{ - int result = _PyModule_AddObject_NoConsumeRef(m, name, o); - /* XXX WORKAROUND for a common misusage of PyModule_AddObject: - for the common case of adding a new key, we don't consume a - reference, but instead just leak it away. The issue is that - people generally don't realize that this function consumes a - reference, because on CPython the reference is still stored - on the dictionary. */ - if (result != 0) - Py_DECREF(o); - return result < 0 ? -1 : 0; + Py_DECREF(o); + return 0; } int PyModule_AddIntConstant(PyObject *m, const char *name, long value) { - int result; PyObject *o = PyLong_FromLong(value); if (!o) return -1; - result = _PyModule_AddObject_NoConsumeRef(m, name, o); + if (PyModule_AddObject(m, name, o) == 0) + return 0; Py_DECREF(o); - return result < 0 ? -1 : 0; + return -1; } int PyModule_AddStringConstant(PyObject *m, const char *name, const char *value) { - int result; PyObject *o = PyUnicode_FromString(value); if (!o) return -1; - result = _PyModule_AddObject_NoConsumeRef(m, name, o); + if (PyModule_AddObject(m, name, o) == 0) + return 0; Py_DECREF(o); - return result < 0 ? 
-1 : 0; + return -1; } PyModuleDef* diff --git a/pypy/module/cpyext/test/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py --- a/pypy/module/cpyext/test/test_capsule.py +++ b/pypy/module/cpyext/test/test_capsule.py @@ -12,9 +12,6 @@ if (PyErr_Occurred()) return NULL; module = PyImport_ImportModule("foo"); PyModule_AddObject(module, "_ptr", capsule); - #ifdef PYPY_VERSION - Py_DECREF(capsule); /* XXX <--- anti-workaround */ - #endif Py_DECREF(module); if (PyErr_Occurred()) return NULL; Py_RETURN_NONE; From pypy.commits at gmail.com Mon Aug 14 14:54:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 14 Aug 2017 11:54:07 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: hg merge py3.5 Message-ID: <5991f1cf.6395df0a.f5534.9604@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92152:4babb75613f0 Date: 2017-08-14 20:53 +0200 http://bitbucket.org/pypy/pypy/changeset/4babb75613f0/ Log: hg merge py3.5 diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -81,6 +81,19 @@ g['LIBDIR'] = os.path.join(sys.prefix, 'lib') g['VERSION'] = get_python_version() + if sys.platform[:6] == "darwin": + import platform + if platform.machine() == 'i386': + if platform.architecture()[0] == '32bit': + arch = 'i386' + else: + arch = 'x86_64' + else: + # just a guess + arch = platform.machine() + g['LDSHARED'] += ' -undefined dynamic_lookup' + g['CC'] += ' -arch %s' % (arch,) + global _config_vars _config_vars = g diff --git a/lib-python/3/test/test_pyexpat.py b/lib-python/3/test/test_pyexpat.py --- a/lib-python/3/test/test_pyexpat.py +++ b/lib-python/3/test/test_pyexpat.py @@ -11,7 +11,7 @@ from xml.parsers import expat from xml.parsers.expat import errors -from test.support import sortdict +from test.support import sortdict, impl_detail class SetAttributeTest(unittest.TestCase): @@ -446,6 +446,7 @@ 
self.assertEqual(os.path.basename(entry[0]), filename) self.assertEqual(entry[2], funcname) + @impl_detail("PyPy does not have pyexpat.c", pypy=False) def test_exception(self): parser = expat.ParserCreate() parser.StartElementHandler = self.StartElementHandler diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py --- a/lib_pypy/pyrepl/readline.py +++ b/lib_pypy/pyrepl/readline.py @@ -297,10 +297,7 @@ line = line.rstrip('\n') if isinstance(line, unicode): return line # on py3k - try: - return unicode(line, ENCODING) - except UnicodeDecodeError: # bah, silently fall back... - return unicode(line, 'utf-8', 'replace') + return unicode(line, 'utf-8', 'replace') def get_history_length(self): return self.saved_history_length @@ -317,7 +314,7 @@ # history item: we use \r\n instead of just \n. If the history # file is passed to GNU readline, the extra \r are just ignored. history = self.get_reader().history - f = open(os.path.expanduser(filename), 'r') + f = open(os.path.expanduser(filename), 'r', encoding='utf-8') buffer = [] for line in f: if line.endswith('\r\n'): @@ -334,15 +331,12 @@ def write_history_file(self, filename='~/.history'): maxlength = self.saved_history_length history = self.get_reader().get_trimmed_history(maxlength) - f = open(os.path.expanduser(filename), 'w') + f = open(os.path.expanduser(filename), 'w', encoding='utf-8') for entry in history: # if we are on py3k, we don't need to encode strings before # writing it to a file if isinstance(entry, unicode) and sys.version_info < (3,): - try: - entry = entry.encode(ENCODING) - except UnicodeEncodeError: # bah, silently fall back... 
- entry = entry.encode('utf-8') + entry = entry.encode('utf-8') entry = entry.replace('\n', '\r\n') # multiline history support f.write(entry + '\n') f.close() diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py --- a/pypy/module/__builtin__/descriptor.py +++ b/pypy/module/__builtin__/descriptor.py @@ -21,24 +21,31 @@ if space.is_none(w_obj_or_type): w_type = None # unbound super object - w_obj_or_type = space.w_None + w_obj_or_type = None else: w_type = _super_check(space, w_starttype, w_obj_or_type) self.w_starttype = w_starttype self.w_objtype = w_type - self.w_self = w_obj_or_type + self.w_self = w_obj_or_type # may be None def descr_repr(self, space): if self.w_objtype is not None: objtype_name = u"<%s object>" % self.w_objtype.getname(space) else: objtype_name = u'NULL' + if self.w_starttype is not None: + starttype_name = self.w_starttype.getname(space) + else: + starttype_name = u'NULL' return space.newunicode(u"<super: <class '%s'>, %s>" % ( - self.w_starttype.getname(space), objtype_name)) + starttype_name, objtype_name)) def get(self, space, w_obj, w_type=None): - if self.w_self is None or space.is_w(w_obj, space.w_None): + if self.w_self is not None or space.is_w(w_obj, space.w_None): return self + if self.w_starttype is None: + raise oefmt(space.w_TypeError, + "__get__(x) is invalid on an uninitialized instance of 'super'") else: # if type(self) is W_Super: # XXX write a fast path for this common case @@ -50,6 +57,7 @@ # only use a special logic for bound super objects and not for # getting the __class__ of the super object itself. 
if self.w_objtype is not None and name != '__class__': + assert self.w_starttype is not None w_value = space.lookup_in_type_starting_at(self.w_objtype, self.w_starttype, name) @@ -59,10 +67,9 @@ return w_value # Only pass 'obj' param if this is instance-mode super # (see CPython sourceforge id #743627) - if self.w_self is self.w_objtype: + w_obj = self.w_self + if w_obj is None or w_obj is self.w_objtype: w_obj = space.w_None - else: - w_obj = self.w_self return space.get_and_call_function(w_get, w_value, w_obj, self.w_objtype) # fallback to object.__getattribute__() @@ -160,7 +167,11 @@ _immutable_fields_ = ["w_fget", "w_fset", "w_fdel"] def __init__(self, space): - pass + self.w_fget = space.w_None + self.w_fset = space.w_None + self.w_fdel = space.w_None + self.w_doc = space.w_None + self.getter_doc = False @unwrap_spec(w_fget=WrappedDefault(None), w_fset=WrappedDefault(None), diff --git a/pypy/module/__builtin__/test/test_descriptor.py b/pypy/module/__builtin__/test/test_descriptor.py --- a/pypy/module/__builtin__/test/test_descriptor.py +++ b/pypy/module/__builtin__/test/test_descriptor.py @@ -333,6 +333,22 @@ assert repr(A()).endswith('>!') assert repr(super(A, A())) == "<super: <class 'A'>, <A object>>" + def test_super_get_corner_case(self): + class A(object): + pass + s1 = super(A, A()) + assert s1.__get__(42) is s1 + assert s1.__get__(42, int) is s1 + s2 = super(A) + assert s2.__get__(None, "anything") is s2 + # + assert s1.__get__(None, "anything") is s1 + raises(TypeError, s2.__get__, 42) + raises(TypeError, s2.__get__, 42, int) + a = A() + assert s2.__get__(a).__self__ is a + assert s1.__get__(a) is s1 + def test_property_docstring(self): assert property.__doc__.startswith('property') @@ -520,3 +536,35 @@ super() raises(RuntimeError, X().f) """ + + def test_uninitialized_property(self): + p = property.__new__(property) + raises(AttributeError, p.__get__, 42) + raises(AttributeError, p.__set__, 42, None) + raises(AttributeError, p.__delete__, 42) + assert repr(p).startswith("<property object at ") + + def test_uninitialized_super(self): + s = super.__new__(super) + assert repr(s) == "<super: <class 'NULL'>, NULL>" + assert s.__thisclass__ is s.__self__ is s.__self_class__ is None + assert s.__get__(None, "anything") is s + raises(TypeError, s.__get__, 42) + raises(TypeError, s.__get__, int) + raises(TypeError, s.__get__, type(None)) + raises(AttributeError, "s.abcde") + raises(AttributeError, "s.abcde = 42") + raises(AttributeError, "del s.abcde") diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c --- a/pypy/module/cpyext/src/modsupport.c +++ b/pypy/module/cpyext/src/modsupport.c @@ -523,12 +523,10 @@ return res; } -/* returns -1 in case of error, 0 if a new key was added, 1 if the key - was already there (and replaced) */ -static int -_PyModule_AddObject_NoConsumeRef(PyObject *m, const char *name, PyObject *o) +int +PyModule_AddObject(PyObject *m, const char *name, PyObject *o) { - PyObject *dict, *prev; + PyObject *dict; if (!PyModule_Check(m)) { PyErr_SetString(PyExc_TypeError, "PyModule_AddObject() needs module as first arg"); @@ -548,49 +546,34 @@ PyModule_GetName(m)); return -1; } - prev = PyDict_GetItemString(dict, name); if (PyDict_SetItemString(dict, name, o)) return -1; - return prev != NULL; -} - -int -PyModule_AddObject(PyObject *m, const char *name, PyObject *o) -{ - int result = _PyModule_AddObject_NoConsumeRef(m, name, o); - /* XXX WORKAROUND for a common misusage of PyModule_AddObject: - for the common case of adding a new key, we don't consume a - reference, but instead just leak it away. The issue is that - people generally don't realize that this function consumes a - reference, because on CPython the reference is still stored - on the dictionary. */ - if (result != 0) - Py_DECREF(o); - return result < 0 ? 
-1 : 0; + Py_DECREF(o); + return 0; } int PyModule_AddIntConstant(PyObject *m, const char *name, long value) { - int result; PyObject *o = PyLong_FromLong(value); if (!o) return -1; - result = _PyModule_AddObject_NoConsumeRef(m, name, o); + if (PyModule_AddObject(m, name, o) == 0) + return 0; Py_DECREF(o); - return result < 0 ? -1 : 0; + return -1; } int PyModule_AddStringConstant(PyObject *m, const char *name, const char *value) { - int result; PyObject *o = PyUnicode_FromString(value); if (!o) return -1; - result = _PyModule_AddObject_NoConsumeRef(m, name, o); + if (PyModule_AddObject(m, name, o) == 0) + return 0; Py_DECREF(o); - return result < 0 ? -1 : 0; + return -1; } PyModuleDef* diff --git a/pypy/module/cpyext/test/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py --- a/pypy/module/cpyext/test/test_capsule.py +++ b/pypy/module/cpyext/test/test_capsule.py @@ -12,9 +12,6 @@ if (PyErr_Occurred()) return NULL; module = PyImport_ImportModule("foo"); PyModule_AddObject(module, "_ptr", capsule); - #ifdef PYPY_VERSION - Py_DECREF(capsule); /* XXX <--- anti-workaround */ - #endif Py_DECREF(module); if (PyErr_Occurred()) return NULL; Py_RETURN_NONE; diff --git a/pypy/module/pyexpat/test/test_parser.py b/pypy/module/pyexpat/test/test_parser.py --- a/pypy/module/pyexpat/test/test_parser.py +++ b/pypy/module/pyexpat/test/test_parser.py @@ -210,6 +210,30 @@ p.ParseFile(fake_reader) assert fake_reader.read_count == 4 + + def test_exception(self): + """ + lib-python/3/test_pyexpat.py:HandlerExceptionTest.test_exception port + without the fragile traceback inspection. + """ + import pyexpat as expat + + def StartElementHandler(name, attrs): + raise RuntimeError(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = StartElementHandler + + try: + parser.Parse(b"<a><b><c/></b></a>", 1) + self.fail() + except RuntimeError as e: + assert e.args[0] == 'a', ( + "Expected RuntimeError for element 'a', but" + \ + " found %r" % e.args[0] + ) + + class AppTestPyexpat2: spaceconfig = dict(usemodules=['_rawffi', 'pyexpat', 'itertools', '_socket', 'time', 'struct', 'binascii', diff --git a/pypy/module/readline/test/test_readline.py b/pypy/module/readline/test/test_readline.py new file mode 100644 --- /dev/null +++ b/pypy/module/readline/test/test_readline.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +class AppTestReadline: + spaceconfig = dict(usemodules={ + 'unicodedata', 'termios', 'select', 'signal', 'fcntl', + '_minimal_curses', 'faulthandler', '_socket', 'binascii', + '_posixsubprocess', + }) + + def test_nonascii_history(self): + import os, readline + TESTFN = "{}_{}_tmp".format("@test", os.getpid()) + + is_editline = readline.__doc__ and "libedit" in readline.__doc__ + + readline.clear_history() + try: + readline.add_history("entrée 1") + except UnicodeEncodeError as err: + skip("Locale cannot encode test data: " + format(err)) + readline.add_history("entrée 2") + readline.replace_history_item(1, "entrée 22") + readline.write_history_file(TESTFN) + readline.clear_history() + readline.read_history_file(TESTFN) + if is_editline: + # An add_history() call seems to be required for get_history_ + # item() to register items from the file + readline.add_history("dummy") + assert readline.get_history_item(1) == "entrée 1" + assert readline.get_history_item(2) == "entrée 22" From pypy.commits at gmail.com Mon Aug 14 15:30:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 14 Aug 2017 12:30:52 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: Fix leaks in PyType_FromSpec Message-ID: <5991fa6c.091f1c0a.a3c1d.082e@mx.google.com> Author: 
Ronan Lamy Branch: multiphase Changeset: r92153:14b50017ab08 Date: 2017-08-14 21:29 +0200 http://bitbucket.org/pypy/pypy/changeset/14b50017ab08/ Log: Fix leaks in PyType_FromSpec diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -597,6 +597,7 @@ if obj_pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE: heaptype = rffi.cast(PyHeapTypeObject, obj) Py_DecRef(space, heaptype.c_ht_name) + Py_DecRef(space, heaptype.c_ht_qualname) Py_DecRef(space, base_pyo) _dealloc(space, obj) @@ -925,8 +926,7 @@ name = specname else: name = specname[dotpos + 1:] - res.c_ht_name = make_ref( - space, PyUnicode_FromString(space, rffi.str2charp(name))) + res.c_ht_name = make_ref(space, space.newtext(name)) res.c_ht_qualname = res.c_ht_name incref(space, res.c_ht_qualname) typ.c_tp_name = spec.c_name From pypy.commits at gmail.com Tue Aug 15 12:29:25 2017 From: pypy.commits at gmail.com (exarkun) Date: Tue, 15 Aug 2017 09:29:25 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Allow readline text insertion without preparation. Message-ID: <59932165.959adf0a.34ae4.d660@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92154:9f2963dca939 Date: 2017-08-15 12:28 -0400 http://bitbucket.org/pypy/pypy/changeset/9f2963dca939/ Log: Allow readline text insertion without preparation. diff --git a/lib_pypy/pyrepl/reader.py b/lib_pypy/pyrepl/reader.py --- a/lib_pypy/pyrepl/reader.py +++ b/lib_pypy/pyrepl/reader.py @@ -239,6 +239,10 @@ def __init__(self, console): self.buffer = [] + # Enable the use of `insert` without a `prepare` call - necessary to + # facilitate the tab completion hack implemented for + # . + self.pos = 0 self.ps1 = "->> " self.ps2 = "/>> " self.ps3 = "|.. 
" diff --git a/pypy/module/readline/test/test_readline.py b/pypy/module/readline/test/test_readline.py --- a/pypy/module/readline/test/test_readline.py +++ b/pypy/module/readline/test/test_readline.py @@ -29,3 +29,14 @@ readline.add_history("dummy") assert readline.get_history_item(1) == "entrée 1" assert readline.get_history_item(2) == "entrée 22" + + + def test_insert_text_leading_tab(self): + """ + A literal tab can be inserted at the beginning of a line. + + See + """ + import readline + readline.insert_text("\t") + assert readline.get_line_buffer() == b"\t" From pypy.commits at gmail.com Tue Aug 15 13:35:22 2017 From: pypy.commits at gmail.com (exarkun) Date: Tue, 15 Aug 2017 10:35:22 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Only expose OSError.winerror on Win32. Message-ID: <599330da.c9a61c0a.6abc9.92c2@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92155:cce9378e5bb7 Date: 2017-08-15 13:34 -0400 http://bitbucket.org/pypy/pypy/changeset/cce9378e5bb7/ Log: Only expose OSError.winerror on Win32. 
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -636,6 +636,14 @@ else: WINERROR_TO_ERRNO, DEFAULT_WIN32_ERRNO = {}, 22 # EINVAL +if rwin32.WIN32: + _winerror_property = dict( + winerror = readwrite_attrproperty_w('w_winerror', W_OSError), + ) +else: + _winerror_property = dict() + + W_OSError.typedef = TypeDef( 'OSError', W_Exception.typedef, @@ -648,9 +656,9 @@ strerror = readwrite_attrproperty_w('w_strerror', W_OSError), filename = readwrite_attrproperty_w('w_filename', W_OSError), filename2= readwrite_attrproperty_w('w_filename2',W_OSError), - winerror = readwrite_attrproperty_w('w_winerror', W_OSError), characters_written = GetSetProperty(W_OSError.descr_get_written, W_OSError.descr_set_written), + **_winerror_property ) W_BlockingIOError = _new_exception( From pypy.commits at gmail.com Wed Aug 16 02:06:34 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 15 Aug 2017 23:06:34 -0700 (PDT) Subject: [pypy-commit] pypy nogil-unsafe-2: Make the current stack limits a thread-local, too Message-ID: <5993e0ea.97a0df0a.cf3ca.0274@mx.google.com> Author: Armin Rigo Branch: nogil-unsafe-2 Changeset: r92158:13c93572cf88 Date: 2017-08-16 10:03 +0400 http://bitbucket.org/pypy/pypy/changeset/13c93572cf88/ Log: Make the current stack limits a thread-local, too diff --git a/rpython/translator/c/src/stack.c b/rpython/translator/c/src/stack.c --- a/rpython/translator/c/src/stack.c +++ b/rpython/translator/c/src/stack.c @@ -10,16 +10,15 @@ stack that grows downward here. */ /* (stored in a struct to ensure that stack_end and stack_length are - close together; used e.g. by the ppc jit backend) */ -rpy_stacktoobig_t rpy_stacktoobig = { - NULL, /* stack_end */ - MAX_STACK_SIZE, /* stack_length */ - 1 /* report_error */ -}; + close together; used e.g. 
by the ppc jit backend) + XXX this is no longer the case in the nogil-unsafe-2 branch */ +__thread rpy_stacktoobig_t rpy_stacktoobig; +long rpy_stack_length = MAX_STACK_SIZE; + void LL_stack_set_length_fraction(double fraction) { - rpy_stacktoobig.stack_length = (long)(MAX_STACK_SIZE * fraction); + rpy_stack_length = (long)(MAX_STACK_SIZE * fraction); } char LL_stack_too_big_slowpath(long current) @@ -33,12 +32,14 @@ if it is still 0 or if we later find a 'curptr' position that is above it. The real stack_end pointer is stored in thread-local storage, but we try to minimize its overhead by - keeping a local copy in rpy_stacktoobig.stack_end. */ + keeping a local copy in rpy_stacktoobig.stack_end. + + XXX no point in having another thread-local copy */ OP_THREADLOCALREF_ADDR(tl); tl1 = (struct pypy_threadlocal_s *)tl; baseptr = tl1->stack_end; - max_stack_size = rpy_stacktoobig.stack_length; + max_stack_size = rpy_stack_length; if (baseptr == NULL) { /* first time we see this thread */ } @@ -54,7 +55,7 @@ the stack base must be revised */ } else { /* stack overflow (probably) */ - return rpy_stacktoobig.report_error; + return !rpy_stacktoobig.dont_report_error; } } diff --git a/rpython/translator/c/src/stack.h b/rpython/translator/c/src/stack.h --- a/rpython/translator/c/src/stack.h +++ b/rpython/translator/c/src/stack.h @@ -22,11 +22,11 @@ typedef struct { char *stack_end; - long stack_length; - char report_error; + char dont_report_error; } rpy_stacktoobig_t; -RPY_EXTERN rpy_stacktoobig_t rpy_stacktoobig; +RPY_EXTERN __thread rpy_stacktoobig_t rpy_stacktoobig; +RPY_EXTERN long rpy_stack_length; RPY_EXTERN char LL_stack_too_big_slowpath(long); /* returns 0 (ok) or 1 (too big) */ @@ -35,12 +35,12 @@ /* some macros referenced from rpython.rlib.rstack */ #define LL_stack_get_end() ((long)rpy_stacktoobig.stack_end) -#define LL_stack_get_length() rpy_stacktoobig.stack_length -#define LL_stack_get_end_adr() ((long)&rpy_stacktoobig.stack_end) /* JIT */ -#define 
LL_stack_get_length_adr() ((long)&rpy_stacktoobig.stack_length)/* JIT */ +#define LL_stack_get_length() rpy_stack_length +#define LL_stack_get_end_adr() FIXME ((long)&rpy_stacktoobig.stack_end) /* JIT */ +#define LL_stack_get_length_adr() ((long)&rpy_stack_length) /* JIT */ -#define LL_stack_criticalcode_start() (rpy_stacktoobig.report_error = 0) -#define LL_stack_criticalcode_stop() (rpy_stacktoobig.report_error = 1) +#define LL_stack_criticalcode_start() (rpy_stacktoobig.dont_report_error = 1) +#define LL_stack_criticalcode_stop() (rpy_stacktoobig.dont_report_error = 0) #ifdef __GNUC__ From pypy.commits at gmail.com Wed Aug 16 02:06:29 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 15 Aug 2017 23:06:29 -0700 (PDT) Subject: [pypy-commit] pypy nogil-unsafe-2: With nogil, it doesn't make sense to not have threads Message-ID: <5993e0e5.cda2df0a.94a79.0e45@mx.google.com> Author: Armin Rigo Branch: nogil-unsafe-2 Changeset: r92156:55fb6aff1863 Date: 2017-08-15 20:36 +0200 http://bitbucket.org/pypy/pypy/changeset/55fb6aff1863/ Log: With nogil, it doesn't make sense to not have threads (and it fails translation right now, FIXME) diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -23,7 +23,9 @@ default_modules.update([ "_codecs", "gc", "_weakref", "marshal", "errno", "imp", "math", "cmath", "_sre", "_pickle_support", "operator", "parser", "symbol", "token", "_ast", - "_io", "_random", "__pypy__", "_testing", "time" + "_io", "_random", "__pypy__", "_testing", "time", + "thread" # XXX with nogil, it doesn't make sense to not have threads + # (and it fails translation right now, FIXME) ]) From pypy.commits at gmail.com Wed Aug 16 02:06:32 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 15 Aug 2017 23:06:32 -0700 (PDT) Subject: [pypy-commit] pypy nogil-unsafe-2: Fixes. 
Now the branch seems to "work" again Message-ID: <5993e0e8.cc151c0a.143df.2f13@mx.google.com> Author: Armin Rigo Branch: nogil-unsafe-2 Changeset: r92157:cd60a593d1b4 Date: 2017-08-16 09:44 +0400 http://bitbucket.org/pypy/pypy/changeset/cd60a593d1b4/ Log: Fixes. Now the branch seems to "work" again diff --git a/rpython/rlib/rgil.py b/rpython/rlib/rgil.py --- a/rpython/rlib/rgil.py +++ b/rpython/rlib/rgil.py @@ -3,7 +3,7 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.extregistry import ExtRegistryEntry -from rpython.rlib.objectmodel import not_rpython +from rpython.rlib.objectmodel import not_rpython, we_are_translated # these functions manipulate directly the GIL, whose definition does not # escape the C code itself @@ -122,21 +122,24 @@ _gil_allocate() def release(): - return # this function must not raise, in such a way that the exception # transformer knows that it cannot raise! - _gil_release() -#release._gctransformer_hint_cannot_collect_ = True -#release._dont_reach_me_in_del_ = True + if we_are_translated(): + _gil_release() +release._gctransformer_hint_cannot_collect_ = True +release._dont_reach_me_in_del_ = True def acquire(): - return - from rpython.rlib import rthread - _gil_acquire() - rthread.gc_thread_run() - _after_thread_switch() -#acquire._gctransformer_hint_cannot_collect_ = True -#acquire._dont_reach_me_in_del_ = True + ###XXX commented some lines out for nogil-unsafe-2 + ###XXX but note that _gil_acquire() does not acquire any GIL there! + + ###from rpython.rlib import rthread + if we_are_translated(): + _gil_acquire() + ###rthread.gc_thread_run() + ###_after_thread_switch() +acquire._gctransformer_hint_cannot_collect_ = True +acquire._dont_reach_me_in_del_ = True # The _gctransformer_hint_cannot_collect_ hack is needed for # translations in which the *_external_call() functions are not inlined. 
diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h --- a/rpython/translator/c/src/thread.h +++ b/rpython/translator/c/src/thread.h @@ -75,22 +75,21 @@ SLOWPATH: signal "now at safepoint"; 111 -> 110 */ -#define _RPyGilAcquire() /*do { \ - assert((__sync_fetch_and_add( \ - &RPY_THREADLOCALREF_GET(synclock), 0) \ - & 0b001) == 0b0); \ + +/* in the nogil-unsafe-2 branch, + !! THIS IS NOT A GIL !! It's the logic to do global safe-points +*/ +#define _RPyGilAcquire() do { \ if (!__sync_bool_compare_and_swap( \ &RPY_THREADLOCALREF_GET(synclock), 0b100L, 0b101L)) \ RPyGilAcquireSlowPath(); \ - } while (0)*/ + } while (0) -#define _RPyGilRelease() /*do { \ - assert((__sync_fetch_and_add( \ - &RPY_THREADLOCALREF_GET(synclock), 0) & 0b101) == 0b101); \ +#define _RPyGilRelease() do { \ if (!__sync_bool_compare_and_swap( \ &RPY_THREADLOCALREF_GET(synclock), 0b101L, 0b100L)) \ RPyGilReleaseSlowPath(); \ - } while (0)*/ + } while (0) static inline long *_RPyFetchFastGil(void) { abort(); diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c --- a/rpython/translator/c/src/thread_gil.c +++ b/rpython/translator/c/src/thread_gil.c @@ -45,6 +45,10 @@ void RPyGilAcquireSlowPath(void) { + assert((__sync_fetch_and_add( + &RPY_THREADLOCALREF_GET(synclock), 0) + & 0b001) == 0b0); + /* wait until the master leaves the safe point */ pthread_mutex_lock(&master_mutex); @@ -63,6 +67,9 @@ void RPyGilReleaseSlowPath(void) { + assert((__sync_fetch_and_add( + &RPY_THREADLOCALREF_GET(synclock), 0) & 0b101) == 0b101); + pthread_mutex_lock(&sync_mutex); assert(RPY_THREADLOCALREF_GET(synclock) == 0b111L); From pypy.commits at gmail.com Wed Aug 16 04:44:50 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 16 Aug 2017 01:44:50 -0700 (PDT) Subject: [pypy-commit] pypy nogil-unsafe-2: Don't decrement the ticker at all in the nogil-unsafe-2 branch Message-ID: <59940602.d288df0a.80f5.261d@mx.google.com> Author: Armin Rigo Branch: 
nogil-unsafe-2 Changeset: r92159:b03810fcb5c1 Date: 2017-08-16 10:43 +0200 http://bitbucket.org/pypy/pypy/changeset/b03810fcb5c1/ Log: Don't decrement the ticker at all in the nogil-unsafe-2 branch diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -436,6 +436,8 @@ # hack to put the release-the-GIL one at the end of the list, # and the report-the-signals one at the start of the list. if use_bytecode_counter: + assert False, ("nogil-unsafe-2: shouldn't register a " + "gil-releasing periodic action") self._periodic_actions.append(action) self.has_bytecode_counter = True else: diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py --- a/pypy/module/thread/gil.py +++ b/pypy/module/thread/gil.py @@ -19,6 +19,7 @@ def initialize(self, space): # add the GIL-releasing callback as an action on the space + return # XXX nogil-unsafe-2 space.actionflag.register_periodic_action(GILReleaseAction(space), use_bytecode_counter=True) From pypy.commits at gmail.com Wed Aug 16 15:57:05 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Aug 2017 12:57:05 -0700 (PDT) Subject: [pypy-commit] pypy default: refactor 9ddefd44f80d handling pre-existing exceptions, add tests, still not bulletproof Message-ID: <5994a391.c98adf0a.54101.d9c2@mx.google.com> Author: Matti Picus Branch: Changeset: r92160:40ee3c492e28 Date: 2017-08-16 22:56 +0300 http://bitbucket.org/pypy/pypy/changeset/40ee3c492e28/ Log: refactor 9ddefd44f80d handling pre-existing exceptions, add tests, still not bulletproof diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1575,7 +1575,7 @@ assert cpyext_glob_tid_ptr[0] == 0 cpyext_glob_tid_ptr[0] = tid - preexist_error = PyErr_Occurred(space) is not None + preexist_error = PyErr_Occurred(space) try: # Call the function result = 
call_external_function(func, *boxed_args) @@ -1597,18 +1597,20 @@ ret = None # Check for exception consistency - has_error = PyErr_Occurred(space) is not None + # XXX best attempt, will miss preexisting error that is + # overwritten with a new error of the same type + error = PyErr_Occurred(space) + has_new_error = (error is not None) and (error is not preexist_error) has_result = ret is not None - if not preexist_error: - if has_error and has_result: - raise oefmt(space.w_SystemError, - "An exception was set, but function returned a " - "value") - elif not expect_null and not has_error and not has_result: - raise oefmt(space.w_SystemError, - "Function returned a NULL result without setting " - "an exception") - if has_error: + if not expect_null and has_new_error and has_result: + raise oefmt(space.w_SystemError, + "An exception was set, but function returned a " + "value") + elif not expect_null and not has_new_error and not has_result: + raise oefmt(space.w_SystemError, + "Function returned a NULL result without setting " + "an exception") + elif has_new_error: state = space.fromcache(State) state.check_and_raise_exception() diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -24,6 +24,10 @@ def PyPy_Crash2(space): 1/0 + at api.cpython_api([api.PyObject], api.PyObject, result_is_ll=True) +def PyPy_Noop(space, pyobj): + return pyobj + class TestApi: def test_signature(self): common_functions = api.FUNCTIONS_BY_HEADER[api.pypy_decl] @@ -685,6 +689,7 @@ body = """ PyAPI_FUNC(PyObject*) PyPy_Crash1(void); PyAPI_FUNC(long) PyPy_Crash2(void); + PyAPI_FUNC(PyObject*) PyPy_Noop(PyObject*); static PyObject* foo_crash1(PyObject* self, PyObject *args) { return PyPy_Crash1(); @@ -708,9 +713,27 @@ int a = PyPy_Crash2(); return PyFloat_FromDouble(a); } + static PyObject* foo_noop(PyObject* self, PyObject* args) + { + Py_INCREF(args); + return 
PyPy_Noop(args); + } + static PyObject* foo_set(PyObject* self, PyObject *args) + { + PyErr_SetString(PyExc_TypeError, "clear called with no error"); + if (PyInt_Check(args)) { + Py_INCREF(args); + return args; + } + return NULL; + } static PyObject* foo_clear(PyObject* self, PyObject *args) { PyErr_Clear(); + if (PyInt_Check(args)) { + Py_INCREF(args); + return args; + } return NULL; } static PyMethodDef methods[] = { @@ -718,20 +741,53 @@ { "crash2", foo_crash2, METH_NOARGS }, { "crash3", foo_crash3, METH_NOARGS }, { "crash4", foo_crash4, METH_NOARGS }, - { "clear", foo_clear, METH_NOARGS }, + { "clear", foo_clear, METH_O }, + { "set", foo_set, METH_O }, + { "noop", foo_noop, METH_O }, { NULL } }; """ module = self.import_module(name='foo', init=init, body=body) + # uncaught interplevel exceptions are turned into SystemError - raises(SystemError, module.crash1) - raises(SystemError, module.crash2) - # caught exception + expected = "ZeroDivisionError('integer division or modulo by zero',)" + exc = raises(SystemError, module.crash1) + assert exc.value[0] == expected + + exc = raises(SystemError, module.crash2) + assert exc.value[0] == expected + + # caught exception, api.cpython_api return value works assert module.crash3() == -1 - # An exception was set, but function returned a value - raises(SystemError, module.crash4) - # No exception set, but NULL returned - raises(SystemError, module.clear) + + expected = 'An exception was set, but function returned a value' + # PyPy only incompatibility/extension + exc = raises(SystemError, module.crash4) + assert exc.value[0] == expected + + # An exception was set by the previous call, it can pass + # cleanly through a call that doesn't check error state + assert module.noop(1) == 1 + + # clear the exception but return NULL, signalling an error + expected = 'Function returned a NULL result without setting an exception' + exc = raises(SystemError, module.clear, None) + assert exc.value[0] == expected + + # Set an exception 
and return NULL + raises(TypeError, module.set, None) + + # clear any exception and return a value + assert module.clear(1) == 1 + + # Set an exception, but return non-NULL + expected = 'An exception was set, but function returned a value' + exc = raises(SystemError, module.set, 1) + assert exc.value[0] == expected + + + # Clear the exception and return a value, all is OK + assert module.clear(1) == 1 def test_new_exception(self): mod = self.import_extension('foo', [ From pypy.commits at gmail.com Thu Aug 17 05:39:37 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 17 Aug 2017 02:39:37 -0700 (PDT) Subject: [pypy-commit] pypy nogil-unsafe-2: Attempt to reduce false sharing between threads. Unclear results Message-ID: <59956459.8583df0a.47840.55a5@mx.google.com> Author: Armin Rigo Branch: nogil-unsafe-2 Changeset: r92161:e40f8472eb81 Date: 2017-08-17 11:38 +0200 http://bitbucket.org/pypy/pypy/changeset/e40f8472eb81/ Log: Attempt to reduce false sharing between threads. Unclear results diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -280,7 +280,7 @@ # "cache_line_min" is used to round the actual thread-local # blocks to a cache line, to avoid pointless cache conflicts. "tl_block_size": 131072, - "cache_line_min": 256, # why not 64b? + "cache_line_min": 128, # two cache lines on x86 } def __init__(self, config, @@ -313,6 +313,7 @@ self.max_heap_size_already_raised = False self.max_delta = float(r_uint(-1)) self.max_number_of_pinned_objects = 0 # computed later + self.collecting_roots_in_nursery = False # self.card_page_indices = card_page_indices if self.card_page_indices > 0: @@ -1983,13 +1984,20 @@ # see them. 
use_jit_frame_stoppers = not any_pinned_object_from_earlier # + self.collecting_roots_in_nursery = True self.root_walker.walk_roots( callback, # stack roots callback, # static in prebuilt non-gc None, # static in prebuilt gc is_minor=use_jit_frame_stoppers) + self.collecting_roots_in_nursery = False debug_stop("gc-minor-walkroots") + def collected_roots_for_one_thread(self): + if self.collecting_roots_in_nursery: + self.collect_oldrefs_to_nursery() + self.ac.force_non_sharing_by_dummy_allocation(self.cache_line_min) + def collect_cardrefs_to_nursery(self): size_gc_header = self.gcheaderbuilder.size_gc_header oldlist = self.old_objects_with_cards_set diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py --- a/rpython/memory/gc/minimarkpage.py +++ b/rpython/memory/gc/minimarkpage.py @@ -191,6 +191,30 @@ return result + def force_non_sharing_by_dummy_allocation(self, alignment): + """Force a few bytes of memory to be lost, to ensure that + a CPU cache of size "alignment" would not cause false sharing + between objects allocated just before and objects allocated + just after the call to the present function. 
+ """ + size_class_max = self.small_request_threshold >> WORD_POWER_2 + size_class = 1 + while size_class <= size_class_max: + page = self.page_for_size[size_class] + if page != PAGE_NULL: + next_alloc = page.freeblock + allocation_start = llmemory.cast_ptr_to_adr(page) + self.hdrsize + if next_alloc != allocation_start: + next_alloc = rffi.cast(lltype.Signed, next_alloc) + rounded_up = (next_alloc + (alignment-1)) & ~(alignment-1) + while next_alloc < rounded_up: + self.malloc(size_class << WORD_POWER_2) + if self.page_for_size[size_class] != page: + break + next_alloc = rffi.cast(lltype.Signed, page.freeblock) + size_class += 1 + + def allocate_new_page(self, size_class): """Allocate and return a new page for the given size_class.""" # diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py --- a/rpython/memory/gctransform/shadowstack.py +++ b/rpython/memory/gctransform/shadowstack.py @@ -113,6 +113,7 @@ debug_print("walk_stack", base, top) walk_stack_root(self.invoke_collect_stack_root, collect_stack_root, None, base, top, is_minor=False) + self.gcdata.gc.collected_roots_for_one_thread() self._walk_thread_stack = walk_thread_stack diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c --- a/rpython/translator/c/src/threadlocal.c +++ b/rpython/translator/c/src/threadlocal.c @@ -11,32 +11,36 @@ #include "src/thread.h" -/* this is a spin-lock that must be acquired around each doubly-linked-list +/* this is a reentrant lock that must be acquired around each doubly-linked-list manipulation (because such manipulations can occur without the GIL) */ -static long pypy_threadlocal_lock = 0; +static pthread_mutex_t _rpy_threadlocal_lock; static int check_valid(void); -int _RPython_ThreadLocals_AcquireTimeout(int max_wait_iterations) { - while (1) { - long old_value = pypy_lock_test_and_set(&pypy_threadlocal_lock, 1); - if (old_value == 0) - break; - /* busy loop */ - if (max_wait_iterations == 
0) - return -1; - if (max_wait_iterations > 0) - --max_wait_iterations; +static void do_check(int result) +{ + if (result != 0) { + fprintf(stderr, "threadlocal.c got an unexpected mutex error\n"); + exit(1); } +} + +static void init_lock(void) +{ + pthread_mutexattr_t attr; + do_check(pthread_mutexattr_init(&attr) + || pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) + || pthread_mutex_init(&_rpy_threadlocal_lock, &attr) + || pthread_mutexattr_destroy(&attr)); +} + +void _RPython_ThreadLocals_Acquire(void) { + do_check(pthread_mutex_lock(&_rpy_threadlocal_lock)); assert(check_valid()); - return 0; -} -void _RPython_ThreadLocals_Acquire(void) { - _RPython_ThreadLocals_AcquireTimeout(-1); } void _RPython_ThreadLocals_Release(void) { assert(check_valid()); - pypy_lock_release(&pypy_threadlocal_lock); + do_check(pthread_mutex_unlock(&_rpy_threadlocal_lock)); } @@ -73,6 +77,7 @@ { /* assume that at most one pypy_threadlocal_s survived, the current one */ struct pypy_threadlocal_s *cur; + init_lock(); cur = (struct pypy_threadlocal_s *)_RPy_ThreadLocals_Get(); if (cur && cur->ready == 42) { cur->next = cur->prev = &linkedlist_head; @@ -81,7 +86,6 @@ else { linkedlist_head.next = linkedlist_head.prev = &linkedlist_head; } - _RPython_ThreadLocals_Release(); } @@ -188,7 +192,7 @@ a non-null thread-local value). This is needed even in the case where we use '__thread' below, for the destructor. 
*/ - assert(pypy_threadlocal_lock == 0); + init_lock(); #ifdef _WIN32 pypy_threadlocal_key = TlsAlloc(); if (pypy_threadlocal_key == TLS_OUT_OF_INDEXES) diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h --- a/rpython/translator/c/src/threadlocal.h +++ b/rpython/translator/c/src/threadlocal.h @@ -21,7 +21,6 @@ RPY_EXTERN void _RPython_ThreadLocals_Acquire(void); RPY_EXTERN void _RPython_ThreadLocals_Release(void); -RPY_EXTERN int _RPython_ThreadLocals_AcquireTimeout(int max_wait_iterations); /* Must acquire/release the thread-local lock around a series of calls to the following function */ From pypy.commits at gmail.com Thu Aug 17 08:28:55 2017 From: pypy.commits at gmail.com (exarkun) Date: Thu, 17 Aug 2017 05:28:55 -0700 (PDT) Subject: [pypy-commit] pypy default: Fix imp module test_suffixes so that it runs its intended assertions. Message-ID: <59958c07.94891c0a.e35f9.09c6@mx.google.com> Author: Jean-Paul Calderone Branch: Changeset: r92162:727bbd9f3a14 Date: 2017-08-17 08:28 -0400 http://bitbucket.org/pypy/pypy/changeset/727bbd9f3a14/ Log: Fix imp module test_suffixes so that it runs its intended assertions. Also change one of the assertions to reflect the changed implementation for how source files are read (with U instead of r, now). 
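The triples returned by `imp.get_suffixes()` are `(suffix, mode, type)`; the old test unpacked them that way but then compared `mode` against the `imp.PY_SOURCE`-style *type* constants, so none of its branches ever ran. A sketch of the corrected check, using hypothetical triples and illustrative constant values rather than a live `imp` module:

```python
# Illustrative stand-ins for imp.PY_SOURCE / PY_COMPILED / C_EXTENSION;
# the real values come from the imp module.
PY_SOURCE, PY_COMPILED, C_EXTENSION = 1, 2, 3

# Hypothetical (suffix, mode, type) triples mimicking imp.get_suffixes()
suffixes = [
    ('.py', 'U', PY_SOURCE),
    ('.pyc', 'rb', PY_COMPILED),
    ('.so', 'rb', C_EXTENSION),
]

for suffix, mode, type_ in suffixes:
    if type_ == PY_SOURCE:        # compare the type, not the mode
        assert suffix == '.py' and mode == 'U'
    elif type_ == PY_COMPILED:
        assert suffix in ('.pyc', '.pyo') and mode == 'rb'
    elif type_ == C_EXTENSION:
        assert suffix.endswith(('.pyd', '.so')) and mode == 'rb'
    else:
        assert False, ("Unknown type", suffix, mode, type_)
```

The final `else: assert False` mirrors the new test: an unexpected type constant now fails loudly instead of being silently skipped.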
diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ b/pypy/module/imp/test/test_app.py @@ -45,15 +45,17 @@ def test_suffixes(self): for suffix, mode, type in self.imp.get_suffixes(): - if mode == self.imp.PY_SOURCE: + if type == self.imp.PY_SOURCE: assert suffix == '.py' - assert type == 'r' - elif mode == self.imp.PY_COMPILED: + assert mode == 'U' + elif type == self.imp.PY_COMPILED: assert suffix in ('.pyc', '.pyo') - assert type == 'rb' - elif mode == self.imp.C_EXTENSION: + assert mode == 'rb' + elif type == self.imp.C_EXTENSION: assert suffix.endswith(('.pyd', '.so')) - assert type == 'rb' + assert mode == 'rb' + else: + assert False, ("Unknown type", suffix, mode, type) def test_obscure_functions(self): From pypy.commits at gmail.com Thu Aug 17 10:29:28 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 17 Aug 2017 07:29:28 -0700 (PDT) Subject: [pypy-commit] pypy nogil-unsafe-2: Really need a read-write lock here, not a reentrant mutex Message-ID: <5995a848.6395df0a.214ab.fabc@mx.google.com> Author: Armin Rigo Branch: nogil-unsafe-2 Changeset: r92163:3f8621fd99f2 Date: 2017-08-17 16:28 +0200 http://bitbucket.org/pypy/pypy/changeset/3f8621fd99f2/ Log: Really need a read-write lock here, not a reentrant mutex diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c --- a/rpython/translator/c/src/threadlocal.c +++ b/rpython/translator/c/src/threadlocal.c @@ -11,9 +11,10 @@ #include "src/thread.h" -/* this is a reentrant lock that must be acquired around each doubly-linked-list - manipulation (because such manipulations can occur without the GIL) */ -static pthread_mutex_t _rpy_threadlocal_lock; +/* this is a read-write lock that must be acquired around each + doubly-linked-list access or manipulation (because such manipulations + can occur without the GIL) */ +static pthread_rwlockattr_t _rpy_threadlocal_lock; static int check_valid(void); @@ -27,20 
+28,23 @@ static void init_lock(void) { - pthread_mutexattr_t attr; - do_check(pthread_mutexattr_init(&attr) - || pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) - || pthread_mutex_init(&_rpy_threadlocal_lock, &attr) - || pthread_mutexattr_destroy(&attr)); + do_check(pthread_rwlock_init(&_rpy_threadlocal_lock, NULL)); } -void _RPython_ThreadLocals_Acquire(void) { - do_check(pthread_mutex_lock(&_rpy_threadlocal_lock)); +void _RPython_ThreadLocals_Acquire(void) +{ + do_check(pthread_rwlock_wrlock(&_rpy_threadlocal_lock)); assert(check_valid()); } -void _RPython_ThreadLocals_Release(void) { +void _RPython_ThreadLocals_ReadOnlyAcquire(void) +{ + do_check(pthread_rwlock_rdlock(&_rpy_threadlocal_lock)); assert(check_valid()); - do_check(pthread_mutex_unlock(&_rpy_threadlocal_lock)); +} +void _RPython_ThreadLocals_Release(void) +{ + assert(check_valid()); + do_check(pthread_rwlock_unlock(&_rpy_threadlocal_lock)); } diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h --- a/rpython/translator/c/src/threadlocal.h +++ b/rpython/translator/c/src/threadlocal.h @@ -20,6 +20,7 @@ RPY_EXTERN char *_RPython_ThreadLocals_Build(void); RPY_EXTERN void _RPython_ThreadLocals_Acquire(void); +RPY_EXTERN void _RPython_ThreadLocals_ReadOnlyAcquire(void); RPY_EXTERN void _RPython_ThreadLocals_Release(void); /* Must acquire/release the thread-local lock around a series of calls @@ -30,7 +31,7 @@ /* will return the head of the list */ RPY_EXTERN struct pypy_threadlocal_s *_RPython_ThreadLocals_Head(); -#define OP_THREADLOCALREF_ACQUIRE(r) _RPython_ThreadLocals_Acquire() +#define OP_THREADLOCALREF_ACQUIRE(r) _RPython_ThreadLocals_ReadOnlyAcquire() #define OP_THREADLOCALREF_RELEASE(r) _RPython_ThreadLocals_Release() #define OP_THREADLOCALREF_ENUM(p, r) r = _RPython_ThreadLocals_Enum(p) From pypy.commits at gmail.com Thu Aug 17 13:23:28 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 17 Aug 2017 10:23:28 -0700 (PDT) Subject: [pypy-commit] 
pypy py3.5: Issue #2635 Message-ID: <5995d110.56b71c0a.2520f.f755@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92164:f2d17e80c68d Date: 2017-08-17 19:22 +0200 http://bitbucket.org/pypy/pypy/changeset/f2d17e80c68d/ Log: Issue #2635 {datetime,date,time}.replace() returns an instance of the specific subclass in CPython's C '_datetime' module. diff --git a/lib-python/3/datetime.py b/lib-python/3/datetime.py --- a/lib-python/3/datetime.py +++ b/lib-python/3/datetime.py @@ -810,7 +810,8 @@ month = self._month if day is None: day = self._day - return date(year, month, day) + # PyPy fix: returns type(self)() instead of date() + return type(self)(year, month, day) # Comparisons of date objects with other. @@ -1285,7 +1286,8 @@ microsecond = self.microsecond if tzinfo is True: tzinfo = self.tzinfo - return time(hour, minute, second, microsecond, tzinfo) + # PyPy fix: returns type(self)() instead of time() + return type(self)(hour, minute, second, microsecond, tzinfo) # Pickle support.
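The behaviour this patch restores — `replace()` building `type(self)` so that subclasses survive, as CPython's C `_datetime` module already does — can be demonstrated in plain Python. `TaggedDate` below is an illustrative subclass, not part of the commit:

```python
from datetime import date

class TaggedDate(date):
    # Illustrative subclass; adds nothing, just tracks identity.
    pass

d = TaggedDate(2017, 8, 17)
d2 = d.replace(day=18)

# With the patched datetime.py (and with CPython's C _datetime module),
# replace() calls type(self)(...), so the subclass is preserved:
assert isinstance(d2, TaggedDate)
assert (d2.year, d2.month, d2.day) == (2017, 8, 18)
```

Before the patch, the pure-Python `datetime.py` returned a plain `date` here and the `isinstance` check failed.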
@@ -1497,7 +1499,8 @@ microsecond = self.microsecond if tzinfo is True: tzinfo = self.tzinfo - return datetime(year, month, day, hour, minute, second, microsecond, + # PyPy fix: returns type(self)() instead of datetime() + return type(self)(year, month, day, hour, minute, second, microsecond, tzinfo) def astimezone(self, tz=None): From pypy.commits at gmail.com Thu Aug 17 14:05:44 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 17 Aug 2017 11:05:44 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2632 Message-ID: <5995daf8.89e61c0a.8e83c.18e7@mx.google.com> Author: Armin Rigo Branch: Changeset: r92165:141ba627dc5f Date: 2017-08-17 20:05 +0200 http://bitbucket.org/pypy/pypy/changeset/141ba627dc5f/ Log: Issue #2632 Try even harder to get 'CDLL._handle' work like CPython diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -361,17 +361,20 @@ if handle is None: if flags & _FUNCFLAG_CDECL: - self._handle = _ffi.CDLL(name, mode) + pypy_dll = _ffi.CDLL(name, mode) else: - self._handle = _ffi.WinDLL(name, mode) - else: - self._handle = handle + pypy_dll = _ffi.WinDLL(name, mode) + self._pypy_dll = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int + self._handle = handle def __repr__(self): - return "<%s '%s', handle %r at 0x%x>" % ( - self.__class__.__name__, self._name, self._handle, - id(self) & (_sys.maxint * 2 + 1)) - + return "<%s '%s', handle %x at %x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxint*2 + 1)), + id(self) & (_sys.maxint*2 + 1)) def __getattr__(self, name): if name.startswith('__') and name.endswith('__'): diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -82,7 +82,7 @@ return False def in_dll(self, dll, name): - return 
self.from_address(dll._handle.getaddressindll(name)) + return self.from_address(dll._pypy_dll.getaddressindll(name)) def from_buffer(self, obj, offset=0): size = self._sizeofinstances() diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -430,7 +430,7 @@ ffires = restype.get_ffi_argtype() return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_) - cdll = self.dll._handle + cdll = self.dll._pypy_dll try: ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes] ffi_restype = restype.get_ffi_argtype() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py @@ -43,6 +43,12 @@ cdll.LoadLibrary(lib) CDLL(lib) + def test__handle(self): + lib = find_library("c") + if lib: + cdll = CDLL(lib) + assert type(cdll._handle) in (int, long) + if os.name in ("nt", "ce"): def test_load_library(self): if is_resource_enabled("printing"): From pypy.commits at gmail.com Fri Aug 18 10:28:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 18 Aug 2017 07:28:53 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5996f9a5.919bdf0a.c7a5.e09c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92166:90593ef2b17c Date: 2017-08-18 16:28 +0200 http://bitbucket.org/pypy/pypy/changeset/90593ef2b17c/ Log: hg merge default diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -361,17 +361,20 @@ if handle is None: if flags & _FUNCFLAG_CDECL: - self._handle = _ffi.CDLL(name, mode) + pypy_dll = _ffi.CDLL(name, mode) else: - self._handle = _ffi.WinDLL(name, mode) - else: - self._handle = handle + pypy_dll = _ffi.WinDLL(name, mode) + self._pypy_dll = 
pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int + self._handle = handle def __repr__(self): - return "<%s '%s', handle %r at 0x%x>" % ( - self.__class__.__name__, self._name, self._handle, - id(self) & (_sys.maxint * 2 + 1)) - + return "<%s '%s', handle %x at %x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxint*2 + 1)), + id(self) & (_sys.maxint*2 + 1)) def __getattr__(self, name): if name.startswith('__') and name.endswith('__'): diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -82,7 +82,7 @@ return False def in_dll(self, dll, name): - return self.from_address(dll._handle.getaddressindll(name)) + return self.from_address(dll._pypy_dll.getaddressindll(name)) def from_buffer(self, obj, offset=0): size = self._sizeofinstances() diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -430,7 +430,7 @@ ffires = restype.get_ffi_argtype() return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_) - cdll = self.dll._handle + cdll = self.dll._pypy_dll try: ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes] ffi_restype = restype.get_ffi_argtype() diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1632,7 +1632,7 @@ assert cpyext_glob_tid_ptr[0] == 0 cpyext_glob_tid_ptr[0] = tid - preexist_error = PyErr_Occurred(space) is not None + preexist_error = PyErr_Occurred(space) try: # Call the function result = call_external_function(func, *boxed_args) @@ -1656,17 +1656,20 @@ has_result = ret is not None # Check for exception consistency - has_error = PyErr_Occurred(space) is not None - if not preexist_error: - if has_error and has_result: - raise oefmt(space.w_SystemError, - "An exception was set, but function returned a " 
- "value") - elif not expect_null and not has_error and not has_result: - raise oefmt(space.w_SystemError, - "Function returned a NULL result without setting " - "an exception") - if has_error: + # XXX best attempt, will miss preexisting error that is + # overwritten with a new error of the same type + error = PyErr_Occurred(space) + has_new_error = (error is not None) and (error is not preexist_error) + has_result = ret is not None + if not expect_null and has_new_error and has_result: + raise oefmt(space.w_SystemError, + "An exception was set, but function returned a " + "value") + elif not expect_null and not has_new_error and not has_result: + raise oefmt(space.w_SystemError, + "Function returned a NULL result without setting " + "an exception") + elif has_new_error: state = space.fromcache(State) state.check_and_raise_exception() diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -24,6 +24,10 @@ def PyPy_Crash2(space): 1/0 + at api.cpython_api([api.PyObject], api.PyObject, result_is_ll=True) +def PyPy_Noop(space, pyobj): + return pyobj + class TestApi: def test_signature(self): common_functions = api.FUNCTIONS_BY_HEADER[api.pypy_decl] @@ -665,6 +669,7 @@ body = """ PyAPI_FUNC(PyObject*) PyPy_Crash1(void); PyAPI_FUNC(long) PyPy_Crash2(void); + PyAPI_FUNC(PyObject*) PyPy_Noop(PyObject*); static PyObject* foo_crash1(PyObject* self, PyObject *args) { return PyPy_Crash1(); @@ -688,9 +693,27 @@ int a = PyPy_Crash2(); return PyFloat_FromDouble(a); } + static PyObject* foo_noop(PyObject* self, PyObject* args) + { + Py_INCREF(args); + return PyPy_Noop(args); + } + static PyObject* foo_set(PyObject* self, PyObject *args) + { + PyErr_SetString(PyExc_TypeError, "clear called with no error"); + if (PyLong_Check(args)) { + Py_INCREF(args); + return args; + } + return NULL; + } static PyObject* foo_clear(PyObject* self, PyObject *args) { 
PyErr_Clear(); + if (PyLong_Check(args)) { + Py_INCREF(args); + return args; + } return NULL; } static PyMethodDef methods[] = { @@ -698,7 +721,9 @@ { "crash2", foo_crash2, METH_NOARGS }, { "crash3", foo_crash3, METH_NOARGS }, { "crash4", foo_crash4, METH_NOARGS }, - { "clear", foo_clear, METH_NOARGS }, + { "clear", foo_clear, METH_O }, + { "set", foo_set, METH_O }, + { "noop", foo_noop, METH_O }, { NULL } }; static struct PyModuleDef moduledef = { @@ -710,15 +735,46 @@ }; """ module = self.import_module(name='foo', body=body) + # uncaught interplevel exceptions are turned into SystemError - raises(SystemError, module.crash1) - raises(SystemError, module.crash2) - # caught exception + expected = "ZeroDivisionError('integer division or modulo by zero',)" + exc = raises(SystemError, module.crash1) + assert exc.value[0] == expected + + exc = raises(SystemError, module.crash2) + assert exc.value[0] == expected + + # caught exception, api.cpython_api return value works assert module.crash3() == -1 - # An exception was set, but function returned a value - raises(SystemError, module.crash4) - # No exception set, but NULL returned - raises(SystemError, module.clear) + + expected = 'An exception was set, but function returned a value' + # PyPy only incompatibility/extension + exc = raises(SystemError, module.crash4) + assert exc.value[0] == expected + + # An exception was set by the previous call, it can pass + # cleanly through a call that doesn't check error state + assert module.noop(1) == 1 + + # clear the exception but return NULL, signalling an error + expected = 'Function returned a NULL result without setting an exception' + exc = raises(SystemError, module.clear, None) + assert exc.value[0] == expected + + # Set an exception and return NULL + raises(TypeError, module.set, None) + + # clear any exception and return a value + assert module.clear(1) == 1 + + # Set an exception, but return non-NULL + expected = 'An exception was set, but function returned a value' + 
exc = raises(SystemError, module.set, 1) + assert exc.value[0] == expected + + + # Clear the exception and return a value, all is OK + assert module.clear(1) == 1 def test_new_exception(self): mod = self.import_extension('foo', [ diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ b/pypy/module/imp/test/test_app.py @@ -81,15 +81,17 @@ def test_suffixes(self): import imp for suffix, mode, type in imp.get_suffixes(): - if mode == imp.PY_SOURCE: + if type == imp.PY_SOURCE: assert suffix == '.py' - assert type == 'r' - elif mode == imp.PY_COMPILED: + assert mode == 'U' + elif type == imp.PY_COMPILED: assert suffix in ('.pyc', '.pyo') - assert type == 'rb' - elif mode == imp.C_EXTENSION: + assert mode == 'rb' + elif type == imp.C_EXTENSION: assert suffix.endswith(('.pyd', '.so')) - assert type == 'rb' + assert mode == 'rb' + else: + assert False, ("Unknown type", suffix, mode, type) def test_ext_suffixes(self): import _imp diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py @@ -43,6 +43,12 @@ cdll.LoadLibrary(lib) CDLL(lib) + def test__handle(self): + lib = find_library("c") + if lib: + cdll = CDLL(lib) + assert type(cdll._handle) in (int, long) + if os.name in ("nt", "ce"): def test_load_library(self): if is_resource_enabled("printing"): From pypy.commits at gmail.com Fri Aug 18 11:23:03 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 18 Aug 2017 08:23:03 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2636 Message-ID: <59970657.44051c0a.a9ffb.abce@mx.google.com> Author: Armin Rigo Branch: Changeset: r92167:eaa386e4970b Date: 2017-08-18 17:22 +0200 http://bitbucket.org/pypy/pypy/changeset/eaa386e4970b/ Log: Issue #2636 Add the same workaround as CPython for os.write() on Windows on a tty 
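The clamping logic added below can be sketched as a standalone plain-Python function (a paraphrase of the RPython helper, for illustration only; `is_tty` stands in for the `c_isatty(fd)` call, which is an RPython binding not available here):

```python
def bound_for_write(is_tty, count):
    """Clamp the byte count passed to write() on Windows.

    Writing more than ~32 KiB to a console fd can fail with a
    'not enough space' error (CPython issue 11395, PyPy issue 2636).
    """
    if count > 32767 and is_tty:
        count = 32767          # console fd: cap the single write
    elif count > 0x7fffffff:
        count = 0x7fffffff     # write() takes a C int; avoid overflow
    return count

print(bound_for_write(True, 100000))   # clamped for a tty: 32767
print(bound_for_write(False, 100000))  # non-tty: unchanged
```

Clamping (rather than splitting the buffer) is enough because `os.write()` is already allowed to perform a short write, and callers must handle that.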
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -205,6 +205,18 @@ if not is_valid_fd(fd): from errno import EBADF raise OSError(EBADF, 'Bad file descriptor') + + def _bound_for_write(fd, count): + if count > 32767 and c_isatty(fd): + # CPython Issue #11395, PyPy Issue #2636: the Windows console + # returns an error (12: not enough space error) on writing into + # stdout if stdout mode is binary and the length is greater than + # 66,000 bytes (or less, depending on heap usage). Can't easily + # test that, because we need 'fd' to be non-redirected... + count = 32767 + elif count > 0x7fffffff: + count = 0x7fffffff + return count else: def is_valid_fd(fd): return 1 @@ -213,6 +225,9 @@ def validate_fd(fd): pass + def _bound_for_write(fd, count): + return count + def closerange(fd_low, fd_high): # this behaves like os.closerange() from Python 2.6. for fd in xrange(fd_low, fd_high): @@ -449,6 +464,7 @@ def write(fd, data): count = len(data) validate_fd(fd) + count = _bound_for_write(fd, count) with rffi.scoped_nonmovingbuffer(data) as buf: return handle_posix_error('write', c_write(fd, buf, count)) From pypy.commits at gmail.com Fri Aug 18 11:31:16 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 18 Aug 2017 08:31:16 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: The `U` mode is deprecated on Python 3; use r instead. Message-ID: <59970844.03081c0a.5d75b.7a24@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92168:6ee00ca3924e Date: 2017-08-18 11:30 -0400 http://bitbucket.org/pypy/pypy/changeset/6ee00ca3924e/ Log: The `U` mode is deprecated on Python 3; use r instead. 
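Since `imp` itself is deprecated on Python 3 (and removed in 3.12), the same suffix/mode data can be inspected through the non-deprecated `importlib.machinery` API; a rough illustration (the exact suffix lists vary by platform):

```python
import importlib.machinery as machinery

# Source is read in text mode ('r' on Python 3 -- universal newlines
# are the default there, making the old 'U' flag redundant); bytecode
# and extension modules are opened in binary mode ('rb').
print('.py' in machinery.SOURCE_SUFFIXES)     # True
print('.pyc' in machinery.BYTECODE_SUFFIXES)  # True
print(machinery.EXTENSION_SUFFIXES)           # e.g. ends with '.so' or '.pyd'
```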
diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ b/pypy/module/imp/test/test_app.py @@ -83,7 +83,7 @@ for suffix, mode, type in imp.get_suffixes(): if type == imp.PY_SOURCE: assert suffix == '.py' - assert mode == 'U' + assert mode == 'r' elif type == imp.PY_COMPILED: assert suffix in ('.pyc', '.pyo') assert mode == 'rb' From pypy.commits at gmail.com Fri Aug 18 15:21:30 2017 From: pypy.commits at gmail.com (mjacob) Date: Fri, 18 Aug 2017 12:21:30 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 Message-ID: <59973e3a.1a9cdf0a.9ec2.adaf@mx.google.com> Author: Manuel Jacob Branch: py3.6 Changeset: r92170:3cbf980069f5 Date: 2017-07-22 03:37 +0200 http://bitbucket.org/pypy/pypy/changeset/3cbf980069f5/ Log: hg merge py3.5 diff --git a/lib_pypy/_cffi_ssl/README.md b/lib_pypy/_cffi_ssl/README.md --- a/lib_pypy/_cffi_ssl/README.md +++ b/lib_pypy/_cffi_ssl/README.md @@ -5,9 +5,15 @@ it renames the compiled shared object to _pypy_openssl.so (which means that cryptography can ship their own cffi backend) -NOTE: currently, we have changed ``_cffi_src/openssl/callbacks.py`` to -not rely on the CPython C API, and ``_cffi_src/utils.py`` for issue #2575 -(29c9a89359e4). (The first change is now backported.) +NOTE: currently, we have the following changes: + +* ``_cffi_src/openssl/callbacks.py`` to not rely on the CPython C API + (this change is now backported) + +* ``_cffi_src/utils.py`` for issue #2575 (29c9a89359e4) + +* ``_cffi_src/openssl/x509_vfy.py`` for issue #2605 (ca4d0c90f5a1) + # Tests? 
diff --git a/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py b/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py --- a/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py +++ b/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py @@ -221,10 +221,16 @@ static const long X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM = 0; static const long X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED = 0; static const long X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 = 0; +#ifndef X509_V_ERR_HOSTNAME_MISMATCH static const long X509_V_ERR_HOSTNAME_MISMATCH = 0; +#endif +#ifndef X509_V_ERR_EMAIL_MISMATCH static const long X509_V_ERR_EMAIL_MISMATCH = 0; +#endif +#ifndef X509_V_ERR_IP_ADDRESS_MISMATCH static const long X509_V_ERR_IP_ADDRESS_MISMATCH = 0; #endif +#endif /* OpenSSL 1.0.2beta2+ verification parameters */ #if CRYPTOGRAPHY_OPENSSL_102BETA2_OR_GREATER && \ diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -462,6 +462,12 @@ fwrite(buf, 1, count, fp) return 0 + at cts.decl(""" + Py_ssize_t PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)""", + error=-1) +def PyObject_LengthHint(space, w_o, defaultvalue): + return space.length_hint(w_o, defaultvalue) + @cpython_api([lltype.Signed], lltype.Void) def _PyPyGC_AddMemoryPressure(space, report): from rpython.rlib import rgc diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -349,6 +349,27 @@ assert type(module.asbytes(sub1(b''))) is bytes assert type(module.asbytes(sub2(b''))) is sub2 + def test_LengthHint(self): + import operator + class WithLen: + def __len__(self): + return 1 + def __length_hint__(self): + return 42 + class NoLen: + def __length_hint__(self): + return 2 + module = self.import_extension('test_LengthHint', [ + ('length_hint', 'METH_VARARGS', + """ + PyObject *obj = PyTuple_GET_ITEM(args, 0); + 
Py_ssize_t i = PyLong_AsSsize_t(PyTuple_GET_ITEM(args, 1)); + return PyLong_FromSsize_t(PyObject_LengthHint(obj, i)); + """)]) + assert module.length_hint(WithLen(), 5) == operator.length_hint(WithLen(), 5) == 1 + assert module.length_hint(NoLen(), 5) == operator.length_hint(NoLen(), 5) == 2 + assert module.length_hint(object(), 5) == operator.length_hint(object(), 5) == 5 + def test_add_memory_pressure(self): self.reset_memory_pressure() # for the potential skip module = self.import_extension('foo', [ @@ -528,4 +549,3 @@ Py_RETURN_NONE; """)]) assert module.release() is None - diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -365,8 +365,8 @@ characters, all remaining cased characters have lowercase. """ - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): """B.translate(table[, deletechars]) -> copy of B Return a copy of the string B, where all characters occurring diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -742,8 +742,8 @@ DEFAULT_NOOP_TABLE = ''.join([chr(i) for i in range(256)]) # for bytes and bytearray, overridden by unicode - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): if space.is_w(w_table, space.w_None): table = self.DEFAULT_NOOP_TABLE else: @@ -753,7 +753,7 @@ "translation table must be 256 characters long") string = self._val(space) - deletechars = self._op_val(space, w_deletechars) + deletechars = self._op_val(space, w_delete) if len(deletechars) == 0: buf = self._builder(len(string)) for char in string: 
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py --- a/rpython/translator/c/genc.py +++ b/rpython/translator/c/genc.py @@ -382,6 +382,80 @@ if self.config.translation.profopt: if self.config.translation.profoptargs is None: raise Exception("No profoptargs specified, neither in the command line, nor in the target. If the target is not PyPy, please specify profoptargs") + + # Set the correct PGO params based on OS and CC + profopt_gen_flag = "" + profopt_use_flag = "" + profopt_merger = "" + profopt_file = "" + llvm_profdata = "" + + cc = self.translator.platform.cc + + # Locate llvm-profdata + if "clang" in cc: + clang_bin = cc + path = os.environ.get("PATH").split(":") + profdata_found = False + + # Try to find it in $PATH (Darwin and Linux) + for dir in path: + bin = "%s/llvm-profdata" % dir + if os.path.isfile(bin): + llvm_profdata = bin + profdata_found = True + break + + # If not found, try to find it where clang is actually installed (Darwin and Linux) + if not profdata_found: + # If the full path is not given, find where clang is located + if not os.path.isfile(clang_bin): + for dir in path: + bin = "%s/%s" % (dir, cc) + if os.path.isfile(bin): + clang_bin = bin + break + # Some systems install clang elsewhere as a symlink to the real path, + # which is where the related llvm tools are located. 
+ if os.path.islink(clang_bin): + clang_bin = os.path.realpath(clang_bin) # the real clang binary + # llvm-profdata must be in the same directory as clang + llvm_profdata = "%s/llvm-profdata" % os.path.dirname(clang_bin) + profdata_found = os.path.isfile(llvm_profdata) + + # If not found, and Darwin is used, try to find it in the development environment + # More: https://apple.stackexchange.com/questions/197053/ + if not profdata_found and sys.platform == 'darwin': + code = os.system("/usr/bin/xcrun -find llvm-profdata 2>/dev/null") + if code == 0: + llvm_profdata = "/usr/bin/xcrun llvm-profdata" + profdata_found = True + + # If everything failed, throw Exception, sorry + if not profdata_found: + raise Exception( + "Error: Cannot perform profopt build because llvm-profdata was not found in PATH. " + "Please add it to PATH and run the translation again.") + + # Set the PGO flags + if "clang" in cc: + # Any changes made here should be reflected in the GCC+Darwin case below + profopt_gen_flag = "-fprofile-instr-generate" + profopt_use_flag = "-fprofile-instr-use=code.profclangd" + profopt_merger = "%s merge -output=code.profclangd *.profclangr" % llvm_profdata + profopt_file = 'LLVM_PROFILE_FILE="code-%p.profclangr"' + elif "gcc" in cc: + if sys.platform == 'darwin': + profopt_gen_flag = "-fprofile-instr-generate" + profopt_use_flag = "-fprofile-instr-use=code.profclangd" + profopt_merger = "%s merge -output=code.profclangd *.profclangr" % llvm_profdata + profopt_file = 'LLVM_PROFILE_FILE="code-%p.profclangr"' + else: + profopt_gen_flag = "-fprofile-generate" + profopt_use_flag = "-fprofile-use -fprofile-correction" + profopt_merger = "true" + profopt_file = "" + if self.config.translation.shared: mk.rule('$(PROFOPT_TARGET)', '$(TARGET) main.o', '$(CC_LINK) $(LDFLAGS_LINK) main.o -L. 
-l$(SHARED_IMPORT_LIB) -o $@ $(RPATH_FLAGS) -lgcov') @@ -390,10 +464,11 @@ rules.append( ('profopt', '', [ - '$(MAKE) CFLAGS="-fprofile-generate -fPIC $(CFLAGS) -fno-lto" LDFLAGS="-fprofile-generate $(LDFLAGS) -fno-lto" $(PROFOPT_TARGET)', - '%s %s ' % (exe_name, self.config.translation.profoptargs), + '$(MAKE) CFLAGS="%s -fPIC $(CFLAGS)" LDFLAGS="%s $(LDFLAGS)" $(PROFOPT_TARGET)' % (profopt_gen_flag, profopt_gen_flag), + '%s %s %s ' % (profopt_file, exe_name, self.config.translation.profoptargs), + '%s' % (profopt_merger), '$(MAKE) clean_noprof', - '$(MAKE) CFLAGS="-fprofile-use -fprofile-correction -fPIC $(CFLAGS) -fno-lto" LDFLAGS="-fprofile-use $(LDFLAGS) -fno-lto" $(PROFOPT_TARGET)', + '$(MAKE) CFLAGS="%s -fPIC $(CFLAGS)" LDFLAGS="%s $(LDFLAGS)" $(PROFOPT_TARGET)' % (profopt_use_flag, profopt_use_flag), ])) for rule in rules: From pypy.commits at gmail.com Fri Aug 18 15:21:32 2017 From: pypy.commits at gmail.com (mjacob) Date: Fri, 18 Aug 2017 12:21:32 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Test and fix type of result of .fromhex(). Message-ID: <59973e3c.c98adf0a.54101.7eb6@mx.google.com> Author: Manuel Jacob Branch: py3.6 Changeset: r92171:ac3e33369ba0 Date: 2017-07-22 03:54 +0200 http://bitbucket.org/pypy/pypy/changeset/ac3e33369ba0/ Log: Test and fix type of result of .fromhex(). 
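The behaviour being added matches CPython 3, where `fromhex()` is a classmethod that constructs an instance of the class it was called on; a quick plain-Python check (the subclass names are made up for illustration):

```python
class SubBytes(bytes):
    pass

class SubArray(bytearray):
    pass

# On CPython 3 the classmethod returns the subclass, not plain
# bytes/bytearray; this commit makes PyPy agree.
print(type(SubBytes.fromhex("abcd")).__name__)  # SubBytes
print(type(SubArray.fromhex("abcd")).__name__)  # SubArray
print(SubBytes.fromhex("abcd") == b"\xab\xcd")  # True
```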
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -204,7 +204,10 @@ data = _hexstring_to_array(space, hexstring) # in CPython bytearray.fromhex is a staticmethod, so # we ignore w_type and always return a bytearray - return new_bytearray(space, space.w_bytearray, data) + w_result = new_bytearray(space, space.w_bytearray, data) + if w_bytearraytype is not space.w_bytearray: + w_result = space.call_function(w_bytearraytype, w_result) + return w_result @unwrap_spec(encoding='text_or_none', errors='text_or_none') def descr_init(self, space, w_source=None, encoding=None, errors=None): diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -564,7 +564,10 @@ from pypy.objspace.std.bytearrayobject import _hexstring_to_array hexstring = space.unicode_w(w_hexstring) bytes = ''.join(_hexstring_to_array(space, hexstring)) - return W_BytesObject(bytes) + w_result = W_BytesObject(bytes) + if w_type is not space.w_bytes: + w_result = space.call_function(w_type, w_result) + return w_result def descr_repr(self, space): return space.newtext(string_escape_encode(self._value, True)) diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py --- a/pypy/objspace/std/test/test_bytearrayobject.py +++ b/pypy/objspace/std/test/test_bytearrayobject.py @@ -439,6 +439,11 @@ raises(ValueError, bytearray.fromhex, '12 \x00 34') raises(ValueError, bytearray.fromhex, '\u1234') + def test_fromhex_subclass(self): + class Sub(bytearray): + pass + assert type(Sub.fromhex("abcd")) is Sub + def test_extend(self): b = bytearray(b'abc') b.extend(bytearray(b'def')) diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ 
b/pypy/objspace/std/test/test_bytesobject.py @@ -135,6 +135,11 @@ raises(TypeError, bytes.fromhex, True) raises(ValueError, bytes.fromhex, "hello world") + def test_fromhex_subclass(self): + class Sub(bytes): + pass + assert type(Sub.fromhex("abcd")) is Sub + def test_format(self): raises(TypeError, "foo".__mod__, "bar") raises(TypeError, u"foo".__mod__, "bar") From pypy.commits at gmail.com Fri Aug 18 15:21:28 2017 From: pypy.commits at gmail.com (mjacob) Date: Fri, 18 Aug 2017 12:21:28 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Rename bytes.translate's 'deletechars' argument to 'delete' to match CPython. Message-ID: <59973e38.90c5df0a.7e079.e5a0@mx.google.com> Author: Manuel Jacob Branch: py3.5 Changeset: r92169:995cc5bdbad5 Date: 2017-07-22 03:36 +0200 http://bitbucket.org/pypy/pypy/changeset/995cc5bdbad5/ Log: Rename bytes.translate's 'deletechars' argument to 'delete' to match CPython. diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -365,8 +365,8 @@ characters, all remaining cased characters have lowercase. 
""" - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): """B.translate(table[, deletechars]) -> copy of B Return a copy of the string B, where all characters occurring diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -742,8 +742,8 @@ DEFAULT_NOOP_TABLE = ''.join([chr(i) for i in range(256)]) # for bytes and bytearray, overridden by unicode - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): if space.is_w(w_table, space.w_None): table = self.DEFAULT_NOOP_TABLE else: @@ -753,7 +753,7 @@ "translation table must be 256 characters long") string = self._val(space) - deletechars = self._op_val(space, w_deletechars) + deletechars = self._op_val(space, w_delete) if len(deletechars) == 0: buf = self._builder(len(string)) for char in string: From pypy.commits at gmail.com Fri Aug 18 15:21:34 2017 From: pypy.commits at gmail.com (mjacob) Date: Fri, 18 Aug 2017 12:21:34 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge Message-ID: <59973e3e.cda2df0a.94a79.0e57@mx.google.com> Author: Manuel Jacob Branch: py3.5 Changeset: r92172:9872266a277f Date: 2017-08-18 21:20 +0200 http://bitbucket.org/pypy/pypy/changeset/9872266a277f/ Log: hg merge diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -364,8 +364,8 @@ characters, all remaining cased characters have lowercase. 
""" - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): """B.translate(table[, deletechars]) -> copy of B Return a copy of the string B, where all characters occurring diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -742,8 +742,8 @@ DEFAULT_NOOP_TABLE = ''.join([chr(i) for i in range(256)]) # for bytes and bytearray, overridden by unicode - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): if space.is_w(w_table, space.w_None): table = self.DEFAULT_NOOP_TABLE else: @@ -753,7 +753,7 @@ "translation table must be 256 characters long") string = self._val(space) - deletechars = self._op_val(space, w_deletechars) + deletechars = self._op_val(space, w_delete) if len(deletechars) == 0: buf = self._builder(len(string)) for char in string: From pypy.commits at gmail.com Fri Aug 18 15:44:47 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 18 Aug 2017 12:44:47 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: If the machine can be [3-6], allow the suffix to be [3-6]. Message-ID: <599743af.42a7df0a.b750.fd50@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92173:6eb780259953 Date: 2017-08-18 15:43 -0400 http://bitbucket.org/pypy/pypy/changeset/6eb780259953/ Log: If the machine can be [3-6], allow the suffix to be [3-6]. It's hard to tell but the intent (https://www.python.org/dev/peps/pep-3149/) seems to be to embed the machine identifier into the suffix. Therefore, the old version of the test was broken in asserting that i[4-6]86 machines all turn into i386 in the suffix. 
Change the test to accept the other machine identifiers as well. diff --git a/lib-python/3/test/test_sysconfig.py b/lib-python/3/test/test_sysconfig.py --- a/lib-python/3/test/test_sysconfig.py +++ b/lib-python/3/test/test_sysconfig.py @@ -397,9 +397,16 @@ self.assertTrue('linux' in suffix, suffix) if re.match('(i[3-6]86|x86_64)$', machine): if ctypes.sizeof(ctypes.c_char_p()) == 4: - self.assertTrue(suffix.endswith('i386-linux-gnu.so') \ - or suffix.endswith('x86_64-linux-gnux32.so'), - suffix) + self.assertTrue( + suffix.endswith(( + 'i386-linux-gnu.so', + 'i486-linux-gnu.so', + 'i586-linux-gnu.so', + 'i686-linux-gnu.so', + 'x86_64-linux-gnux32.so', + )), + suffix, + ) else: # 8 byte pointer size self.assertTrue(suffix.endswith('x86_64-linux-gnu.so'), suffix) From pypy.commits at gmail.com Fri Aug 18 15:44:49 2017 From: pypy.commits at gmail.com (exarkun) Date: Fri, 18 Aug 2017 12:44:49 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge upstream Message-ID: <599743b1.71b8df0a.a1e87.f243@mx.google.com> Author: Jean-Paul Calderone Branch: py3.5 Changeset: r92174:38134ba71d3c Date: 2017-08-18 15:44 -0400 http://bitbucket.org/pypy/pypy/changeset/38134ba71d3c/ Log: merge upstream diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -364,8 +364,8 @@ characters, all remaining cased characters have lowercase. 
""" - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): """B.translate(table[, deletechars]) -> copy of B Return a copy of the string B, where all characters occurring diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -742,8 +742,8 @@ DEFAULT_NOOP_TABLE = ''.join([chr(i) for i in range(256)]) # for bytes and bytearray, overridden by unicode - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): if space.is_w(w_table, space.w_None): table = self.DEFAULT_NOOP_TABLE else: @@ -753,7 +753,7 @@ "translation table must be 256 characters long") string = self._val(space) - deletechars = self._op_val(space, w_deletechars) + deletechars = self._op_val(space, w_delete) if len(deletechars) == 0: buf = self._builder(len(string)) for char in string: From pypy.commits at gmail.com Sat Aug 19 03:14:08 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sat, 19 Aug 2017 00:14:08 -0700 (PDT) Subject: [pypy-commit] pypy default: fix comment Message-ID: <5997e540.90aadf0a.e530a.1904@mx.google.com> Author: Carl Friedrich Bolz Branch: Changeset: r92175:46ce5f43e718 Date: 2017-08-19 09:13 +0200 http://bitbucket.org/pypy/pypy/changeset/46ce5f43e718/ Log: fix comment diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1305,7 +1305,7 @@ self.rm.possibly_free_var(tmpbox_high) def compute_hint_frame_locations(self, operations): - # optimization only: fill in the 'hint_frame_locations' dictionary + # optimization only: fill in the 'hint_frame_pos' 
dictionary
        # of 'fm' based on the JUMP at the end of the loop, by looking
        # at where we would like the boxes to be after the jump.
        op = operations[-1]
@@ -1320,7 +1320,7 @@
             self._compute_hint_frame_locations_from_descr(descr)
         #else:
         #   The loop ends in a JUMP going back to a LABEL in the same loop.
-        #   We cannot fill 'hint_frame_locations' immediately, but we can
+        #   We cannot fill 'hint_frame_pos' immediately, but we can
         #   wait until the corresponding consider_label() to know where the
         #   we would like the boxes to be after the jump.

From pypy.commits at gmail.com  Sat Aug 19 08:04:26 2017
From: pypy.commits at gmail.com (arigo)
Date: Sat, 19 Aug 2017 05:04:26 -0700 (PDT)
Subject: [pypy-commit] pypy default: Apply part of PR #546
Message-ID: <5998294a.90addf0a.4572c.e0c5@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r92176:2a268685e180
Date: 2017-08-19 14:03 +0200
http://bitbucket.org/pypy/pypy/changeset/2a268685e180/

Log: Apply part of PR #546

diff --git a/rpython/rtyper/tool/rffi_platform.py b/rpython/rtyper/tool/rffi_platform.py
--- a/rpython/rtyper/tool/rffi_platform.py
+++ b/rpython/rtyper/tool/rffi_platform.py
@@ -710,7 +710,8 @@
         size, _ = expected_size_and_sign
         return lltype.FixedSizeArray(fieldtype.OF, size/_sizeof(fieldtype.OF))
     raise TypeError("conflict between translating python and compiler field"
-                    " type %r for %r" % (fieldtype, fieldname))
+                    " type %r for symbol %r, expected size+sign %r" % (
+                        fieldtype, fieldname, expected_size_and_sign))
 
 def expose_value_as_rpython(value):
     if intmask(value) == value:

From pypy.commits at gmail.com  Sun Aug 20 04:15:27 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 01:15:27 -0700 (PDT)
Subject: [pypy-commit] pypy default: remove remnants of getfield support (now done via gc_load)
Message-ID: <5999451f.c6c31c0a.16a35.58fc@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92177:0d71d69924c8
Date: 2017-08-20 10:03 +0200
http://bitbucket.org/pypy/pypy/changeset/0d71d69924c8/

Log: remove remnants of getfield support (now done via gc_load)

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1618,18 +1618,6 @@
         else:
             not_implemented("save_into_mem size = %d" % size)
 
-    def _genop_getfield(self, op, arglocs, resloc):
-        base_loc, ofs_loc, size_loc, sign_loc = arglocs
-        assert isinstance(size_loc, ImmedLoc)
-        source_addr = AddressLoc(base_loc, ofs_loc)
-        self.load_from_mem(resloc, source_addr, size_loc, sign_loc)
-
-    genop_getfield_gc_i = _genop_getfield
-    genop_getfield_gc_r = _genop_getfield
-    genop_getfield_gc_f = _genop_getfield
-    genop_getfield_raw_i = _genop_getfield
-    genop_getfield_raw_f = _genop_getfield
-
     def _genop_gc_load(self, op, arglocs, resloc):
         base_loc, ofs_loc, size_loc, sign_loc = arglocs
         assert isinstance(size_loc, ImmedLoc)

From pypy.commits at gmail.com  Sun Aug 20 06:38:22 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 03:38:22 -0700 (PDT)
Subject: [pypy-commit] pypy default: issue #2637 testing: implement CPyListStrategy.getstorage_copy
Message-ID: <5999669e.87badf0a.5c587.4844@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92178:06465478f8df
Date: 2017-08-20 12:37 +0200
http://bitbucket.org/pypy/pypy/changeset/06465478f8df/

Log: issue #2637 testing: implement CPyListStrategy.getstorage_copy

diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py
--- a/pypy/module/cpyext/sequence.py
+++ b/pypy/module/cpyext/sequence.py
@@ -330,7 +330,11 @@
         return w_list.strategy.getitems_copy(w_list)
 
     def getstorage_copy(self, w_list):
-        raise NotImplementedError
+        storage = self.unerase(w_list.lstorage)
+        lst = [None] * storage._length
+        for i in range(storage._length):
+            lst[i] = from_ref(w_list.space, storage._elems[i])
+        return self.erase(CPyListStorage(w_list.space, lst))
 
     def append(self, w_list, w_item):
         w_list.switch_to_object_strategy()
diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py
--- a/pypy/module/cpyext/test/test_sequence.py
+++ b/pypy/module/cpyext/test/test_sequence.py
@@ -226,6 +226,15 @@
         w_l.inplace_mul(2)
         assert space.int_w(space.len(w_l)) == 10
 
+    def test_getstorage_copy(self, space, api):
+        w = space.wrap
+        w_l = w([1, 2, 3, 4])
+        api.PySequence_Fast(w_l, "foo") # converts
+
+        w_l1 = w([])
+        space.setitem(w_l1, space.newslice(w(0), w(0), w(1)), w_l)
+        assert map(space.unwrap, space.unpackiterable(w_l1)) == [1, 2, 3, 4]
+
 
 class AppTestSequenceObject(AppTestCpythonExtensionBase):
     def test_fast(self):

From pypy.commits at gmail.com  Sun Aug 20 06:55:23 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 03:55:23 -0700 (PDT)
Subject: [pypy-commit] pypy default: a bit of cleanup, remove some copies
Message-ID: <59996a9b.06981c0a.98175.f1c4@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92179:190bc6072d6d
Date: 2017-08-20 12:54 +0200
http://bitbucket.org/pypy/pypy/changeset/190bc6072d6d/

Log: a bit of cleanup, remove some copies

diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py
--- a/pypy/module/cpyext/sequence.py
+++ b/pypy/module/cpyext/sequence.py
@@ -294,6 +294,23 @@
     def getitems_fixedsize(self, w_list):
         return self.getitems_unroll(w_list)
 
+    def copy_into(self, w_list, w_other):
+        w_other.strategy = self
+        w_other.lstorage = self.getstorage_copy(w_list)
+
+    def clone(self, w_list):
+        storage = self.getstorage_copy(w_list)
+        w_clone = W_ListObject.from_storage_and_strategy(self.space, storage,
+                                                         self)
+        return w_clone
+
+    def getitems_copy(self, w_list):
+        return self.getitems(w_list) # getitems copies anyway
+
+    def getstorage_copy(self, w_list):
+        lst = self.getitems(w_list)
+        return self.erase(CPyListStorage(w_list.space, lst))
+
     #------------------------------------------
     # all these methods fail or switch strategy and then call ListObjectStrategy's method
 
@@ -301,23 +318,9 @@
         w_list.switch_to_object_strategy()
         w_list.strategy.setslice(w_list, start, stop, step, length)
 
-    def get_sizehint(self):
-        return -1
-
     def init_from_list_w(self, w_list, list_w):
         raise NotImplementedError
 
-    def clone(self, w_list):
-        storage = w_list.lstorage # lstorage is tuple, no need to clone
-        w_clone = W_ListObject.from_storage_and_strategy(self.space, storage,
-                                                         self)
-        w_clone.switch_to_object_strategy()
-        return w_clone
-
-    def copy_into(self, w_list, w_other):
-        w_list.switch_to_object_strategy()
-        w_list.strategy.copy_into(w_list, w_other)
-
     def _resize_hint(self, w_list, hint):
         pass
 
@@ -325,17 +328,6 @@
         w_list.switch_to_object_strategy()
         return w_list.strategy.find(w_list, w_item, start, stop)
 
-    def getitems_copy(self, w_list):
-        w_list.switch_to_object_strategy()
-        return w_list.strategy.getitems_copy(w_list)
-
-    def getstorage_copy(self, w_list):
-        storage = self.unerase(w_list.lstorage)
-        lst = [None] * storage._length
-        for i in range(storage._length):
-            lst[i] = from_ref(w_list.space, storage._elems[i])
-        return self.erase(CPyListStorage(w_list.space, lst))
-
     def append(self, w_list, w_item):
         w_list.switch_to_object_strategy()
         w_list.strategy.append(w_list, w_item)

From pypy.commits at gmail.com  Sun Aug 20 10:31:30 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 07:31:30 -0700 (PDT)
Subject: [pypy-commit] pypy default: make jit-backend-counts not be empty if it's the only section asked for
Message-ID: <59999d42.41bd1c0a.9f84.a648@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92180:0f01249125f8
Date: 2017-08-20 16:30 +0200
http://bitbucket.org/pypy/pypy/changeset/0f01249125f8/

Log: make jit-backend-counts not be empty if it's the only section asked
	for

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -504,7 +504,7 @@
         clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
         clt.frame_info.clear() # for now
-        if log:
+        if log or self._debug:
             number = looptoken.number
             operations = self._inject_debugging_code(looptoken, operations,
                                                      'e', number)
@@ -589,7 +589,7 @@
                    faildescr.adr_jump_offset)
         self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
         descr_number = compute_unique_id(faildescr)
-        if log:
+        if log or self._debug:
             operations = self._inject_debugging_code(faildescr, operations,
                                                      'b', descr_number)
         arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)

From pypy.commits at gmail.com  Sun Aug 20 11:07:22 2017
From: pypy.commits at gmail.com (arigo)
Date: Sun, 20 Aug 2017 08:07:22 -0700 (PDT)
Subject: [pypy-commit] cffi default: Should call GC_UnTrack before Py_DECREF()
Message-ID: <5999a5aa.07c41c0a.64f3e.9b92@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r3003:2763112dfca7
Date: 2017-08-20 17:07 +0200
http://bitbucket.org/cffi/cffi/changeset/2763112dfca7/

Log: Should call GC_UnTrack before Py_DECREF()

diff --git a/c/lib_obj.c b/c/lib_obj.c
--- a/c/lib_obj.c
+++ b/c/lib_obj.c
@@ -89,6 +89,7 @@
 
 static void lib_dealloc(LibObject *lib)
 {
+    PyObject_GC_UnTrack(lib);
     cdlopen_close_ignore_errors(lib->l_libhandle);
     Py_DECREF(lib->l_dict);
     Py_DECREF(lib->l_libname);

From pypy.commits at gmail.com  Sun Aug 20 13:05:27 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:05:27 -0700 (PDT)
Subject: [pypy-commit] pypy default: simplify the logic
Message-ID: <5999c157.883f1c0a.dd59b.6c0b@mx.google.com>

Author: Carl Friedrich Bolz
Branch: 
Changeset: r92181:f59f82d01a6d
Date: 2017-08-20 19:04 +0200
http://bitbucket.org/pypy/pypy/changeset/f59f82d01a6d/

Log: simplify the logic

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -552,10 +552,11 @@
             self.reg_bindings[result_v] = loc
             return loc
         if v not in self.reg_bindings:
+            # v not in a register. allocate one for result_v and move v there
             prev_loc = self.frame_manager.loc(v)
-            loc = self.force_allocate_reg(v, forbidden_vars)
+            loc = self.force_allocate_reg(result_v, forbidden_vars)
             self.assembler.regalloc_mov(prev_loc, loc)
-            assert v in self.reg_bindings
+            return loc
         if self.longevity[v][1] > self.position:
             # we need to find a new place for variable v and
             # store result in the same place

From pypy.commits at gmail.com  Sun Aug 20 13:09:26 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:26 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: a branch to try out some register allocation ideas
Message-ID: <5999c246.6395df0a.214ab.0010@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92182:32c23f8fb859
Date: 2017-08-18 21:36 +0200
http://bitbucket.org/pypy/pypy/changeset/32c23f8fb859/

Log: a branch to try out some register allocation ideas

	first step: refactor lifetime storage to not be tuple-based

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -293,12 +293,12 @@
     def is_still_alive(self, v):
         # Check if 'v' is alive at the current position.
         # Return False if the last usage is strictly before.
-        return self.longevity[v][1] >= self.position
+        return self.longevity[v].last_usage >= self.position
 
     def stays_alive(self, v):
         # Check if 'v' stays alive after the current position.
         # Return False if the last usage is before or at position.
-        return self.longevity[v][1] > self.position
+        return self.longevity[v].last_usage > self.position
 
     def next_instruction(self, incr=1):
         self.position += incr
@@ -315,7 +315,7 @@
         self._check_type(v)
         if isinstance(v, Const):
             return
-        if v not in self.longevity or self.longevity[v][1] <= self.position:
+        if v not in self.longevity or self.longevity[v].last_usage <= self.position:
             if v in self.reg_bindings:
                 self.free_regs.append(self.reg_bindings[v])
                 del self.reg_bindings[v]
@@ -351,7 +351,7 @@
         for v in self.reg_bindings:
             if v not in self.longevity:
                 llop.debug_print(lltype.Void, "variable %s not in longevity\n" % v.repr({}))
-            assert self.longevity[v][1] > self.position
+            assert self.longevity[v].last_usage > self.position
 
     def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False):
         """ Try to allocate a register, if we have one free.
@@ -427,7 +427,7 @@
                 continue
             if need_lower_byte and reg in self.no_lower_byte_regs:
                 continue
-            max_age = self.longevity[next][1]
+            max_age = self.longevity[next].last_usage
             if cur_max_age < max_age:
                 cur_max_age = max_age
                 candidate = next
@@ -446,7 +446,7 @@
         """
         self._check_type(v)
         if isinstance(v, TempVar):
-            self.longevity[v] = (self.position, self.position)
+            self.longevity[v] = Lifetime(self.position, self.position)
         loc = self.try_allocate_reg(v, selected_reg,
                                     need_lower_byte=need_lower_byte)
         if loc:
@@ -556,7 +556,7 @@
             loc = self.force_allocate_reg(v, forbidden_vars)
             self.assembler.regalloc_mov(prev_loc, loc)
         assert v in self.reg_bindings
-        if self.longevity[v][1] > self.position:
+        if self.longevity[v].last_usage > self.position:
             # we need to find a new place for variable v and
             # store result in the same place
             loc = self.reg_bindings[v]
@@ -643,7 +643,7 @@
 
         move_or_spill = []
         for v, reg in self.reg_bindings.items():
-            max_age = self.longevity[v][1]
+            max_age = self.longevity[v].last_usage
             if v not in force_store and max_age <= self.position:
                 # variable dies
                 del self.reg_bindings[v]
@@ -765,7 +765,7 @@
         # of COND_CALL don't accept a cc as input
         if next_op.getarg(0) is not op:
             return False
-        if self.longevity[op][1] > i + 1:
+        if self.longevity[op].last_usage > i + 1:
             return False
         if opnum != rop.COND_CALL:
             if op in operations[i + 1].getfailargs():
@@ -785,6 +785,10 @@
         assert op.numargs() == 1
         return [self.loc(op.getarg(0))]
 
+class Lifetime(object):
+    def __init__(self, definition_pos, last_usage):
+        self.definition_pos = definition_pos
+        self.last_usage = last_usage
 
 def compute_vars_longevity(inputargs, operations):
     # compute a dictionary that maps variables to index in
@@ -824,14 +828,14 @@
             if arg.type != 'v' and arg in last_used:
                 assert not isinstance(arg, Const)
                 assert i < last_used[arg]
-                longevity[arg] = (i, last_used[arg])
+                longevity[arg] = Lifetime(i, last_used[arg])
                 del last_used[arg]
     for arg in inputargs:
         assert not isinstance(arg, Const)
         if arg not in last_used:
-            longevity[arg] = (-1, -1)
+            longevity[arg] = Lifetime(-1, -1)
         else:
-            longevity[arg] = (0, last_used[arg])
+            longevity[arg] = Lifetime(0, last_used[arg])
             del last_used[arg]
     assert len(last_used) == 0
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc.py
@@ -1,7 +1,7 @@
 import py
 from rpython.jit.metainterp.history import ConstInt, INT, FLOAT
 from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList
-from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
+from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan, Lifetime
 from rpython.jit.metainterp.resoperation import InputArgInt, InputArgRef,\
      InputArgFloat
@@ -17,7 +17,7 @@
     for i in range(num):
         box = InputArgInt(0)
         res.append(box)
-        longevity[box] = (0, 1)
+        longevity[box] = Lifetime(0, 1)
     return res, longevity
 
 class FakeReg(object):
@@ -76,7 +76,7 @@
 class TestRegalloc(object):
     def test_freeing_vars(self):
         b0, b1, b2 = newboxes(0, 0, 0)
-        longevity = {b0: (0, 1), b1: (0, 2), b2: (0, 2)}
+        longevity = {b0: Lifetime(0, 1), b1: Lifetime(0, 2), b2: Lifetime(0, 2)}
         rm = RegisterManager(longevity)
         rm.next_instruction()
         for b in b0, b1, b2:
@@ -190,7 +190,7 @@
 
     def test_force_result_in_reg_1(self):
         b0, b1 = newboxes(0, 0)
-        longevity = {b0: (0, 1), b1: (1, 3)}
+        longevity = {b0: Lifetime(0, 1), b1: Lifetime(1, 3)}
         fm = TFrameManager()
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
@@ -206,7 +206,7 @@
 
     def test_force_result_in_reg_2(self):
         b0, b1 = newboxes(0, 0)
-        longevity = {b0: (0, 2), b1: (1, 3)}
+        longevity = {b0: Lifetime(0, 2), b1: Lifetime(1, 3)}
         fm = TFrameManager()
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
@@ -222,7 +222,9 @@
 
     def test_force_result_in_reg_3(self):
         b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0)
-        longevity = {b0: (0, 2), b1: (0, 2), b3: (0, 2), b2: (0, 2), b4: (1, 3)}
+        longevity = {b0: Lifetime(0, 2), b1: Lifetime(0, 2),
+                     b3: Lifetime(0, 2), b2: Lifetime(0, 2),
+                     b4: Lifetime(1, 3)}
         fm = TFrameManager()
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
@@ -238,7 +240,7 @@
 
     def test_force_result_in_reg_4(self):
         b0, b1 = newboxes(0, 0)
-        longevity = {b0: (0, 1), b1: (0, 1)}
+        longevity = {b0: Lifetime(0, 1), b1: Lifetime(0, 1)}
         fm = TFrameManager()
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
@@ -254,7 +256,7 @@
 
     def test_bogus_make_sure_var_in_reg(self):
         b0, = newboxes(0)
-        longevity = {b0: (0, 1)}
+        longevity = {b0: Lifetime(0, 1)}
         fm = TFrameManager()
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
@@ -350,11 +352,11 @@
         fm = TFrameManager()
 
         b0 = InputArgInt()
-        longevity = {b0: (0, 1)}
+        longevity = {b0: Lifetime(0, 1)}
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
         f0 = InputArgFloat()
-        longevity = {f0: (0, 1)}
+        longevity = {f0: Lifetime(0, 1)}
         xrm = XRegisterManager(longevity, frame_manager=fm, assembler=asm)
         xrm.loc(f0)
         rm.loc(b0)
@@ -362,7 +364,9 @@
 
     def test_spilling(self):
         b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5)
-        longevity = {b0: (0, 3), b1: (0, 3), b3: (0, 5), b2: (0, 2), b4: (1, 4), b5: (1, 3)}
+        longevity = {b0: Lifetime(0, 3), b1: Lifetime(0, 3),
+                     b3: Lifetime(0, 5), b2: Lifetime(0, 2),
+                     b4: Lifetime(1, 4), b5: Lifetime(1, 3)}
         fm = TFrameManager()
         asm = MockAsm()
         rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
@@ -378,7 +382,6 @@
         assert spilled2 is loc
         rm._check_invariants()
 
-
     def test_hint_frame_locations_1(self):
         for hint_value in range(11):
             b0, = newboxes(0)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -511,7 +511,7 @@
         # and won't be used after the current operation finishes,
         # then swap the role of 'x' and 'y'
         if (symm and isinstance(argloc, RegLoc) and
-                self.rm.longevity[y][1] == self.rm.position):
+                self.rm.longevity[y].last_usage == self.rm.position):
             x, y = y, x
             argloc = self.loc(y)
         #
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1,6 +1,7 @@
 from rpython.jit.backend.llsupport.regalloc import (RegisterManager, FrameManager,
         TempVar, compute_vars_longevity,
-        BaseRegalloc, NoVariableToSpill)
+        BaseRegalloc, NoVariableToSpill,
+        Lifetime)
 from rpython.jit.backend.llsupport.jump import remap_frame_layout_mixed
 from rpython.jit.backend.zarch.arch import WORD
 from rpython.jit.codewriter import longlong
@@ -255,9 +256,9 @@
         self._check_type(even_var)
         self._check_type(odd_var)
         if isinstance(even_var, TempVar):
-            self.longevity[even_var] = (self.position, self.position)
+            self.longevity[even_var] = Lifetime(self.position, self.position)
         if isinstance(odd_var, TempVar):
-            self.longevity[odd_var] = (self.position, self.position)
+            self.longevity[odd_var] = Lifetime(self.position, self.position)
 
         # this function steps through the following:
         # 1) maybe there is an even/odd pair that is always

From pypy.commits at gmail.com  Sun Aug 20 13:09:29 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:29 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: fix test
Message-ID: <5999c249.a588df0a.56f4c.05a2@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92183:39a4798dc769
Date: 2017-08-19 08:56 +0200
http://bitbucket.org/pypy/pypy/changeset/39a4798dc769/

Log: fix test

diff --git a/rpython/jit/backend/x86/test/test_x86vector.py b/rpython/jit/backend/x86/test/test_x86vector.py
--- a/rpython/jit/backend/x86/test/test_x86vector.py
+++ b/rpython/jit/backend/x86/test/test_x86vector.py
@@ -1,5 +1,6 @@
 import py
 from rpython.jit.backend.x86.regloc import *
+from rpython.jit.backend.llsupport.regalloc import Lifetime
 from rpython.jit.backend.x86.regalloc import (RegAlloc,
         X86FrameManager, X86XMMRegisterManager, X86RegisterManager)
 from rpython.jit.backend.x86.vector_ext import TempVector
@@ -146,9 +147,9 @@
         xrm = self.regalloc.xrm
         xrm.reg_bindings[arg1] = xmm0
         xrm.reg_bindings[arg2] = xmm1
-        xrm.longevity[arg1] = (0,1)
-        xrm.longevity[arg2] = (0,2)
-        xrm.longevity[arg] = (0,3)
+        xrm.longevity[arg1] = Lifetime(0,1)
+        xrm.longevity[arg2] = Lifetime(0,2)
+        xrm.longevity[arg] = Lifetime(0,3)
         fr = xrm.free_regs
         xrm.free_regs = []
         self.regalloc.fm.bindings[arg] = FrameLoc(0, 64, 'f')

From pypy.commits at gmail.com  Sun Aug 20 13:09:31 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:31 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: some ideas what to improve
Message-ID: <5999c24b.90b6df0a.b932.966e@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92184:beea904fa30e
Date: 2017-08-19 08:59 +0200
http://bitbucket.org/pypy/pypy/changeset/beea904fa30e/

Log: some ideas what to improve

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -394,6 +394,7 @@
         try:
             return self.reg_bindings[v]
         except KeyError:
+            # YYY here we should chose the free variable a bit more carefully
             if self.free_regs:
                 loc = self.free_regs.pop()
                 self.reg_bindings[v] = loc
@@ -416,6 +417,8 @@
         """
         cur_max_age = -1
         candidate = None
+        # YYY we should pick a variable to spill that is only used in failargs
+        # from now on
         for next in self.reg_bindings:
             reg = self.reg_bindings[next]
             if next in forbidden_vars:
@@ -528,6 +531,8 @@
         self.reg_bindings[to_v] = reg
 
     def _move_variable_away(self, v, prev_loc):
+        # YYY here we should not move it to another reg, if all uses are in
+        # failargs
        if self.free_regs:
            loc = self.free_regs.pop()
            self.reg_bindings[v] = loc

From pypy.commits at gmail.com  Sun Aug 20 13:09:32 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:32 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: store the last_real_usage on the Lifetimes as well
Message-ID: <5999c24c.d24a1c0a.92acd.c1ac@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92185:b4d4200219d6
Date: 2017-08-19 09:00 +0200
http://bitbucket.org/pypy/pypy/changeset/b4d4200219d6/

Log: store the last_real_usage on the Lifetimes as well

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -791,9 +791,15 @@
         return [self.loc(op.getarg(0))]
 
 class Lifetime(object):
-    def __init__(self, definition_pos, last_usage):
+    def __init__(self, definition_pos, last_usage, last_real_usage=-42):
         self.definition_pos = definition_pos
         self.last_usage = last_usage
+        if last_real_usage == -42:
+            last_real_usage = last_usage
+        self.last_real_usage = last_real_usage
+
+    def is_last_real_use_before(self, position):
+        return self.last_real_usage <= position
 
 def compute_vars_longevity(inputargs, operations):
     # compute a dictionary that maps variables to index in
@@ -833,14 +839,17 @@
             if arg.type != 'v' and arg in last_used:
                 assert not isinstance(arg, Const)
                 assert i < last_used[arg]
-                longevity[arg] = Lifetime(i, last_used[arg])
+                longevity[arg] = Lifetime(
+                    i, last_used[arg], last_real_usage.get(arg, -1))
                 del last_used[arg]
     for arg in inputargs:
         assert not isinstance(arg, Const)
         if arg not in last_used:
-            longevity[arg] = Lifetime(-1, -1)
+            longevity[arg] = Lifetime(
+                -1, -1, -1)
         else:
-            longevity[arg] = Lifetime(0, last_used[arg])
+            longevity[arg] = Lifetime(
+                0, last_used[arg], last_real_usage.get(arg, -1))
             del last_used[arg]
     assert len(last_used) == 0
@@ -853,8 +862,8 @@
             if not isinstance(arg, Const):
                 assert arg in produced
         produced[op] = None
-    
-    return longevity, last_real_usage
+
+    return longevity
 
 def is_comparison_or_ovf_op(opnum):
     return rop.is_comparison(opnum) or rop.is_ovf(opnum)
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -197,10 +197,8 @@
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
         # compute longevity of variables
-        longevity, last_real_usage = compute_vars_longevity(
-            inputargs, operations)
+        longevity = compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
-        self.last_real_usage = last_real_usage
         self.rm = PPCRegisterManager(self.longevity,
                                      frame_manager = self.fm,
                                      assembler = self.assembler)
@@ -949,7 +947,7 @@
         position = self.rm.position
         for arg in inputargs:
             assert not isinstance(arg, Const)
-            if self.last_real_usage.get(arg, -1) <= position:
+            if self.longevity[arg].is_last_real_use_before(position):
                 self.force_spill_var(arg)
         #
         # we need to make sure that no variable is stored in spp (=r31)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -165,10 +165,8 @@
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
         # compute longevity of variables
-        longevity, last_real_usage = compute_vars_longevity(
-            inputargs, operations)
+        longevity = compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
-        self.last_real_usage = last_real_usage
         self.rm = gpr_reg_mgr_cls(self.longevity,
                                   frame_manager = self.fm,
                                   assembler = self.assembler)
@@ -1404,7 +1402,7 @@
         position = self.rm.position
         for arg in inputargs:
             assert not isinstance(arg, Const)
-            if self.last_real_usage.get(arg, -1) <= position:
+            if self.longevity[arg].is_last_real_use_before(position):
                 self.force_spill_var(arg)
         #
         # we need to make sure that no variable is stored in ebp
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -452,10 +452,8 @@
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
         # compute longevity of variables
-        longevity, last_real_usage = compute_vars_longevity(
-            inputargs, operations)
+        longevity = compute_vars_longevity(inputargs, operations)
        self.longevity = longevity
-        self.last_real_usage = last_real_usage
        self.rm = ZARCHRegisterManager(self.longevity,
                                       frame_manager = self.fm,
                                       assembler = self.assembler)
@@ -1307,7 +1305,7 @@
        position = self.rm.position
        for arg in inputargs:
            assert not isinstance(arg, Const)
-            if self.last_real_usage.get(arg, -1) <= position:
+            if self.longevity[arg].is_last_real_use_before(position):
                self.force_spill_var(arg)
        #
        # we need to make sure that no variable is stored in spp (=r31)

From pypy.commits at gmail.com  Sun Aug 20 13:09:39 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:39 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: fix tests
Message-ID: <5999c253.6ea9df0a.a0568.60e8@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92188:cc329ecab757
Date: 2017-08-20 16:26 +0200
http://bitbucket.org/pypy/pypy/changeset/cc329ecab757/

Log: fix tests

diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc.py
@@ -69,7 +69,20 @@
 class MockAsm(object):
     def __init__(self):
         self.moves = []
-    
+
+        # XXX register allocation statistics to be removed later
+        self.num_moves_calls = 0
+        self.num_moves_jump = 0
+        self.num_spills = 0
+        self.num_spills_to_existing = 0
+        self.num_reloads = 0
+
+        self.preamble_num_moves_calls = 0
+        self.preamble_num_moves_jump = 0
+        self.preamble_num_spills = 0
+        self.preamble_num_spills_to_existing = 0
+        self.preamble_num_reloads = 0
+
     def regalloc_mov(self, from_loc, to_loc):
         self.moves.append((from_loc, to_loc))

From pypy.commits at gmail.com  Sun Aug 20 13:09:41 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:41 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: spill variables that are no longer "really" used
Message-ID: <5999c255.41aedf0a.8ab32.d990@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92189:c5b3232137fe
Date: 2017-08-20 16:27 +0200
http://bitbucket.org/pypy/pypy/changeset/c5b3232137fe/

Log: spill variables that are no longer "really" used (ie appear only in
	failargs or jumps)

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -433,7 +433,13 @@
                 continue
             if need_lower_byte and reg in self.no_lower_byte_regs:
                 continue
-            max_age = self.longevity[next].last_usage
+            lifetime = self.longevity[next]
+            if lifetime.is_last_real_use_before(self.position):
+                # this variable has no "real" use as an argument to an op left
+                # it is only used in failargs, and maybe in a jump. spilling is
+                # fine
+                return next
+            max_age = lifetime.last_usage
             if cur_max_age < max_age:
                 cur_max_age = max_age
                 candidate = next
@@ -814,6 +820,9 @@
     def is_last_real_use_before(self, position):
         return self.last_real_usage <= position
 
+    def __repr__(self):
+        return "%s:%s(%s)" % (self.definition_pos, self.last_real_usage, self.last_usage)
+
 def compute_vars_longevity(inputargs, operations):
     # compute a dictionary that maps variables to Lifetime information
     # if a variable is not in the dictionary, it's operation is dead because
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc.py
@@ -395,6 +395,28 @@
         assert spilled2 is loc
         rm._check_invariants()
 
+    def test_spill_useless_vars_first(self):
+        b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5)
+        longevity = {b0: Lifetime(0, 5), b1: Lifetime(0, 5),
+                     # b3 becomes useless but b2 lives longer
+                     b3: Lifetime(0, 5, 3), b2: Lifetime(0, 6),
+                     b4: Lifetime(4, 5), b5: Lifetime(4, 7)}
+        fm = TFrameManager()
+        asm = MockAsm()
+        rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
+        rm.next_instruction()
+        for b in b0, b1, b2, b3:
+            rm.force_allocate_reg(b)
+        rm.position = 4
+        assert len(rm.free_regs) == 0
+        loc = rm.loc(b3)
+        spilled = rm.force_allocate_reg(b4)
+        assert spilled is loc
+        loc = rm.loc(b2)
+        spilled2 = rm.force_allocate_reg(b5)
+        assert spilled2 is loc
+        rm._check_invariants()
+
     def test_hint_frame_locations_1(self):
         for hint_value in range(11):
             b0, = newboxes(0)

From pypy.commits at gmail.com  Sun Aug 20 13:09:35 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:35 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: improve comments
Message-ID: <5999c24f.52831c0a.35b43.03fe@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92186:fc6b711bad77
Date: 2017-08-19 09:39 +0200
http://bitbucket.org/pypy/pypy/changeset/fc6b711bad77/

Log: improve comments

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -792,23 +792,27 @@
 
 class Lifetime(object):
     def __init__(self, definition_pos, last_usage, last_real_usage=-42):
+        # all positions are indexes into the operations list
+
+        # the position where the variable is defined
         self.definition_pos = definition_pos
+        # the position where the variable is last used. this includes failargs
+        # and jumps
         self.last_usage = last_usage
         if last_real_usage == -42:
             last_real_usage = last_usage
+        # last *real* usage, ie as an argument to an operation
+        # after last_real_usage and last_usage it does not matter whether the
+        # variable is stored on the stack
         self.last_real_usage = last_real_usage
 
     def is_last_real_use_before(self, position):
         return self.last_real_usage <= position
 
 def compute_vars_longevity(inputargs, operations):
-    # compute a dictionary that maps variables to index in
-    # operations that is a "last-time-seen"
-
-    # returns a pair longevity/useful. Non-useful variables are ones that
-    # never appear in the assembler or it does not matter if they appear on
-    # stack or in registers. Main example is loop arguments that go
-    # only to guard operations or to jump or to finish
+    # compute a dictionary that maps variables to Lifetime information
+    # if a variable is not in the dictionary, it's operation is dead because
+    # it's side-effect-free and the result is unused
     last_used = {}
     last_real_usage = {}
     for i in range(len(operations)-1, -1, -1):

From pypy.commits at gmail.com  Sun Aug 20 13:09:37 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sun, 20 Aug 2017 10:09:37 -0700 (PDT)
Subject: [pypy-commit] pypy regalloc-playground: collect some statitics about the register allocator
Message-ID: <5999c251.8691df0a.e1dd9.9e3a@mx.google.com>

Author: Carl Friedrich Bolz
Branch: regalloc-playground
Changeset: r92187:9ee8caf969f5
Date: 2017-08-20 16:04 +0200
http://bitbucket.org/pypy/pypy/changeset/9ee8caf969f5/

Log: collect some statitics about the register allocator

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -406,9 +406,12 @@
                                                 selected_reg, need_lower_byte=need_lower_byte)
         loc = self.reg_bindings[v_to_spill]
         del self.reg_bindings[v_to_spill]
+        self.assembler.num_spills += 1
         if self.frame_manager.get(v_to_spill) is None:
             newloc = self.frame_manager.loc(v_to_spill)
             self.assembler.regalloc_mov(loc, newloc)
+        else:
+            self.assembler.num_spills_to_existing += 1
         return loc
 
@@ -522,6 +525,7 @@
         loc = self.force_allocate_reg(v, forbidden_vars, selected_reg,
                                       need_lower_byte=need_lower_byte)
         if prev_loc is not loc:
+            self.assembler.num_reloads += 1
             self.assembler.regalloc_mov(prev_loc, loc)
         return loc
 
@@ -576,6 +580,7 @@
     def _sync_var(self, v):
         if not self.frame_manager.get(v):
+            self.num_moves_calls += 1
             reg = self.reg_bindings[v]
             to = self.frame_manager.loc(v)
             self.assembler.regalloc_mov(reg, to)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -90,6 +90,20 @@
         self.target_tokens_currently_compiling = {}
         self.frame_depth_to_patch = []
 
+        # XXX register allocation statistics to be removed later
+        self.num_moves_calls = 0
+        self.num_moves_jump = 0
+        self.num_spills = 0
+        self.num_spills_to_existing = 0
+        self.num_reloads = 0
+
+        self.preamble_num_moves_calls = 0
+        self.preamble_num_moves_jump = 0
+        self.preamble_num_spills = 0
+        self.preamble_num_spills_to_existing = 0
+        self.preamble_num_reloads = 0
+
+
     def teardown(self):
         self.pending_guard_tokens = None
         if WORD == 8:
@@ -545,6 +559,25 @@
                         size_excluding_failure_stuff))
             debug_print("       end: 0x%x" % r_uint(rawstart + full_size))
             debug_stop("jit-backend-addr")
+        debug_start("jit-regalloc-stats")
+        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
+            looptoken.number, loopname,
+            r_uint(rawstart + looppos),
+            r_uint(rawstart + size_excluding_failure_stuff),
+            r_uint(rawstart + functionpos)))
+        debug_print("assembler size: ", size_excluding_failure_stuff)
+        debug_print("number ops: ", len(operations))
+        debug_print("preamble num moves calls: ", self.preamble_num_moves_calls)
+        debug_print("preamble num moves jump:", self.preamble_num_moves_jump)
+        debug_print("preamble num moves spills:", self.preamble_num_spills)
+        debug_print("preamble num moves spills to existing:", self.preamble_num_spills_to_existing)
+        debug_print("preamble num register reloads:", self.preamble_num_reloads)
+        debug_print("num moves calls: ", self.num_moves_calls)
+        debug_print("num moves jump:", self.num_moves_jump)
+        debug_print("num moves spills:", self.num_spills)
+        debug_print("num moves spills to existing:", self.num_spills_to_existing)
+        debug_print("num moves register reloads:", self.num_reloads)
+        debug_stop("jit-regalloc-stats")
         self.patch_pending_failure_recoveries(rawstart)
         #
         ops_offset = self.mc.ops_offset
@@ -624,6 +657,24 @@
             debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
             debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
             debug_stop("jit-backend-addr")
+        debug_start("jit-regalloc-stats")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                     r_uint(rawstart + codeendpos)))
+
+        debug_print("assembler size: ", fullsize)
+        debug_print("number ops: ", len(operations))
+        debug_print("preamble num moves calls: ", self.preamble_num_moves_calls)
+        debug_print("preamble num moves jump:", self.preamble_num_moves_jump)
+        debug_print("preamble num moves spills:", self.preamble_num_spills)
+        debug_print("preamble num moves spills to existing:", self.preamble_num_spills_to_existing)
+        debug_print("preamble num register reloads:", self.preamble_num_reloads)
+        debug_print("num moves calls: ", self.num_moves_calls)
+        debug_print("num moves jump:", self.num_moves_jump)
+        debug_print("num moves spills:", self.num_spills)
+        debug_print("num moves spills to existing:", self.num_spills_to_existing)
+        debug_print("num moves register reloads:", self.num_reloads)
+        debug_stop("jit-regalloc-stats")
         self.patch_pending_failure_recoveries(rawstart)
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart + startpos)
@@ -1291,10 +1342,12 @@
                                      result_loc, result_type,
                                      result_size)
         cb.emit()
+        self.num_moves_calls += cb.num_moves
 
     def simple_call_no_collect(self, fnloc, arglocs):
         cb = callbuilder.CallBuilder(self, fnloc, arglocs)
         cb.emit_no_collect()
+        self.num_moves_calls += cb.num_moves
 
     def _reload_frame_if_necessary(self, mc, shadowstack_reg=None):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -2144,6 +2197,7 @@
             cb.emit()
         else:
             cb.emit_no_collect()
+        self.num_moves_calls += cb.num_moves
 
     def _store_force_index(self, guard_op):
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
@@ -2422,6 +2476,16 @@
             self.mc.JMP(imm(target))
 
     def label(self):
+        self.preamble_num_moves_calls += self.num_moves_calls
+        self.preamble_num_moves_jump += self.num_moves_jump
+        self.preamble_num_spills += self.num_spills
+        self.preamble_num_spills_to_existing += self.num_spills_to_existing
+        self.preamble_num_reloads += self.num_reloads
+        self.num_moves_calls = 0
+        self.num_moves_jump = 0
+        self.num_spills = 0
+        self.num_spills_to_existing = 0
+        self.num_reloads = 0
         self._check_frame_depth_debug(self.mc)
 
 class CondCallSlowPath(codebuf.SlowPath):
diff --git a/rpython/jit/backend/x86/callbuilder.py
b/rpython/jit/backend/x86/callbuilder.py --- a/rpython/jit/backend/x86/callbuilder.py +++ b/rpython/jit/backend/x86/callbuilder.py @@ -632,25 +632,30 @@ self.subtract_esp_aligned(on_stack - self.stack_max) # Handle register arguments: first remap the xmm arguments - remap_frame_layout(self.asm, xmm_src_locs, xmm_dst_locs, - X86_64_XMM_SCRATCH_REG) + num_moves = remap_frame_layout(self.asm, xmm_src_locs, xmm_dst_locs, + X86_64_XMM_SCRATCH_REG) # Load the singlefloat arguments from main regs or stack to xmm regs if singlefloats is not None: for src, dst in singlefloats: if isinstance(dst, RawEspLoc): # XXX too much special logic if isinstance(src, RawEbpLoc): + num_moves += 2 self.mc.MOV32(X86_64_SCRATCH_REG, src) self.mc.MOV32(dst, X86_64_SCRATCH_REG) else: + num_moves += 1 self.mc.MOV32(dst, src) continue if isinstance(src, ImmedLoc): + num_moves += 1 self.mc.MOV(X86_64_SCRATCH_REG, src) src = X86_64_SCRATCH_REG + num_moves += 1 self.mc.MOVD32(dst, src) # Finally remap the arguments in the main regs - remap_frame_layout(self.asm, src_locs, dst_locs, X86_64_SCRATCH_REG) + num_moves += remap_frame_layout(self.asm, src_locs, dst_locs, X86_64_SCRATCH_REG) + self.num_moves = num_moves def emit_raw_call(self): diff --git a/rpython/jit/backend/x86/jump.py b/rpython/jit/backend/x86/jump.py --- a/rpython/jit/backend/x86/jump.py +++ b/rpython/jit/backend/x86/jump.py @@ -6,6 +6,7 @@ pending_dests = len(dst_locations) srccount = {} # maps dst_locations to how many times the same # location appears in src_locations + num_moves = 0 for dst in dst_locations: key = dst._getregkey() assert key not in srccount, "duplicate value in dst_locations!" 
@@ -39,6 +40,7 @@ if key in srccount: srccount[key] -= 1 _move(assembler, src, dst, tmpreg) + num_moves += 1 progress = True if not progress: # we are left with only pure disjoint cycles @@ -53,6 +55,7 @@ originalkey = dst._getregkey() if srccount[originalkey] >= 0: assembler.regalloc_push(dst) + num_moves += 1 while True: key = dst._getregkey() assert srccount[key] == 1 @@ -63,9 +66,12 @@ if src._getregkey() == originalkey: break _move(assembler, src, dst, tmpreg) + num_moves += 1 dst = src assembler.regalloc_pop(dst) + num_moves += 1 assert pending_dests == 0 + return num_moves def _move(assembler, src, dst, tmpreg): if dst.is_memory_reference() and src.is_memory_reference(): @@ -93,6 +99,7 @@ dst_keys[loc._getregkey()] = None src_locations2red = [] dst_locations2red = [] + num_moves = 0 for i in range(len(src_locations2)): loc = src_locations2[i] dstloc = dst_locations2[i] @@ -100,6 +107,7 @@ key = loc._getregkey() if (key in dst_keys or (loc.get_width() > WORD and (key + WORD) in dst_keys)): + num_moves += 1 assembler.regalloc_push(loc) extrapushes.append(dstloc) continue @@ -109,12 +117,14 @@ dst_locations2 = dst_locations2red # # remap the integer and pointer registers and stack locations - remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1) + num_moves += remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1) # # remap the xmm registers and stack locations - remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2) + num_moves += remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2) # # finally, pop the extra xmm stack locations while len(extrapushes) > 0: loc = extrapushes.pop() assembler.regalloc_pop(loc) + num_moves += 1 + return num_moves diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1365,11 +1365,12 @@ tmpreg = None xmmtmp = None # Do the remapping - 
remap_frame_layout_mixed(assembler, + num_moves = remap_frame_layout_mixed(assembler, src_locations1, dst_locations1, tmpreg, src_locations2, dst_locations2, xmmtmp) self.possibly_free_vars_for_op(op) assembler.closing_jump(self.jump_target_descr) + assembler.num_moves_jump += num_moves def consider_enter_portal_frame(self, op): self.assembler.enter_portal_frame(op) From pypy.commits at gmail.com Sun Aug 20 13:09:42 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 20 Aug 2017 10:09:42 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: merge default Message-ID: <5999c256.93071c0a.b049.f964@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92190:7a89232328d8 Date: 2017-08-20 16:31 +0200 http://bitbucket.org/pypy/pypy/changeset/7a89232328d8/ Log: merge default diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py --- a/pypy/module/cpyext/sequence.py +++ b/pypy/module/cpyext/sequence.py @@ -294,6 +294,23 @@ def getitems_fixedsize(self, w_list): return self.getitems_unroll(w_list) + def copy_into(self, w_list, w_other): + w_other.strategy = self + w_other.lstorage = self.getstorage_copy(w_list) + + def clone(self, w_list): + storage = self.getstorage_copy(w_list) + w_clone = W_ListObject.from_storage_and_strategy(self.space, storage, + self) + return w_clone + + def getitems_copy(self, w_list): + return self.getitems(w_list) # getitems copies anyway + + def getstorage_copy(self, w_list): + lst = self.getitems(w_list) + return self.erase(CPyListStorage(w_list.space, lst)) + #------------------------------------------ # all these methods fail or switch strategy and then call ListObjectStrategy's method @@ -301,23 +318,9 @@ w_list.switch_to_object_strategy() w_list.strategy.setslice(w_list, start, stop, step, length) - def get_sizehint(self): - return -1 - def init_from_list_w(self, w_list, list_w): raise NotImplementedError - def clone(self, w_list): - storage = w_list.lstorage # lstorage is tuple, 
no need to clone - w_clone = W_ListObject.from_storage_and_strategy(self.space, storage, - self) - w_clone.switch_to_object_strategy() - return w_clone - - def copy_into(self, w_list, w_other): - w_list.switch_to_object_strategy() - w_list.strategy.copy_into(w_list, w_other) - def _resize_hint(self, w_list, hint): pass @@ -325,13 +328,6 @@ w_list.switch_to_object_strategy() return w_list.strategy.find(w_list, w_item, start, stop) - def getitems_copy(self, w_list): - w_list.switch_to_object_strategy() - return w_list.strategy.getitems_copy(w_list) - - def getstorage_copy(self, w_list): - raise NotImplementedError - def append(self, w_list, w_item): w_list.switch_to_object_strategy() w_list.strategy.append(w_list, w_item) diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py --- a/pypy/module/cpyext/test/test_sequence.py +++ b/pypy/module/cpyext/test/test_sequence.py @@ -226,6 +226,15 @@ w_l.inplace_mul(2) assert space.int_w(space.len(w_l)) == 10 + def test_getstorage_copy(self, space, api): + w = space.wrap + w_l = w([1, 2, 3, 4]) + api.PySequence_Fast(w_l, "foo") # converts + + w_l1 = w([]) + space.setitem(w_l1, space.newslice(w(0), w(0), w(1)), w_l) + assert map(space.unwrap, space.unpackiterable(w_l1)) == [1, 2, 3, 4] + class AppTestSequenceObject(AppTestCpythonExtensionBase): def test_fast(self): diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1671,18 +1671,6 @@ else: not_implemented("save_into_mem size = %d" % size) - def _genop_getfield(self, op, arglocs, resloc): - base_loc, ofs_loc, size_loc, sign_loc = arglocs - assert isinstance(size_loc, ImmedLoc) - source_addr = AddressLoc(base_loc, ofs_loc) - self.load_from_mem(resloc, source_addr, size_loc, sign_loc) - - genop_getfield_gc_i = _genop_getfield - genop_getfield_gc_r = _genop_getfield - genop_getfield_gc_f = _genop_getfield - 
genop_getfield_raw_i = _genop_getfield - genop_getfield_raw_f = _genop_getfield - def _genop_gc_load(self, op, arglocs, resloc): base_loc, ofs_loc, size_loc, sign_loc = arglocs assert isinstance(size_loc, ImmedLoc) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1303,7 +1303,7 @@ self.rm.possibly_free_var(tmpbox_high) def compute_hint_frame_locations(self, operations): - # optimization only: fill in the 'hint_frame_locations' dictionary + # optimization only: fill in the 'hint_frame_pos' dictionary # of 'fm' based on the JUMP at the end of the loop, by looking # at where we would like the boxes to be after the jump. op = operations[-1] @@ -1318,7 +1318,7 @@ self._compute_hint_frame_locations_from_descr(descr) #else: # The loop ends in a JUMP going back to a LABEL in the same loop. - # We cannot fill 'hint_frame_locations' immediately, but we can + # We cannot fill 'hint_frame_pos' immediately, but we can # wait until the corresponding consider_label() to know where the # we would like the boxes to be after the jump. diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -205,6 +205,18 @@ if not is_valid_fd(fd): from errno import EBADF raise OSError(EBADF, 'Bad file descriptor') + + def _bound_for_write(fd, count): + if count > 32767 and c_isatty(fd): + # CPython Issue #11395, PyPy Issue #2636: the Windows console + # returns an error (12: not enough space error) on writing into + # stdout if stdout mode is binary and the length is greater than + # 66,000 bytes (or less, depending on heap usage). Can't easily + # test that, because we need 'fd' to be non-redirected... 
+ count = 32767 + elif count > 0x7fffffff: + count = 0x7fffffff + return count else: def is_valid_fd(fd): return 1 @@ -213,6 +225,9 @@ def validate_fd(fd): pass + def _bound_for_write(fd, count): + return count + def closerange(fd_low, fd_high): # this behaves like os.closerange() from Python 2.6. for fd in xrange(fd_low, fd_high): @@ -449,6 +464,7 @@ def write(fd, data): count = len(data) validate_fd(fd) + count = _bound_for_write(fd, count) with rffi.scoped_nonmovingbuffer(data) as buf: return handle_posix_error('write', c_write(fd, buf, count)) diff --git a/rpython/rtyper/tool/rffi_platform.py b/rpython/rtyper/tool/rffi_platform.py --- a/rpython/rtyper/tool/rffi_platform.py +++ b/rpython/rtyper/tool/rffi_platform.py @@ -710,7 +710,8 @@ size, _ = expected_size_and_sign return lltype.FixedSizeArray(fieldtype.OF, size/_sizeof(fieldtype.OF)) raise TypeError("conflict between translating python and compiler field" - " type %r for %r" % (fieldtype, fieldname)) + " type %r for symbol %r, expected size+sign %r" % ( + fieldtype, fieldname, expected_size_and_sign)) def expose_value_as_rpython(value): if intmask(value) == value: From pypy.commits at gmail.com Sun Aug 20 13:09:44 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 20 Aug 2017 10:09:44 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: merge default Message-ID: <5999c258.53e81c0a.90271.e492@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92191:d88eaeb16464 Date: 2017-08-20 16:31 +0200 http://bitbucket.org/pypy/pypy/changeset/d88eaeb16464/ Log: merge default diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -518,7 +518,7 @@ clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info) clt.frame_info.clear() # for now - if log: + if log or self._debug: number = looptoken.number operations = self._inject_debugging_code(looptoken, 
operations, 'e', number) @@ -622,7 +622,7 @@ faildescr.adr_jump_offset) self.mc.force_frame_size(DEFAULT_FRAME_BYTES) descr_number = compute_unique_id(faildescr) - if log: + if log or self._debug: operations = self._inject_debugging_code(faildescr, operations, 'b', descr_number) arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs) From pypy.commits at gmail.com Sun Aug 20 13:09:46 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 20 Aug 2017 10:09:46 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: typo Message-ID: <5999c25a.919bdf0a.c7a5.f22f@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92192:9b96099898a8 Date: 2017-08-20 16:33 +0200 http://bitbucket.org/pypy/pypy/changeset/9b96099898a8/ Log: typo diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -586,7 +586,7 @@ def _sync_var(self, v): if not self.frame_manager.get(v): - self.num_moves_calls += 1 + self.assembler.num_moves_calls += 1 reg = self.reg_bindings[v] to = self.frame_manager.loc(v) self.assembler.regalloc_mov(reg, to) From pypy.commits at gmail.com Sun Aug 20 13:09:50 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 20 Aug 2017 10:09:50 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: merge default Message-ID: <5999c25e.4d86df0a.1fabc.89da@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92194:385e5a5df482 Date: 2017-08-20 19:07 +0200 http://bitbucket.org/pypy/pypy/changeset/385e5a5df482/ Log: merge default diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -567,10 +567,11 @@ self.reg_bindings[result_v] = loc return loc if v not in self.reg_bindings: + # v not in a register. 
allocate one for result_v and move v there prev_loc = self.frame_manager.loc(v) - loc = self.force_allocate_reg(v, forbidden_vars) + loc = self.force_allocate_reg(result_v, forbidden_vars) self.assembler.regalloc_mov(prev_loc, loc) - assert v in self.reg_bindings + return loc if self.longevity[v].last_usage > self.position: # we need to find a new place for variable v and # store result in the same place From pypy.commits at gmail.com Sun Aug 20 13:09:48 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 20 Aug 2017 10:09:48 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: test for longevity computation Message-ID: <5999c25c.4d86df0a.1fabc.89d8@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92193:7f318c6a860c Date: 2017-08-20 18:50 +0200 http://bitbucket.org/pypy/pypy/changeset/7f318c6a860c/ Log: test for longevity computation diff --git a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py --- a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py @@ -114,6 +114,7 @@ def prepare_loop(self, ops): loop = self.parse(ops) + self.loop = loop regalloc = self.cpu.build_regalloc() regalloc.prepare_loop(loop.inputargs, loop.operations, loop.original_jitcell_token, []) @@ -406,6 +407,30 @@ assert len(regalloc.rm.reg_bindings) == 0 assert len(regalloc.fm.bindings) == 4 + def test_longevity(self): + ops = """ + [i0, i1, i2, i3, i4] + i5 = int_add(i0, i1) + i6 = int_is_true(i5) + guard_true(i6) [i0, i4] + jump(i5, i1, i2, i3, i5) + """ + regalloc = self.prepare_loop(ops) + i0, i1, i2, i3, i4 = self.loop.inputargs + i5 = self.loop.operations[0] + longevity = regalloc.longevity + longevity[i0].last_usage == 2 + longevity[i0].last_real_usage == 0 + longevity[i1].last_usage == 3 + longevity[i1].last_real_usage == 0 + longevity[i2].last_usage == 3 + 
longevity[i2].last_real_usage == 0 + longevity[i3].last_usage == 3 + longevity[i3].last_real_usage == 0 + longevity[i4].last_usage == 3 + longevity[i4].last_real_usage == -1 + longevity[i5].last_usage == 3 + longevity[i5].last_real_usage == 2 class TestRegallocCompOps(BaseTestRegalloc): From pypy.commits at gmail.com Mon Aug 21 06:46:09 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Aug 2017 03:46:09 -0700 (PDT) Subject: [pypy-commit] pypy default: make error message compliant with cPickle, low hanging fruit Message-ID: <599ab9f1.81131c0a.74668.4223@mx.google.com> Author: Matti Picus Branch: Changeset: r92196:52ae4608d26f Date: 2017-08-21 13:43 +0300 http://bitbucket.org/pypy/pypy/changeset/52ae4608d26f/ Log: make error message compliant with cPickle, low hanging fruit diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py --- a/lib_pypy/cPickle.py +++ b/lib_pypy/cPickle.py @@ -116,10 +116,20 @@ @builtinify def dump(obj, file, protocol=None): + if protocol > HIGHEST_PROTOCOL: + # use cPickle error message, not pickle.py one + raise ValueError("pickle protocol %d asked for; " + "the highest available protocol is %d" % ( + protocol, HIGHEST_PROTOCOL)) Pickler(file, protocol).dump(obj) @builtinify def dumps(obj, protocol=None): + if protocol > HIGHEST_PROTOCOL: + # use cPickle error message, not pickle.py one + raise ValueError("pickle protocol %d asked for; " + "the highest available protocol is %d" % ( + protocol, HIGHEST_PROTOCOL)) file = StringIO() Pickler(file, protocol).dump(obj) return file.getvalue() From pypy.commits at gmail.com Mon Aug 21 06:46:11 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Aug 2017 03:46:11 -0700 (PDT) Subject: [pypy-commit] pypy default: test, fix for Py_RichCompareBool(float('nan'), float('nan')) Message-ID: <599ab9f3.57b61c0a.cf76d.9b88@mx.google.com> Author: Matti Picus Branch: Changeset: r92197:3e2341208fe6 Date: 2017-08-21 13:44 +0300 http://bitbucket.org/pypy/pypy/changeset/3e2341208fe6/ Log: test, fix 
for Py_RichCompareBool(float('nan'), float('nan')) diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -305,7 +305,7 @@ PyErr_BadInternalCall(space) @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1) -def PyObject_RichCompareBool(space, ref1, ref2, opid_int): +def PyObject_RichCompareBool(space, w_o1, w_o2, opid_int): """Compare the values of o1 and o2 using the operation specified by opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE, corresponding to <, @@ -315,13 +315,13 @@ opid.""" # Quick result when objects are the same. # Guarantees that identity implies equality. - if ref1 is ref2: + if space.is_w(w_o1, w_o2): opid = rffi.cast(lltype.Signed, opid_int) if opid == Py_EQ: return 1 if opid == Py_NE: return 0 - w_res = PyObject_RichCompare(space, ref1, ref2, opid_int) + w_res = PyObject_RichCompare(space, w_o1, w_o2, opid_int) return int(space.is_true(w_res)) @cpython_api([PyObject], PyObject, result_is_ll=True) diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -408,7 +408,7 @@ Py_buffer passed to it. """ module = self.import_extension('foo', [ - ("fillinfo", "METH_VARARGS", + ("fillinfo", "METH_NOARGS", """ Py_buffer buf; PyObject *str = PyString_FromString("hello, world."); @@ -460,7 +460,7 @@ object. """ module = self.import_extension('foo', [ - ("fillinfo", "METH_VARARGS", + ("fillinfo", "METH_NOARGS", """ Py_buffer buf; PyObject *str = PyString_FromString("hello, world."); @@ -506,7 +506,7 @@ PyBuffer_FillInfo fails if WRITABLE is passed but object is readonly. """ module = self.import_extension('foo', [ - ("fillinfo", "METH_VARARGS", + ("fillinfo", "METH_NOARGS", """ Py_buffer buf; PyObject *str = PyString_FromString("hello, world."); @@ -533,7 +533,7 @@ decremented by PyBuffer_Release. 
""" module = self.import_extension('foo', [ - ("release", "METH_VARARGS", + ("release", "METH_NOARGS", """ Py_buffer buf; buf.obj = PyString_FromString("release me!"); @@ -553,3 +553,19 @@ """)]) assert module.release() is None + +class AppTestPyBuffer_Release(AppTestCpythonExtensionBase): + def test_richcomp_nan(self): + module = self.import_extension('foo', [ + ("comp_eq", "METH_VARARGS", + """ + PyObject *a = PyTuple_GetItem(args, 0); + PyObject *b = PyTuple_GetItem(args, 1); + int res = PyObject_RichCompareBool(a, b, Py_EQ); + return PyLong_FromLong(res); + """),]) + a = float('nan') + b = float('nan') + assert a is b + res = module.comp_eq(a, b) + assert res == 1 From pypy.commits at gmail.com Mon Aug 21 06:46:07 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Aug 2017 03:46:07 -0700 (PDT) Subject: [pypy-commit] pypy default: delete implemented function from unused stubs.py Message-ID: <599ab9ef.6ea9df0a.a0568.3cb4@mx.google.com> Author: Matti Picus Branch: Changeset: r92195:0f254609cb4f Date: 2017-08-20 17:39 +0300 http://bitbucket.org/pypy/pypy/changeset/0f254609cb4f/ Log: delete implemented function from unused stubs.py diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -226,11 +226,6 @@ representation.""" raise NotImplementedError - at cpython_api([Py_complex], PyObject) -def PyComplex_FromCComplex(space, v): - """Create a new Python complex number object from a C Py_complex value.""" - raise NotImplementedError - @cpython_api([rffi.CCHARP, rffi.CCHARPP], rffi.DOUBLE, error=CANNOT_FAIL) def PyOS_ascii_strtod(space, nptr, endptr): """Convert a string to a double. 
This function behaves like the Standard C From pypy.commits at gmail.com Mon Aug 21 06:52:40 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Aug 2017 03:52:40 -0700 (PDT) Subject: [pypy-commit] pypy.org extradoc: remove outdated mention of pypy/numpy, regenerate Message-ID: <599abb78.c69edf0a.51976.ad62@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r897:df872ae704e1 Date: 2017-08-21 13:51 +0300 http://bitbucket.org/pypy/pypy.org/changeset/df872ae704e1/ Log: remove outdated mention of pypy/numpy, regenerate diff --git a/compat.html b/compat.html --- a/compat.html +++ b/compat.html @@ -97,7 +97,6 @@
  • cPickle, ctypes, datetime, dbm, _functools, grp, readline, resource, sqlite3, syslog
  • All modules that are pure python in CPython of course work.

    -

    Numpy support is not complete. We maintain our own fork of numpy for now, further instructions can be found at https://bitbucker.org/pypy/numpy.git.

    Python libraries known to work under PyPy (the list is not exhaustive). A fuller list is available.

      diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -120,8 +120,8 @@
      diff --git a/source/compat.txt b/source/compat.txt --- a/source/compat.txt +++ b/source/compat.txt @@ -39,10 +39,6 @@ All modules that are pure python in CPython of course work. -Numpy support is not complete. We maintain our own fork of numpy for now, further instructions can be found at `https://bitbucker.org/pypy/numpy.git`__. - -.. __: https://bitbucket.org/pypy/numpy.git - Python libraries known to work under PyPy (the list is not exhaustive). A `fuller list`_ is available. From pypy.commits at gmail.com Mon Aug 21 06:58:41 2017 From: pypy.commits at gmail.com (tobweber) Date: Mon, 21 Aug 2017 03:58:41 -0700 (PDT) Subject: [pypy-commit] stmgc c8-efficient-serial-execution-master: Merge timing events enum so that all branches share the same interface with print_stm_log.py Message-ID: <599abce1.579c1c0a.492e0.d8b1@mx.google.com> Author: Tobias Weber Branch: c8-efficient-serial-execution-master Changeset: r2151:4a71ee20626e Date: 2017-08-05 14:17 +0200 http://bitbucket.org/pypy/stmgc/changeset/4a71ee20626e/ Log: Merge timing events enum so that all branches share the same interface with print_stm_log.py diff --git a/c8/stmgc.h b/c8/stmgc.h --- a/c8/stmgc.h +++ b/c8/stmgc.h @@ -201,7 +201,7 @@ /* ==================== PUBLIC API ==================== */ /* Number of segments (i.e. how many transactions can be executed in - parallel, in maximum). If you try to start transactions in more + parallel, at maximum). If you try to start transactions in more threads than the number of segments, it will block, waiting for the next segment to become free. 
*/ @@ -574,21 +574,49 @@ STM_GC_MAJOR_START, STM_GC_MAJOR_DONE, + /* execution duration profiling events */ + STM_WARMUP_COMPLETE, + + STM_DURATION_START_TRX, + STM_DURATION_WRITE_GC_ONLY, + STM_DURATION_WRITE_SLOWPATH, + STM_DURATION_VALIDATION, + STM_DURATION_CREATE_CLE, + STM_DURATION_COMMIT_EXCEPT_GC, + STM_DURATION_MINOR_GC, + STM_DURATION_MAJOR_GC_LOG_ONLY, + STM_DURATION_MAJOR_GC_FULL, + + STM_SINGLE_THREAD_MODE_ON, + STM_SINGLE_THREAD_MODE_OFF, + STM_SINGLE_THREAD_MODE_ADAPTIVE, + _STM_EVENT_N }; -#define STM_EVENT_NAMES \ - "transaction start", \ - "transaction commit", \ - "transaction abort", \ - "contention write read", \ - "wait free segment", \ - "wait other inevitable", \ - "wait done", \ - "gc minor start", \ - "gc minor done", \ - "gc major start", \ - "gc major done" +#define STM_EVENT_NAMES \ + "transaction start", \ + "transaction commit", \ + "transaction abort", \ + "contention write read", \ + "wait free segment", \ + "wait other inevitable", \ + "wait done", \ + "gc minor start", \ + "gc minor done", \ + "gc major start", \ + "gc major done", \ + /* names of duration events */ \ + "marks completion of benchmark warm up phase" \ + "duration of transaction start", \ + "duration of gc due to write", \ + "duration of write slowpath", \ + "duration of validation", \ + "duration of commit log entry creation", \ + "duration of commit except gc", \ + "duration of minor gc", \ + "duration of major gc doing log clean up only", \ + "duration of full major gc" /* The markers pushed in the shadowstack are an odd number followed by a regular object pointer. 
*/ From pypy.commits at gmail.com Mon Aug 21 06:58:43 2017 From: pypy.commits at gmail.com (tobweber) Date: Mon, 21 Aug 2017 03:58:43 -0700 (PDT) Subject: [pypy-commit] stmgc c8-efficient-serial-execution-master: Merge TCP style optimization Message-ID: <599abce3.4981df0a.601c0.7fa6@mx.google.com> Author: Tobias Weber Branch: c8-efficient-serial-execution-master Changeset: r2152:d56fd821ed46 Date: 2017-08-21 12:10 +0200 http://bitbucket.org/pypy/stmgc/changeset/d56fd821ed46/ Log: Merge TCP style optimization diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -1117,11 +1117,9 @@ _do_start_transaction(tl); STM_PSEGMENT->commit_if_not_atomic = false; - if (repeat_count == 0) { /* else, 'nursery_mark' was already set - in abort_data_structures_from_segment_num() */ - STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + - stm_fill_mark_nursery_bytes); - } + STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + + stm_get_transaction_length(tl)); + return repeat_count; } @@ -1304,6 +1302,8 @@ s_mutex_unlock(); + stm_transaction_length_handle_validation(thread_local_for_logging, false); + /* between transactions, call finalizers. 
this will execute a transaction itself */ if (tl != NULL) @@ -1468,22 +1468,6 @@ if (pseg->active_queues) queues_deactivate_all(pseg, /*at_commit=*/false); - - /* Set the next nursery_mark: first compute the value that - nursery_mark must have had at the start of the aborted transaction */ - stm_char *old_mark =pseg->pub.nursery_mark + pseg->total_throw_away_nursery; - - /* This means that the limit, in term of bytes, was: */ - uintptr_t old_limit = old_mark - (stm_char *)_stm_nursery_start; - - /* If 'total_throw_away_nursery' is smaller than old_limit, use that */ - if (pseg->total_throw_away_nursery < old_limit) - old_limit = pseg->total_throw_away_nursery; - - /* Now set the new limit to 90% of the old limit */ - pseg->pub.nursery_mark = ((stm_char *)_stm_nursery_start + - (uintptr_t)(old_limit * 0.9)); - #ifdef STM_NO_AUTOMATIC_SETJMP did_abort = 1; #endif @@ -1518,6 +1502,8 @@ tl->self_or_0_if_atomic = (intptr_t)tl; /* clear the 'atomic' flag */ STM_PSEGMENT->atomic_nesting_levels = 0; + stm_transaction_length_handle_validation(tl, true); + if (tl->mem_clear_on_abort) memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort); if (tl->mem_reset_on_abort) { diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -4,6 +4,8 @@ #endif #include "finalizer.h" +#include +#include /************************************************************/ @@ -13,14 +15,79 @@ static uintptr_t _stm_nursery_start; +#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) -#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) +// corresponds to ~4 GB +#define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L -uintptr_t stm_fill_mark_nursery_bytes = DEFAULT_FILL_MARK_NURSERY_BYTES; +// corresponds to ~4 MB nursery fill +#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001) +// corresponds to ~400 KB nursery fill +#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001) + +#define BACKOFF_COUNT (20) +#define BACKOFF_MULTIPLIER 
(BACKOFF_COUNT / -log10(STM_MIN_RELATIVE_TRANSACTION_LENGTH)) + +static inline void set_backoff(stm_thread_local_t *tl, double rel_trx_len) { + /* the shorter the trx, the more backoff: + think a*x + b = backoff, x := -log(rel-trx-len), + backoff is + b at default trx length, + linear decrease to b at max trx length */ + const int b = 5; + int new_backoff = (int)((BACKOFF_MULTIPLIER * -log10(rel_trx_len)) + b); + tl->transaction_length_backoff = new_backoff; + // printf("thread %d, backoff %d\n", tl->thread_local_counter, tl->transaction_length_backoff); + tl->linear_transaction_length_increment = rel_trx_len / new_backoff; +} + +static inline double get_new_transaction_length(stm_thread_local_t *tl, bool aborts) { + const int multiplier = 2; + double previous = tl->relative_transaction_length; + double new; + if (aborts) { + new = previous / multiplier; + if (new < STM_MIN_RELATIVE_TRANSACTION_LENGTH) { + new = STM_MIN_RELATIVE_TRANSACTION_LENGTH; + } + set_backoff(tl, new); + } else if (tl->transaction_length_backoff == 0) { + // backoff counter is zero, exponential increase up to 1 + new = previous * multiplier; + if (new > 1) { + new = 1; + } + if (tl->linear_transaction_length_increment != 0) { + // thread had to abort before: slow start + set_backoff(tl, new); + } + } else { // not abort and backoff != 0 + // in backoff, linear increase up to 1 + new = previous + tl->linear_transaction_length_increment; + if (new > 1) { + new = 1; + } + tl->transaction_length_backoff -= 1; + } + return new; +} + +static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts) { + tl->relative_transaction_length = get_new_transaction_length(tl, aborts); +} + +static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl) { + double relative_additional_length = tl->relative_transaction_length; + publish_custom_value_event( + relative_additional_length, STM_SINGLE_THREAD_MODE_ADAPTIVE); + uintptr_t result = + 
(uintptr_t)(LARGE_FILL_MARK_NURSERY_BYTES * relative_additional_length); + // printf("%020" PRIxPTR "\n", result); + return result; +} + /************************************************************/ - static void setup_nursery(void) { assert(_STM_FAST_ALLOC <= NURSERY_SIZE); diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h --- a/c8/stm/nursery.h +++ b/c8/stm/nursery.h @@ -56,4 +56,7 @@ static inline struct object_s *mark_loc(object_t *obj); static inline bool _is_from_same_transaction(object_t *obj); +static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts); +static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl); + #endif diff --git a/c8/stm/setup.c b/c8/stm/setup.c --- a/c8/stm/setup.c +++ b/c8/stm/setup.c @@ -245,6 +245,12 @@ numbers automatically. */ tl->last_associated_segment_num = num + 1; tl->thread_local_counter = ++thread_local_counters; + + /* init adaptive transaction length mode */ + tl->relative_transaction_length = STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH; + tl->transaction_length_backoff = 0; + tl->linear_transaction_length_increment = 0; + *_get_cpth(tl) = pthread_self(); _init_shadow_stack(tl); set_gs_register(get_segment_base(num + 1)); diff --git a/c8/stm/sync.c b/c8/stm/sync.c --- a/c8/stm/sync.c +++ b/c8/stm/sync.c @@ -176,6 +176,16 @@ /************************************************************/ +static uint8_t number_of_segments_in_use(void) { + uint8_t result = 0; + int num; + for (num = 1; num < NB_SEGMENTS; num++) { + if (sync_ctl.in_use1[num] > 0) { + result++; + } + } + return result; +} #if 0 void stm_wait_for_current_inevitable_transaction(void) @@ -202,7 +212,6 @@ } #endif - static void acquire_thread_segment(stm_thread_local_t *tl) { /* This function acquires a segment for the currently running thread, diff --git a/c8/stm/sync.h b/c8/stm/sync.h --- a/c8/stm/sync.h +++ b/c8/stm/sync.h @@ -22,6 +22,7 @@ static void set_gs_register(char *value); static void 
ensure_gs_register(long segnum); +static uint8_t number_of_segments_in_use(void); /* acquire and release one of the segments for running the given thread (must have the mutex acquired!) */ diff --git a/c8/stmgc.h b/c8/stmgc.h --- a/c8/stmgc.h +++ b/c8/stmgc.h @@ -88,6 +88,13 @@ struct stm_thread_local_s *prev, *next; intptr_t self_or_0_if_atomic; void *creating_pthread[2]; + /* == adaptive single thread mode == */ + /* factor that is multiplied with max transaction length before the start of the next transaction on this thread */ + double relative_transaction_length; + /* when zero, transaction length may increase exponentially, otherwise transaction length may only increase linearly. is (re-)set to some value upon abort and counted down until zero upon successful validation. */ + int transaction_length_backoff; + /* during the backoff, transaction length may increase linearly by this increment on every successful validation */ + double linear_transaction_length_increment; } stm_thread_local_t; From pypy.commits at gmail.com Mon Aug 21 06:58:45 2017 From: pypy.commits at gmail.com (tobweber) Date: Mon, 21 Aug 2017 03:58:45 -0700 (PDT) Subject: [pypy-commit] stmgc c8-efficient-serial-execution-master: Fix trx length update depends on instrumentation for thread local reference and remove logging of trx length Message-ID: <599abce5.8c8f1c0a.fcf57.08c6@mx.google.com> Author: Tobias Weber Branch: c8-efficient-serial-execution-master Changeset: r2153:cadbddf81079 Date: 2017-08-21 12:40 +0200 http://bitbucket.org/pypy/stmgc/changeset/cadbddf81079/ Log: Fix trx length update depends on instrumentation for thread local reference and remove logging of trx length diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -1255,6 +1255,8 @@ bool was_inev = STM_PSEGMENT->transaction_state == TS_INEVITABLE; _validate_and_add_to_commit_log(); + + stm_thread_local_t *tl_for_trx_len = STM_SEGMENT->running_thread; if (external) { /* from this point on, 
unlink the original 'stm_thread_local_t *' from its segment. Better do it as soon as possible, because @@ -1302,7 +1304,7 @@ s_mutex_unlock(); - stm_transaction_length_handle_validation(thread_local_for_logging, false); + stm_transaction_length_handle_validation(tl_for_trx_len, false); /* between transactions, call finalizers. this will execute a transaction itself */ diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -77,8 +77,6 @@ static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl) { double relative_additional_length = tl->relative_transaction_length; - publish_custom_value_event( - relative_additional_length, STM_SINGLE_THREAD_MODE_ADAPTIVE); uintptr_t result = (uintptr_t)(LARGE_FILL_MARK_NURSERY_BYTES * relative_additional_length); // printf("%020" PRIxPTR "\n", result); From pypy.commits at gmail.com Mon Aug 21 09:28:00 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 21 Aug 2017 06:28:00 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: tweak: pick the longest-living useless variable Message-ID: <599adfe0.86acdf0a.99dd0.295f@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92198:dfada6cd5c1a Date: 2017-08-21 15:27 +0200 http://bitbucket.org/pypy/pypy/changeset/dfada6cd5c1a/ Log: tweak: pick the longest-living useless variable diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -420,8 +420,8 @@ """ cur_max_age = -1 candidate = None - # YYY we should pick a variable to spill that is only used in failargs - # from now on + cur_max_age_failargs = -1 + candidate_from_failargs = None for next in self.reg_bindings: reg = self.reg_bindings[next] if next in forbidden_vars: @@ -434,18 +434,22 @@ if need_lower_byte and reg in self.no_lower_byte_regs: continue lifetime = self.longevity[next] + max_age = 
lifetime.last_usage if lifetime.is_last_real_use_before(self.position): # this variable has no "real" use as an argument to an op left # it is only used in failargs, and maybe in a jump. spilling is # fine - return next - max_age = lifetime.last_usage + if cur_max_age_failargs < max_age: + cur_max_age_failargs = max_age + candidate_from_failargs = next if cur_max_age < max_age: cur_max_age = max_age candidate = next - if candidate is None: - raise NoVariableToSpill - return candidate + if candidate_from_failargs is not None: + return candidate_from_failargs + if candidate is not None: + return candidate + raise NoVariableToSpill def force_allocate_reg(self, v, forbidden_vars=[], selected_reg=None, need_lower_byte=False): diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -397,9 +397,9 @@ def test_spill_useless_vars_first(self): b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) - longevity = {b0: Lifetime(0, 5), b1: Lifetime(0, 5), - # b3 becomes useless but b2 lives longer - b3: Lifetime(0, 5, 3), b2: Lifetime(0, 6), + longevity = {b0: Lifetime(0, 5), b1: Lifetime(0, 10), + # b2 and b3 become useless but b3 lives longer + b3: Lifetime(0, 7, 3), b2: Lifetime(0, 6, 3), b4: Lifetime(4, 5), b5: Lifetime(4, 7)} fm = TFrameManager() asm = MockAsm() From pypy.commits at gmail.com Mon Aug 21 14:55:59 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 21 Aug 2017 11:55:59 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: refactoring: compute Lifetime objects directly Message-ID: <599b2cbf.42a7df0a.b750.6883@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92199:a163b0deca41 Date: 2017-08-21 16:24 +0200 http://bitbucket.org/pypy/pypy/changeset/a163b0deca41/ Log: refactoring: compute Lifetime objects directly diff --git 
a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -806,8 +806,11 @@ assert op.numargs() == 1 return [self.loc(op.getarg(0))] +UNDEF_POS = -42 + class Lifetime(object): - def __init__(self, definition_pos, last_usage, last_real_usage=-42): + def __init__(self, definition_pos=UNDEF_POS, last_usage=UNDEF_POS, + last_real_usage=UNDEF_POS): # all positions are indexes into the operations list # the position where the variable is defined @@ -815,7 +818,7 @@ # the position where the variable is last used. this includes failargs # and jumps self.last_usage = last_usage - if last_real_usage == -42: + if last_real_usage == UNDEF_POS: last_real_usage = last_usage # last *real* usage, ie as an argument to an operation # after last_real_usage and last_usage it does not matter whether the @@ -832,49 +835,41 @@ # compute a dictionary that maps variables to Lifetime information # if a variable is not in the dictionary, it's operation is dead because # it's side-effect-free and the result is unused - last_used = {} - last_real_usage = {} + longevity = {} for i in range(len(operations)-1, -1, -1): op = operations[i] - if op.type != 'v': - if op not in last_used and rop.has_no_side_effect(op.opnum): + opnum = op.getopnum() + if op not in longevity: + if op.type != 'v' and rop.has_no_side_effect(opnum): + # result not used, operation has no side-effect, it can be + # removed continue - opnum = op.getopnum() + longevity[op] = Lifetime(definition_pos=i, last_usage=i) + else: + longevity[op].definition_pos = i for j in range(op.numargs()): arg = op.getarg(j) if isinstance(arg, Const): continue - if arg not in last_used: - last_used[arg] = i + if arg not in longevity: + lifetime = longevity[arg] = Lifetime(last_usage=i) + else: + lifetime = longevity[arg] if opnum != rop.JUMP and opnum != rop.LABEL: - if arg not in last_real_usage: - last_real_usage[arg] 
= i + if lifetime.last_real_usage == UNDEF_POS: + lifetime.last_real_usage = i if rop.is_guard(op.opnum): for arg in op.getfailargs(): if arg is None: # hole continue assert not isinstance(arg, Const) - if arg not in last_used: - last_used[arg] = i + if arg not in longevity: + longevity[arg] = Lifetime(last_usage=i) # - longevity = {} - for i, arg in enumerate(operations): - if arg.type != 'v' and arg in last_used: - assert not isinstance(arg, Const) - assert i < last_used[arg] - longevity[arg] = Lifetime( - i, last_used[arg], last_real_usage.get(arg, -1)) - del last_used[arg] for arg in inputargs: assert not isinstance(arg, Const) - if arg not in last_used: - longevity[arg] = Lifetime( - -1, -1, -1) - else: - longevity[arg] = Lifetime( - 0, last_used[arg], last_real_usage.get(arg, -1)) - del last_used[arg] - assert len(last_used) == 0 + if arg not in longevity: + longevity[arg] = Lifetime(-1, -1, -1) if not we_are_translated(): produced = {} From pypy.commits at gmail.com Mon Aug 21 14:56:02 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 21 Aug 2017 11:56:02 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: move this case to the tests, where it belongs Message-ID: <599b2cc2.0a561c0a.90a96.2373@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92200:eeca1d43c304 Date: 2017-08-21 16:32 +0200 http://bitbucket.org/pypy/pypy/changeset/eeca1d43c304/ Log: move this case to the tests, where it belongs diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -818,8 +818,7 @@ # the position where the variable is last used. 
this includes failargs # and jumps self.last_usage = last_usage - if last_real_usage == UNDEF_POS: - last_real_usage = last_usage + # last *real* usage, ie as an argument to an operation # after last_real_usage and last_usage it does not matter whether the # variable is stored on the stack diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1,7 +1,8 @@ import py from rpython.jit.metainterp.history import ConstInt, INT, FLOAT from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList -from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan, Lifetime +from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan,\ + Lifetime as RealLifetime, UNDEF_POS from rpython.jit.metainterp.resoperation import InputArgInt, InputArgRef,\ InputArgFloat @@ -11,6 +12,13 @@ def newrefboxes(count): return [InputArgRef() for _ in range(count)] +def Lifetime(definition_pos=UNDEF_POS, last_usage=UNDEF_POS, + last_real_usage=UNDEF_POS): + if last_real_usage == UNDEF_POS: + last_real_usage = last_usage + return RealLifetime(definition_pos, last_usage, last_real_usage) + + def boxes_and_longevity(num): res = [] longevity = {} From pypy.commits at gmail.com Mon Aug 21 14:56:04 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 21 Aug 2017 11:56:04 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: implement the most common spilling heuristic used in linear scan Message-ID: <599b2cc4.0187df0a.26ae1.c7fb@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92201:a961fe5b9c4a Date: 2017-08-21 20:55 +0200 http://bitbucket.org/pypy/pypy/changeset/a961fe5b9c4a/ Log: implement the most common spilling heuristic used in linear scan implementations: spill the variable where the next use as an argument is the
furthest away from the current position. diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -416,9 +416,13 @@ def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None, need_lower_byte=False): - """ Slightly less silly algorithm. - """ - cur_max_age = -1 + # try to spill a variable that has no further real usages, ie that only + # appears in failargs or in a jump + # if that doesn't exist, spill the variable that has a real_usage that + # is the furthest away from the current position + + cur_max_use_distance = -1 + position = self.position candidate = None cur_max_age_failargs = -1 candidate_from_failargs = None @@ -434,17 +438,19 @@ if need_lower_byte and reg in self.no_lower_byte_regs: continue lifetime = self.longevity[next] - max_age = lifetime.last_usage - if lifetime.is_last_real_use_before(self.position): + if lifetime.is_last_real_use_before(position): # this variable has no "real" use as an argument to an op left # it is only used in failargs, and maybe in a jump. 
spilling is # fine + max_age = lifetime.last_usage if cur_max_age_failargs < max_age: cur_max_age_failargs = max_age candidate_from_failargs = next - if cur_max_age < max_age: - cur_max_age = max_age - candidate = next + else: + use_distance = lifetime.next_real_usage(position) - position + if cur_max_use_distance < use_distance: + cur_max_use_distance = use_distance + candidate = next if candidate_from_failargs is not None: return candidate_from_failargs if candidate is not None: @@ -809,8 +815,7 @@ UNDEF_POS = -42 class Lifetime(object): - def __init__(self, definition_pos=UNDEF_POS, last_usage=UNDEF_POS, - last_real_usage=UNDEF_POS): + def __init__(self, definition_pos=UNDEF_POS, last_usage=UNDEF_POS): # all positions are indexes into the operations list # the position where the variable is defined @@ -819,16 +824,38 @@ # and jumps self.last_usage = last_usage - # last *real* usage, ie as an argument to an operation - # after last_real_usage and last_usage it does not matter whether the - # variable is stored on the stack - self.last_real_usage = last_real_usage + # *real* usages, ie as an argument to an operation (as opposed to jump + # arguments or in failargs) + self.real_usages = None def is_last_real_use_before(self, position): - return self.last_real_usage <= position + if self.real_usages is None: + return True + return self.real_usages[-1] <= position + + def next_real_usage(self, position): + assert position >= self.definition_pos + # binary search + l = self.real_usages + low = 0 + high = len(l) + while low < high: + mid = low + (high - low) // 2 # no overflow ;-) + if position < l[mid]: + high = mid + else: + low = mid + 1 + return l[low] + + def _check_invariants(self): + assert self.definition_pos <= self.last_usage + if self.real_usages is not None: + assert sorted(self.real_usages) == self.real_usages + assert self.last_usage >= max(self.real_usages) + assert self.definition_pos < min(self.real_usages) def __repr__(self): - return "%s:%s(%s)" % 
(self.definition_pos, self.last_real_usage, self.last_usage) + return "%s:%s(%s)" % (self.definition_pos, self.real_usages, self.last_usage) def compute_vars_longevity(inputargs, operations): # compute a dictionary that maps variables to Lifetime information @@ -855,8 +882,9 @@ else: lifetime = longevity[arg] if opnum != rop.JUMP and opnum != rop.LABEL: - if lifetime.last_real_usage == UNDEF_POS: - lifetime.last_real_usage = i + if lifetime.real_usages is None: + lifetime.real_usages = [] + lifetime.real_usages.append(i) if rop.is_guard(op.opnum): for arg in op.getfailargs(): if arg is None: # hole @@ -868,7 +896,7 @@ for arg in inputargs: assert not isinstance(arg, Const) if arg not in longevity: - longevity[arg] = Lifetime(-1, -1, -1) + longevity[arg] = Lifetime(-1, -1) if not we_are_translated(): produced = {} @@ -879,6 +907,11 @@ if not isinstance(arg, Const): assert arg in produced produced[op] = None + for lifetime in longevity.itervalues(): + if lifetime.real_usages is not None: + lifetime.real_usages.reverse() + if not we_are_translated(): + lifetime._check_invariants() return longevity diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -13,10 +13,14 @@ return [InputArgRef() for _ in range(count)] def Lifetime(definition_pos=UNDEF_POS, last_usage=UNDEF_POS, - last_real_usage=UNDEF_POS): - if last_real_usage == UNDEF_POS: - last_real_usage = last_usage - return RealLifetime(definition_pos, last_usage, last_real_usage) + real_usages=UNDEF_POS): + if real_usages == UNDEF_POS: + real_usages = last_usage + lifetime = RealLifetime(definition_pos, last_usage) + if isinstance(real_usages, int): + real_usages = [real_usages] + lifetime.real_usages = real_usages + return lifetime def boxes_and_longevity(num): @@ -94,6 +98,16 @@ def regalloc_mov(self, from_loc, to_loc): 
self.moves.append((from_loc, to_loc)) + +def test_lifetime_next_real_usage(): + lt = RealLifetime(0, 1000) + lt.real_usages = [0, 1, 5, 10, 24, 35, 55, 56, 57, 90, 92, 100] + for i in range(100): + next = lt.next_real_usage(i) + assert next in lt.real_usages + assert next > i + assert lt.real_usages[lt.real_usages.index(next) - 1] <= i + class TestRegalloc(object): def test_freeing_vars(self): b0, b1, b2 = newboxes(0, 0, 0) @@ -382,7 +396,7 @@ xrm.loc(f0) rm.loc(b0) assert fm.get_frame_depth() == 3 - + def test_spilling(self): b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) longevity = {b0: Lifetime(0, 3), b1: Lifetime(0, 3), @@ -403,6 +417,27 @@ assert spilled2 is loc rm._check_invariants() + def test_spilling_furthest_next_real_use(self): + b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) + longevity = {b0: Lifetime(0, 3, [1, 2, 3]), b1: Lifetime(0, 3, [3]), + b3: Lifetime(0, 4, [1, 2, 3, 4]), b2: Lifetime(0, 2), + b4: Lifetime(1, 4), b5: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + for b in b0, b1, b2, b3: + rm.force_allocate_reg(b) + assert len(rm.free_regs) == 0 + rm.next_instruction() + loc = rm.loc(b1) + spilled = rm.force_allocate_reg(b4) + assert spilled is loc + spilled2 = rm.force_allocate_reg(b5) + assert spilled2 is loc + rm._check_invariants() + + def test_spill_useless_vars_first(self): b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) longevity = {b0: Lifetime(0, 5), b1: Lifetime(0, 10), diff --git a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py --- a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py @@ -409,28 +409,35 @@ def test_longevity(self): ops = """ - [i0, i1, i2, i3, i4] - i5 = int_add(i0, i1) - i6 = int_is_true(i5) - guard_true(i6) [i0, i4] - jump(i5, i1, i2, i3, 
i5) + [i0, i1, i2, i3, i4, i10] + i5 = int_add(i0, i1) # 0 + i8 = int_add(i0, i1) # 1 unused result, so not in real_usages + i6 = int_is_true(i5) # 2 + i11 = int_add(i5, i10) # 3 + guard_true(i6) [i0, i4] # 4 + jump(i5, i1, i2, i3, i5, i11) # 5 """ regalloc = self.prepare_loop(ops) - i0, i1, i2, i3, i4 = self.loop.inputargs + i0, i1, i2, i3, i4, i10 = self.loop.inputargs i5 = self.loop.operations[0] + i6 = self.loop.operations[2] longevity = regalloc.longevity - longevity[i0].last_usage == 2 - longevity[i0].last_real_usage == 0 - longevity[i1].last_usage == 3 - longevity[i1].last_real_usage == 0 - longevity[i2].last_usage == 3 - longevity[i2].last_real_usage == 0 - longevity[i3].last_usage == 3 - longevity[i3].last_real_usage == 0 - longevity[i4].last_usage == 3 - longevity[i4].last_real_usage == -1 - longevity[i5].last_usage == 3 - longevity[i5].last_real_usage == 2 + assert longevity[i0].last_usage == 4 + assert longevity[i0].real_usages == [0] + assert longevity[i1].last_usage == 5 + assert longevity[i1].real_usages == [0] + assert longevity[i2].last_usage == 5 + assert longevity[i2].real_usages is None + assert longevity[i3].last_usage == 5 + assert longevity[i3].real_usages is None + assert longevity[i4].last_usage == 4 + assert longevity[i4].real_usages is None + assert longevity[i5].last_usage == 5 + assert longevity[i5].real_usages == [2, 3] + assert longevity[i6].last_usage == 4 + assert longevity[i6].real_usages == [4] + assert longevity[i10].last_usage == 3 + assert longevity[i10].real_usages == [3] class TestRegallocCompOps(BaseTestRegalloc): From pypy.commits at gmail.com Tue Aug 22 11:05:47 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:47 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: - implement __contains__ support in rpython Message-ID: <599c484b.ce8bdf0a.71ef2.790c@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92203:d8e2a043e8eb Date: 2017-08-22 12:43 +0200 
http://bitbucket.org/pypy/pypy/changeset/d8e2a043e8eb/ Log: - implement __contains__ support in rpython - wrap a class around the Lifetime dict diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -729,6 +729,10 @@ return [get_setitem, op.simple_call(get_setitem.result, v_idx, v_value)] + at op.contains.register_transform(SomeInstance) +def contains_SomeInstance(annotator, v_ins, v_idx): + get_contains = op.getattr(v_ins, const('__contains__')) + return [get_contains, op.simple_call(get_contains.result, v_idx)] class __extend__(pairtype(SomeIterator, SomeIterator)): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -4077,6 +4077,20 @@ assert len(a.translator.graphs) == 2 # fn, __setitem__ assert isinstance(s, annmodel.SomeInteger) + def test_instance_contains(self): + class A(object): + def __contains__(self, i): + return i & 1 == 0 + + def fn(i): + a = A() + return 0 in a and 1 not in a + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int]) + assert len(a.translator.graphs) == 2 # fn, __contains__ + assert isinstance(s, annmodel.SomeBool) + def test_instance_getslice(self): class A(object): def __getslice__(self, stop, start): diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -856,6 +856,22 @@ def __repr__(self): return "%s:%s(%s)" % (self.definition_pos, self.real_usages, self.last_usage) +class LifetimeManager(object): + def __init__(self, longevity): + self.longevity = longevity + + def register_hint(self, opindex, var, register): + raise NotImplementedError + + def __contains__(self, var): + return var in self.longevity + + def __getitem__(self, var): + return 
self.longevity[var] + + def __setitem__(self, var, val): + self.longevity[var] = val + def compute_vars_longevity(inputargs, operations): # compute a dictionary that maps variables to Lifetime information # if a variable is not in the dictionary, it's operation is dead because @@ -912,7 +928,7 @@ if not we_are_translated(): lifetime._check_invariants() - return longevity + return LifetimeManager(longevity) def is_comparison_or_ovf_op(opnum): return rop.is_comparison(opnum) or rop.is_ovf(opnum) From pypy.commits at gmail.com Tue Aug 22 11:05:45 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:45 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: a kludgy and lengthy explicit way to test the register allocator with a fake Message-ID: <599c4849.24addf0a.bcb4.b628@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92202:7ffc9b6f6e75 Date: 2017-08-22 12:14 +0200 http://bitbucket.org/pypy/pypy/changeset/7ffc9b6f6e75/ Log: a kludgy and lengthy explicit way to test the register allocator with a fake set of registers diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -1,4 +1,3 @@ -import os from rpython.jit.metainterp.history import Const, REF, JitCellToken from rpython.rlib.objectmodel import we_are_translated, specialize from rpython.jit.metainterp.resoperation import rop, AbstractValue diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1,10 +1,17 @@ import py from rpython.jit.metainterp.history import ConstInt, INT, FLOAT +from rpython.jit.metainterp.history import BasicFailDescr, TargetToken +from rpython.jit.metainterp.resoperation import rop +from
rpython.jit.metainterp.resoperation import InputArgInt, InputArgRef,\ + InputArgFloat +from rpython.jit.backend.detect_cpu import getcpuclass from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan,\ - Lifetime as RealLifetime, UNDEF_POS -from rpython.jit.metainterp.resoperation import InputArgInt, InputArgRef,\ - InputArgFloat + Lifetime as RealLifetime, UNDEF_POS, BaseRegalloc, compute_vars_longevity +from rpython.jit.tool.oparser import parse +from rpython.jit.codewriter.effectinfo import EffectInfo +from rpython.rtyper.lltypesystem import lltype +from rpython.rtyper.annlowlevel import llhelper def newboxes(*values): return [InputArgInt(v) for v in values] @@ -49,6 +56,7 @@ class FakeFramePos(object): def __init__(self, pos, box_type): self.pos = pos + self.value = pos self.box_type = box_type def __repr__(self): return 'FramePos<%d,%s>' % (self.pos, self.box_type) @@ -78,9 +86,15 @@ assert isinstance(loc, FakeFramePos) return loc.pos +class FakeCPU(object): + def get_baseofs_of_frame_field(self): + return 0 + class MockAsm(object): def __init__(self): self.moves = [] + self.emitted = [] + self.cpu = FakeCPU() # XXX register allocation statistics to be removed later self.num_moves_calls = 0 @@ -97,6 +111,7 @@ def regalloc_mov(self, from_loc, to_loc): self.moves.append((from_loc, to_loc)) + self.emitted.append(("move", to_loc, from_loc)) def test_lifetime_next_real_usage(): @@ -649,3 +664,171 @@ assert fm.get_loc_index(floc) == 0 for box in fm.bindings.keys(): fm.mark_as_free(box) + +# _____________________________________________________ +# tests that assign registers in a mocked way for a fake CPU + +r4, r5, r6, r7, r8, r9 = [FakeReg(i) for i in range(4, 10)] + +class RegisterManager2(BaseRegMan): + all_regs = [r0, r1, r2, r3, r4, r5, r6, r7] + + save_around_call_regs = [r0, r1, r2, r3] + + frame_reg = r8 + + # calling conventions: r0 is result + # r1 r2 r3 are 
arguments and callee-saved registers + # r4 r5 r6 r7 are caller-saved registers + + def convert_to_imm(self, v): + return v.value + + +class FakeRegalloc(BaseRegalloc): + def __init__(self): + self.assembler = MockAsm() + + def prepare_loop(self, inputargs, operations, looptoken, allgcrefs): + operations = self._prepare(inputargs, operations, allgcrefs) + self.operations = operations + self._set_initial_bindings(inputargs, looptoken) + # note: we need to make a copy of inputargs because possibly_free_vars + # is also used on op args, which is a non-resizable list + self.possibly_free_vars(list(inputargs)) + return operations + + def _prepare(self, inputargs, operations, allgcrefs): + self.fm = TFrameManager() + # compute longevity of variables + longevity = compute_vars_longevity(inputargs, operations) + self.longevity = longevity + self.rm = RegisterManager2( + longevity, assembler=self.assembler, frame_manager=self.fm) + return operations + + def possibly_free_var(self, var): + self.rm.possibly_free_var(var) + + def possibly_free_vars(self, vars): + for var in vars: + if var is not None: # xxx kludgy + self.possibly_free_var(var) + + def loc(self, x): + return self.rm.loc(x) + + def force_allocate_reg_or_cc(self, var): + assert var.type == INT + if self.next_op_can_accept_cc(self.operations, self.rm.position): + # hack: return the ebp location to mean "lives in CC". This + # ebp will not actually be used, and the location will be freed + # after the next op as usual. + self.rm.force_allocate_frame_reg(var) + return r8 + else: + # else, return a regular register (not ebp). 
+ return self.rm.force_allocate_reg(var, need_lower_byte=True) + + def fake_allocate(self, loop): + emit = self.assembler.emitted.append + for i, op in enumerate(loop.operations): + self.rm.position = i + if rop.is_comparison(op.getopnum()): + locs = [self.loc(x) for x in op.getarglist()] + loc = self.force_allocate_reg_or_cc(op) + emit((op.getopname(), loc, locs)) + elif op.getopname().startswith("int_"): + locs = [self.loc(x) for x in op.getarglist()] + loc = self.rm.force_result_in_reg( + op, op.getarg(0), op.getarglist()) + emit((op.getopname(), loc, locs[1:])) + elif op.is_guard(): + emit((op.getopname(), self.loc(op.getarg(0)))) + else: + locs = [self.loc(x) for x in op.getarglist()] + if op.type != "v": + loc = self.rm.force_allocate_reg(op) + emit((op.getopname(), loc, locs)) + else: + emit((op.getopname(), locs)) + return self.assembler.emitted + +CPU = getcpuclass() +class TestFullRegallocFakeCPU(object): + # XXX copy-paste from test_regalloc_integration + cpu = CPU(None, None) + cpu.setup_once() + + targettoken = TargetToken() + targettoken2 = TargetToken() + fdescr1 = BasicFailDescr(1) + fdescr2 = BasicFailDescr(2) + fdescr3 = BasicFailDescr(3) + + def setup_method(self, meth): + self.targettoken._ll_loop_code = 0 + self.targettoken2._ll_loop_code = 0 + + def f1(x): + return x+1 + + def f2(x, y): + return x*y + + def f10(*args): + assert len(args) == 10 + return sum(args) + + F1PTR = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed)) + F2PTR = lltype.Ptr(lltype.FuncType([lltype.Signed]*2, lltype.Signed)) + F10PTR = lltype.Ptr(lltype.FuncType([lltype.Signed]*10, lltype.Signed)) + f1ptr = llhelper(F1PTR, f1) + f2ptr = llhelper(F2PTR, f2) + f10ptr = llhelper(F10PTR, f10) + + f1_calldescr = cpu.calldescrof(F1PTR.TO, F1PTR.TO.ARGS, F1PTR.TO.RESULT, + EffectInfo.MOST_GENERAL) + f2_calldescr = cpu.calldescrof(F2PTR.TO, F2PTR.TO.ARGS, F2PTR.TO.RESULT, + EffectInfo.MOST_GENERAL) + f10_calldescr = cpu.calldescrof(F10PTR.TO, F10PTR.TO.ARGS, 
F10PTR.TO.RESULT, + EffectInfo.MOST_GENERAL) + + namespace = locals().copy() + + def parse(self, s, boxkinds=None, namespace=None): + return parse(s, self.cpu, namespace or self.namespace, + boxkinds=boxkinds) + + def allocate(self, s): + loop = self.parse(s) + self.loop = loop + regalloc = FakeRegalloc() + regalloc.prepare_loop(loop.inputargs, loop.operations, + loop.original_jitcell_token, []) + return regalloc.fake_allocate(loop) + + def _consider_binop(self, op): + loc, argloc = self._consider_binop_part(op) + self.perform(op, [loc, argloc], loc) + + + def test_simple(self): + ops = ''' + [i0] + label(i0, descr=targettoken) + i1 = int_add(i0, 1) + i2 = int_lt(i1, 20) + guard_true(i2) [i1] + jump(i1, descr=targettoken) + ''' + emitted = self.allocate(ops) + fp0 = FakeFramePos(0, INT) + assert emitted == [ + ("label", [fp0]), + ("move", r0, fp0), + ("int_add", r0, [1]), + ("int_lt", r8, [r0, 20]), + ("guard_true", r8), + ("jump", [r0]), + ] From pypy.commits at gmail.com Tue Aug 22 11:05:51 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:51 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: some fundamental data structures for supporting putting boxes into fixed Message-ID: <599c484f.52d31c0a.72835.b46b@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92205:f6baf7f14279 Date: 2017-08-22 15:01 +0200 http://bitbucket.org/pypy/pypy/changeset/f6baf7f14279/ Log: some fundamental data structures for supporting putting boxes into fixed registers diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -1,3 +1,4 @@ +import sys from rpython.jit.metainterp.history import Const, REF, JitCellToken from rpython.rlib.objectmodel import we_are_translated, specialize from rpython.jit.metainterp.resoperation import rop, AbstractValue @@ -827,6 +828,10 @@ # arguments or in 
failargs) self.real_usages = None + # fixed registers are positions where the variable *needs* to be in a + # specific register + self.fixed_positions = None + def is_last_real_use_before(self, position): if self.real_usages is None: return True @@ -846,6 +851,14 @@ low = mid + 1 return l[low] + def fixed_register(self, position, reg): + assert self.definition_pos <= position <= self.last_usage + if self.fixed_positions is None: + self.fixed_positions = [] + else: + assert position > self.fixed_positions[-1][0] + self.fixed_positions.append((position, reg)) + def _check_invariants(self): assert self.definition_pos <= self.last_usage if self.real_usages is not None: @@ -856,12 +869,65 @@ def __repr__(self): return "%s:%s(%s)" % (self.definition_pos, self.real_usages, self.last_usage) + +class FixedRegisterPositions(object): + def __init__(self, register): + self.register = register + + self.index_lifetimes = [] + + def fixed_register(self, opindex, varlifetime): + if self.index_lifetimes: + assert opindex > self.index_lifetimes[-1][0] + self.index_lifetimes.append((opindex, varlifetime)) + + def compute_free_until_pos(self, opindex): + for (index, varlifetime) in self.index_lifetimes: + if opindex <= index: + if varlifetime.definition_pos >= opindex: + return varlifetime.definition_pos + else: + # the variable didn't make it into the register despite + # being defined already. so we don't care too much, and can + # say that the variable is free until index + return index + return sys.maxint + class LifetimeManager(object): def __init__(self, longevity): self.longevity = longevity - def register_hint(self, opindex, var, register): - raise NotImplementedError + # dictionary maps register to FixedRegisterPositions + self.fixed_register_use = {} + + def fixed_register(self, opindex, register, var=None): + """ Tell the LifetimeManager that variable var *must* be in register at + operation opindex. 
var can be None, if no variable at all can be in + that register at the point.""" + varlifetime = self.longevity[var] + if register not in self.fixed_register_use: + self.fixed_register_use[register] = FixedRegisterPositions(register) + self.fixed_register_use[register].fixed_register(opindex, varlifetime) + varlifetime.fixed_register(opindex, register) + + def compute_longest_free_reg(self, position, free_regs): + """ for every register in free_regs, compute how far into the + future that register can remain free, according to the constraints of + the fixed registers. Find the register that is free the longest. Return a tuple + (reg, free_until_pos). """ + free_until_pos = {} + max_free_pos = -1 + best_reg = None + for reg in free_regs: + fixed_reg_pos = self.fixed_register_use.get(reg, None) + if fixed_reg_pos is None: + return reg, sys.maxint + else: + free_until_pos = fixed_reg_pos.compute_free_until_pos(position) + if free_until_pos > max_free_pos: + best_reg = reg + max_free_pos = free_until_pos + return best_reg, max_free_pos def __contains__(self, var): return var in self.longevity diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1,4 +1,5 @@ import py +import sys from rpython.jit.metainterp.history import ConstInt, INT, FLOAT from rpython.jit.metainterp.history import BasicFailDescr, TargetToken from rpython.jit.metainterp.resoperation import rop @@ -7,7 +8,8 @@ from rpython.jit.backend.detect_cpu import getcpuclass from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan,\ - Lifetime as RealLifetime, UNDEF_POS, BaseRegalloc, compute_vars_longevity + Lifetime as RealLifetime, UNDEF_POS, BaseRegalloc, compute_vars_longevity,\ + LifetimeManager from rpython.jit.tool.oparser 
import parse from rpython.jit.codewriter.effectinfo import EffectInfo from rpython.rtyper.lltypesystem import lltype @@ -123,6 +125,73 @@ assert next > i assert lt.real_usages[lt.real_usages.index(next) - 1] <= i +def test_fixed_position(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(0, 9) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + longevity.fixed_register(5, r1, b1) + longevity.fixed_register(8, r1, b1) + + assert l0.fixed_positions == [(1, r0), (4, r2)] + assert l1.fixed_positions == [(5, r1), (8, r1)] + assert l2.fixed_positions is None + + fpr0 = longevity.fixed_register_use[r0] + fpr1 = longevity.fixed_register_use[r1] + fpr2 = longevity.fixed_register_use[r2] + assert r3 not in longevity.fixed_register_use + assert fpr0.index_lifetimes == [(1, l0)] + assert fpr1.index_lifetimes == [(5, l1), (8, l1)] + assert fpr2.index_lifetimes == [(4, l0)] + + +def test_compute_free_until_pos(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(30, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + longevity.fixed_register(5, r1, b1) + longevity.fixed_register(8, r1, b1) + longevity.fixed_register(35, r1, b2) + + fpr1 = longevity.fixed_register_use[r1] + + # simple cases: we are before the beginning of the lifetime of the variable + # in the fixed register, then it's free until the definition of the + # variable + assert fpr1.compute_free_until_pos(0) == 2 + assert fpr1.compute_free_until_pos(1) == 2 + assert fpr1.compute_free_until_pos(2) == 2 + assert fpr1.compute_free_until_pos(10) == 30 + assert fpr1.compute_free_until_pos(20) == 30 + assert fpr1.compute_free_until_pos(30) == 30 + + # after the fixed use, we are fined anyway + assert fpr1.compute_free_until_pos(36) == sys.maxint + assert 
fpr1.compute_free_until_pos(50) == sys.maxint + + # asking for a position *after* the definition of the variable in the fixed + # register means the variable didn't make it into the fixed register, but + # at the latest by the use point it will have to go there + assert fpr1.compute_free_until_pos(3) == 5 + assert fpr1.compute_free_until_pos(4) == 5 + assert fpr1.compute_free_until_pos(5) == 5 + assert fpr1.compute_free_until_pos(6) == 8 + assert fpr1.compute_free_until_pos(7) == 8 + assert fpr1.compute_free_until_pos(8) == 8 + assert fpr1.compute_free_until_pos(31) == 35 + assert fpr1.compute_free_until_pos(32) == 35 + assert fpr1.compute_free_until_pos(33) == 35 + assert fpr1.compute_free_until_pos(34) == 35 + assert fpr1.compute_free_until_pos(35) == 35 + class TestRegalloc(object): def test_freeing_vars(self): b0, b1, b2 = newboxes(0, 0, 0) @@ -223,7 +292,7 @@ assert isinstance(loc, FakeReg) assert loc not in [r2, r3] rm._check_invariants() - + def test_make_sure_var_in_reg(self): boxes, longevity = boxes_and_longevity(5) fm = TFrameManager() @@ -237,7 +306,7 @@ loc = rm.make_sure_var_in_reg(b0) assert isinstance(loc, FakeReg) rm._check_invariants() - + def test_force_result_in_reg_1(self): b0, b1 = newboxes(0, 0) longevity = {b0: Lifetime(0, 1), b1: Lifetime(1, 3)} From pypy.commits at gmail.com Tue Aug 22 11:05:53 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:53 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: support not specifying a variable (eg for caller-saved regs) Message-ID: <599c4851.4c0e1c0a.f723c.7e2a@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92206:85eb224c8d6c Date: 2017-08-22 15:06 +0200 http://bitbucket.org/pypy/pypy/changeset/85eb224c8d6c/ Log: support not specifying a variable (eg for caller-saved regs) diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ 
b/rpython/jit/backend/llsupport/regalloc.py @@ -884,12 +884,13 @@ def compute_free_until_pos(self, opindex): for (index, varlifetime) in self.index_lifetimes: if opindex <= index: - if varlifetime.definition_pos >= opindex: + if varlifetime is not None and varlifetime.definition_pos >= opindex: return varlifetime.definition_pos else: - # the variable didn't make it into the register despite - # being defined already. so we don't care too much, and can - # say that the variable is free until index + # the variable doesn't exist or didn't make it into the + # register despite being defined already. so we don't care + # too much, and can say that the variable is free until + # index return index return sys.maxint @@ -904,11 +905,14 @@ """ Tell the LifetimeManager that variable var *must* be in register at operation opindex. var can be None, if no variable at all can be in that register at the point.""" - varlifetime = self.longevity[var] + if var is None: + varlifetime = None + else: + varlifetime = self.longevity[var] + varlifetime.fixed_register(opindex, register) if register not in self.fixed_register_use: self.fixed_register_use[register] = FixedRegisterPositions(register) self.fixed_register_use[register].fixed_register(opindex, varlifetime) - varlifetime.fixed_register(opindex, register) def compute_longest_free_reg(self, position, free_regs): """ for every register in free_regs, compute how far into the diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -148,6 +148,46 @@ assert fpr1.index_lifetimes == [(5, l1), (8, l1)] assert fpr2.index_lifetimes == [(4, l0)] +def test_fixed_position_none(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(0, 9) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0) 
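[Editorial aside] For readers following the free_until_pos logic across these patches, here is a condensed standalone model of the FixedRegisterPositions bookkeeping. This is an illustration, not the real RPython class: names are simplified, and Python 3's sys.maxsize stands in for the sys.maxint used in the patches.

```python
import sys

# Condensed model of FixedRegisterPositions (illustrative only).
class FixedRegPositions(object):
    def __init__(self):
        # sorted list of (opindex of the fixed use, definition_pos of the
        # variable that must sit in the register at that index)
        self.index_lifetimes = []

    def fixed_register(self, opindex, definition_pos):
        # fixed uses must be registered in increasing opindex order
        if self.index_lifetimes:
            assert opindex > self.index_lifetimes[-1][0]
        self.index_lifetimes.append((opindex, definition_pos))

    def free_until_pos(self, opindex):
        for index, definition_pos in self.index_lifetimes:
            if opindex <= index:
                if definition_pos >= opindex:
                    # the register is free until the variable is defined
                    return definition_pos
                # the variable is already defined but didn't make it into
                # the register: it is free until the fixed-use point itself
                return index
        return sys.maxsize  # no fixed use ahead: free indefinitely

fpr = FixedRegPositions()
fpr.fixed_register(5, 2)    # var defined at 2 must be in the reg at 5
fpr.fixed_register(8, 5)
fpr.fixed_register(35, 30)
assert fpr.free_until_pos(0) == 2
assert fpr.free_until_pos(3) == 5
assert fpr.free_until_pos(36) == sys.maxsize
```

The three asserts correspond to the three cases exercised by the tests in these patches: before the variable's definition, after its definition but before the fixed use, and past the last fixed use.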
+ longevity.fixed_register(4, r2) + longevity.fixed_register(5, r1) + longevity.fixed_register(8, r1) + + fpr0 = longevity.fixed_register_use[r0] + fpr1 = longevity.fixed_register_use[r1] + fpr2 = longevity.fixed_register_use[r2] + assert r3 not in longevity.fixed_register_use + assert fpr0.index_lifetimes == [(1, None)] + assert fpr1.index_lifetimes == [(5, None), (8, None)] + assert fpr2.index_lifetimes == [(4, None)] + + +def test_compute_free_until_pos_none(): + longevity = LifetimeManager({}) + longevity.fixed_register(1, r0, None) + longevity.fixed_register(4, r2, None) + longevity.fixed_register(5, r1, None) + longevity.fixed_register(8, r1, None) + longevity.fixed_register(35, r1, None) + + fpr1 = longevity.fixed_register_use[r1] + + assert fpr1.compute_free_until_pos(0) == 5 + assert fpr1.compute_free_until_pos(1) == 5 + assert fpr1.compute_free_until_pos(2) == 5 + assert fpr1.compute_free_until_pos(3) == 5 + assert fpr1.compute_free_until_pos(4) == 5 + assert fpr1.compute_free_until_pos(5) == 5 + assert fpr1.compute_free_until_pos(10) == 35 + assert fpr1.compute_free_until_pos(20) == 35 + assert fpr1.compute_free_until_pos(30) == 35 + assert fpr1.compute_free_until_pos(36) == sys.maxint def test_compute_free_until_pos(): b0, b1, b2 = newboxes(0, 0, 0) From pypy.commits at gmail.com Tue Aug 22 11:05:49 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:49 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: refactoring Message-ID: <599c484d.8b841c0a.91b1c.4f31@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92204:b4a1f3830c25 Date: 2017-08-22 13:55 +0200 http://bitbucket.org/pypy/pypy/changeset/b4a1f3830c25/ Log: refactoring diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -734,24 +734,31 @@ emit = 
self.assembler.emitted.append for i, op in enumerate(loop.operations): self.rm.position = i - if rop.is_comparison(op.getopnum()): + opnum = op.getopnum() + opname = op.getopname() + if rop.is_comparison(opnum): locs = [self.loc(x) for x in op.getarglist()] loc = self.force_allocate_reg_or_cc(op) - emit((op.getopname(), loc, locs)) - elif op.getopname().startswith("int_"): + emit((opname, loc, locs)) + elif opname.startswith("int_"): locs = [self.loc(x) for x in op.getarglist()] loc = self.rm.force_result_in_reg( op, op.getarg(0), op.getarglist()) - emit((op.getopname(), loc, locs[1:])) + emit((opname, loc, locs[1:])) elif op.is_guard(): - emit((op.getopname(), self.loc(op.getarg(0)))) + emit((opname, self.loc(op.getarg(0)))) + elif opname == "label": + descr = op.getdescr() + locs = [self.loc(x) for x in op.getarglist()] + emit((opname, locs)) + descr._fake_arglocs = locs else: locs = [self.loc(x) for x in op.getarglist()] if op.type != "v": loc = self.rm.force_allocate_reg(op) - emit((op.getopname(), loc, locs)) + emit((opname, loc, locs)) else: - emit((op.getopname(), locs)) + emit((opname, locs)) return self.assembler.emitted CPU = getcpuclass() From pypy.commits at gmail.com Tue Aug 22 11:05:55 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:55 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: simplify Message-ID: <599c4853.43491c0a.aea41.a068@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92207:50dd22a26d67 Date: 2017-08-22 15:10 +0200 http://bitbucket.org/pypy/pypy/changeset/50dd22a26d67/ Log: simplify diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -881,7 +881,7 @@ assert opindex > self.index_lifetimes[-1][0] self.index_lifetimes.append((opindex, varlifetime)) - def compute_free_until_pos(self, opindex): + def free_until_pos(self, 
opindex): for (index, varlifetime) in self.index_lifetimes: if opindex <= index: if varlifetime is not None and varlifetime.definition_pos >= opindex: @@ -914,7 +914,7 @@ self.fixed_register_use[register] = FixedRegisterPositions(register) self.fixed_register_use[register].fixed_register(opindex, varlifetime) - def compute_longest_free_reg(self, position, free_regs): + def longest_free_reg(self, position, free_regs): """ for every register in free_regs, compute how far into the future that register can remain free, according to the constraints of the fixed registers. Find the register that is free the longest. Return a tuple @@ -927,7 +927,7 @@ if fixed_reg_pos is None: return reg, sys.maxint else: - free_until_pos = fixed_reg_pos.compute_free_until_pos(position) + free_until_pos = fixed_reg_pos.free_until_pos(position) if free_until_pos > max_free_pos: best_reg = reg max_free_pos = free_until_pos diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -168,35 +168,31 @@ assert fpr2.index_lifetimes == [(4, None)] -def test_compute_free_until_pos_none(): +def test_free_until_pos_none(): longevity = LifetimeManager({}) - longevity.fixed_register(1, r0, None) - longevity.fixed_register(4, r2, None) longevity.fixed_register(5, r1, None) longevity.fixed_register(8, r1, None) longevity.fixed_register(35, r1, None) fpr1 = longevity.fixed_register_use[r1] - assert fpr1.compute_free_until_pos(0) == 5 - assert fpr1.compute_free_until_pos(1) == 5 - assert fpr1.compute_free_until_pos(2) == 5 - assert fpr1.compute_free_until_pos(3) == 5 - assert fpr1.compute_free_until_pos(4) == 5 - assert fpr1.compute_free_until_pos(5) == 5 - assert fpr1.compute_free_until_pos(10) == 35 - assert fpr1.compute_free_until_pos(20) == 35 - assert fpr1.compute_free_until_pos(30) == 35 - assert 
fpr1.compute_free_until_pos(36) == sys.maxint + assert fpr1.free_until_pos(0) == 5 + assert fpr1.free_until_pos(1) == 5 + assert fpr1.free_until_pos(2) == 5 + assert fpr1.free_until_pos(3) == 5 + assert fpr1.free_until_pos(4) == 5 + assert fpr1.free_until_pos(5) == 5 + assert fpr1.free_until_pos(10) == 35 + assert fpr1.free_until_pos(20) == 35 + assert fpr1.free_until_pos(30) == 35 + assert fpr1.free_until_pos(36) == sys.maxint -def test_compute_free_until_pos(): +def test_free_until_pos(): b0, b1, b2 = newboxes(0, 0, 0) l0 = Lifetime(0, 5) l1 = Lifetime(2, 9) l2 = Lifetime(30, 40) longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) - longevity.fixed_register(1, r0, b0) - longevity.fixed_register(4, r2, b0) longevity.fixed_register(5, r1, b1) longevity.fixed_register(8, r1, b1) longevity.fixed_register(35, r1, b2) @@ -206,31 +202,32 @@ # simple cases: we are before the beginning of the lifetime of the variable # in the fixed register, then it's free until the definition of the # variable - assert fpr1.compute_free_until_pos(0) == 2 - assert fpr1.compute_free_until_pos(1) == 2 - assert fpr1.compute_free_until_pos(2) == 2 - assert fpr1.compute_free_until_pos(10) == 30 - assert fpr1.compute_free_until_pos(20) == 30 - assert fpr1.compute_free_until_pos(30) == 30 + assert fpr1.free_until_pos(0) == 2 + assert fpr1.free_until_pos(1) == 2 + assert fpr1.free_until_pos(2) == 2 + assert fpr1.free_until_pos(10) == 30 + assert fpr1.free_until_pos(20) == 30 + assert fpr1.free_until_pos(30) == 30 # after the fixed use, we are fined anyway - assert fpr1.compute_free_until_pos(36) == sys.maxint - assert fpr1.compute_free_until_pos(50) == sys.maxint + assert fpr1.free_until_pos(36) == sys.maxint + assert fpr1.free_until_pos(50) == sys.maxint # asking for a position *after* the definition of the variable in the fixed # register means the variable didn't make it into the fixed register, but # at the latest by the use point it will have to go there - assert 
fpr1.compute_free_until_pos(3) == 5 - assert fpr1.compute_free_until_pos(4) == 5 - assert fpr1.compute_free_until_pos(5) == 5 - assert fpr1.compute_free_until_pos(6) == 8 - assert fpr1.compute_free_until_pos(7) == 8 - assert fpr1.compute_free_until_pos(8) == 8 - assert fpr1.compute_free_until_pos(31) == 35 - assert fpr1.compute_free_until_pos(32) == 35 - assert fpr1.compute_free_until_pos(33) == 35 - assert fpr1.compute_free_until_pos(34) == 35 - assert fpr1.compute_free_until_pos(35) == 35 + assert fpr1.free_until_pos(3) == 5 + assert fpr1.free_until_pos(4) == 5 + assert fpr1.free_until_pos(5) == 5 + assert fpr1.free_until_pos(6) == 8 + assert fpr1.free_until_pos(7) == 8 + assert fpr1.free_until_pos(8) == 8 + assert fpr1.free_until_pos(31) == 35 + assert fpr1.free_until_pos(32) == 35 + assert fpr1.free_until_pos(33) == 35 + assert fpr1.free_until_pos(34) == 35 + assert fpr1.free_until_pos(35) == 35 + class TestRegalloc(object): def test_freeing_vars(self): From pypy.commits at gmail.com Tue Aug 22 11:05:57 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:05:57 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: a special case for repeated uses of the same fixed register (with different vars). a test for longest_free_reg Message-ID: <599c4855.4981df0a.601c0.a29a@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92208:ffb5755ee817 Date: 2017-08-22 15:41 +0200 http://bitbucket.org/pypy/pypy/changeset/ffb5755ee817/ Log: a special case for repeated uses of the same fixed register (with different vars). a test for longest_free_reg diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -852,12 +852,18 @@ return l[low] def fixed_register(self, position, reg): + """ registers a fixed register use for the variable at position in + register reg. 
returns the position from where on the register should be + held free. """ assert self.definition_pos <= position <= self.last_usage if self.fixed_positions is None: self.fixed_positions = [] + res = self.definition_pos else: assert position > self.fixed_positions[-1][0] + res = self.fixed_positions[-1][0] self.fixed_positions.append((position, reg)) + return res def _check_invariants(self): assert self.definition_pos <= self.last_usage @@ -876,16 +882,17 @@ self.index_lifetimes = [] - def fixed_register(self, opindex, varlifetime): + def fixed_register(self, opindex, definition_pos): if self.index_lifetimes: assert opindex > self.index_lifetimes[-1][0] - self.index_lifetimes.append((opindex, varlifetime)) + self.index_lifetimes.append((opindex, definition_pos)) def free_until_pos(self, opindex): - for (index, varlifetime) in self.index_lifetimes: + # XXX could use binary search + for (index, definition_pos) in self.index_lifetimes: if opindex <= index: - if varlifetime is not None and varlifetime.definition_pos >= opindex: - return varlifetime.definition_pos + if definition_pos >= opindex: + return definition_pos else: # the variable doesn't exist or didn't make it into the # register despite being defined already. so we don't care @@ -894,6 +901,7 @@ return index return sys.maxint + class LifetimeManager(object): def __init__(self, longevity): self.longevity = longevity @@ -906,19 +914,19 @@ operation opindex. 
var can be None, if no variable at all can be in that register at the point.""" if var is None: - varlifetime = None + definition_pos = opindex else: varlifetime = self.longevity[var] - varlifetime.fixed_register(opindex, register) + definition_pos = varlifetime.fixed_register(opindex, register) if register not in self.fixed_register_use: self.fixed_register_use[register] = FixedRegisterPositions(register) - self.fixed_register_use[register].fixed_register(opindex, varlifetime) + self.fixed_register_use[register].fixed_register(opindex, definition_pos) def longest_free_reg(self, position, free_regs): - """ for every register in free_regs, compute how far into the - future that register can remain free, according to the constraints of - the fixed registers. Find the register that is free the longest. Return a tuple - (reg, free_until_pos). """ + """ for every register in free_regs, compute how far into the future + that register can remain free, according to the constraints of the + fixed registers. Find the register that is free the longest. Return a + tuple (reg, free_until_pos). 
""" free_until_pos = {} max_free_pos = -1 best_reg = None diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -144,9 +144,9 @@ fpr1 = longevity.fixed_register_use[r1] fpr2 = longevity.fixed_register_use[r2] assert r3 not in longevity.fixed_register_use - assert fpr0.index_lifetimes == [(1, l0)] - assert fpr1.index_lifetimes == [(5, l1), (8, l1)] - assert fpr2.index_lifetimes == [(4, l0)] + assert fpr0.index_lifetimes == [(1, 0)] + assert fpr1.index_lifetimes == [(5, 2), (8, 5)] + assert fpr2.index_lifetimes == [(4, 1)] def test_fixed_position_none(): b0, b1, b2 = newboxes(0, 0, 0) @@ -163,9 +163,9 @@ fpr1 = longevity.fixed_register_use[r1] fpr2 = longevity.fixed_register_use[r2] assert r3 not in longevity.fixed_register_use - assert fpr0.index_lifetimes == [(1, None)] - assert fpr1.index_lifetimes == [(5, None), (8, None)] - assert fpr2.index_lifetimes == [(4, None)] + assert fpr0.index_lifetimes == [(1, 1)] + assert fpr1.index_lifetimes == [(5, 5), (8, 8)] + assert fpr2.index_lifetimes == [(4, 4)] def test_free_until_pos_none(): @@ -228,6 +228,33 @@ assert fpr1.free_until_pos(34) == 35 assert fpr1.free_until_pos(35) == 35 +def test_free_until_pos_different_regs(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(30, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + fpr2 = longevity.fixed_register_use[r2] + # the definition of b0 is before the other fixed register use of r0, so the + # earliest b0 can be in r2 is that use point at index 1 + assert fpr2.free_until_pos(0) == 1 + + +def test_longest_free_reg(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(30, 40) + longevity = LifetimeManager({b0: l0, b1: l1, 
b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + longevity.fixed_register(5, r1, b1) + longevity.fixed_register(8, r1, b1) + longevity.fixed_register(35, r1, b2) + + assert longevity.longest_free_reg(0, [r0, r1, r2]) == (r2, 2) class TestRegalloc(object): def test_freeing_vars(self): From pypy.commits at gmail.com Tue Aug 22 11:06:06 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 08:06:06 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: enough mocking for a simple call test (but with wrong results!) Message-ID: <599c485e.03081c0a.5d75b.0b95@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92209:84209398e70f Date: 2017-08-22 17:04 +0200 http://bitbucket.org/pypy/pypy/changeset/84209398e70f/ Log: enough mocking for a simple call test (but with wrong results!) diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -579,9 +579,9 @@ if v not in self.reg_bindings: # v not in a register. 
allocate one for result_v and move v there prev_loc = self.frame_manager.loc(v) - loc = self.force_allocate_reg(result_v, forbidden_vars) + loc = self.force_allocate_reg(v, forbidden_vars) self.assembler.regalloc_mov(prev_loc, loc) - return loc + assert v in self.reg_bindings if self.longevity[v].last_usage > self.position: # we need to find a new place for variable v and # store result in the same place diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -44,6 +44,10 @@ class FakeReg(object): def __init__(self, i): self.n = i + def _getregkey(self): + return self.n + def is_memory_reference(self): + return False def __repr__(self): return 'r%d' % self.n @@ -60,6 +64,10 @@ self.pos = pos self.value = pos self.box_type = box_type + def _getregkey(self): + return ~self.value + def is_memory_reference(self): + return True def __repr__(self): return 'FramePos<%d,%s>' % (self.pos, self.box_type) def __eq__(self, other): @@ -254,7 +262,7 @@ longevity.fixed_register(8, r1, b1) longevity.fixed_register(35, r1, b2) - assert longevity.longest_free_reg(0, [r0, r1, r2]) == (r2, 2) + assert longevity.longest_free_reg(0, [r0, r1, r2]) == (r1, 2) class TestRegalloc(object): def test_freeing_vars(self): @@ -811,12 +819,15 @@ frame_reg = r8 # calling conventions: r0 is result - # r1 r2 r3 are arguments and callee-saved registers - # r4 r5 r6 r7 are caller-saved registers + # r1 r2 r3 are arguments and caller-saved registers + # r4 r5 r6 r7 are callee-saved registers def convert_to_imm(self, v): return v.value + def call_result_location(self, v): + return r0 + class FakeRegalloc(BaseRegalloc): def __init__(self): @@ -864,6 +875,8 @@ return self.rm.force_allocate_reg(var, need_lower_byte=True) def fake_allocate(self, loop): + from rpython.jit.backend.x86.jump import remap_frame_layout + emit = 
self.assembler.emitted.append for i, op in enumerate(loop.operations): self.rm.position = i @@ -879,7 +892,16 @@ op, op.getarg(0), op.getarglist()) emit((opname, loc, locs[1:])) elif op.is_guard(): - emit((opname, self.loc(op.getarg(0)))) + fail_locs = [self.loc(x) for x in op.getfailargs()] + emit((opname, self.loc(op.getarg(0)), fail_locs)) + elif rop.is_call(opnum): + # calling convention! + src_locs = [self.loc(x) for x in op.getarglist()[1:]] + self.rm.before_call() + loc = self.rm.after_call(op) + dst_locs = [r1, r2, r3][:len(src_locs)] + remap_frame_layout(self.assembler, src_locs, dst_locs, r8) + emit((opname, loc, dst_locs)) elif opname == "label": descr = op.getdescr() locs = [self.loc(x) for x in op.getarglist()] @@ -946,6 +968,7 @@ regalloc = FakeRegalloc() regalloc.prepare_loop(loop.inputargs, loop.operations, loop.original_jitcell_token, []) + self.regalloc = regalloc return regalloc.fake_allocate(loop) def _consider_binop(self, op): @@ -969,6 +992,23 @@ ("move", r0, fp0), ("int_add", r0, [1]), ("int_lt", r8, [r0, 20]), - ("guard_true", r8), + ("guard_true", r8, [r0]), ("jump", [r0]), - ] + ] + + def test_call(self): + ops = ''' + [i0] + i1 = int_mul(i0, 2) + i2 = call_i(ConstClass(f1ptr), i1, descr=f1_calldescr) + guard_false(i2) [] + ''' + emitted = self.allocate(ops) + fp0 = FakeFramePos(0, INT) + assert emitted == [ + ("move", r0, fp0), + ("int_mul", r0, [2]), + ("move", r1, r0), + ("call_i", r0, [r1]), + ("guard_false", r0, []), + ] diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py --- a/rpython/jit/backend/tool/viewcode.py +++ b/rpython/jit/backend/tool/viewcode.py @@ -223,7 +223,7 @@ addr = addrs[-1] final = '\tjmp' in line yield i, addr, final - if self.fallthrough and '\tret' not in line: + if self.fallthrough and '\tret' not in line and "\tjmp r11" not in line: yield len(lines), self.addr + len(self.data), True From pypy.commits at gmail.com Tue Aug 22 13:28:42 2017 From: pypy.commits at gmail.com 
(mattip) Date: Tue, 22 Aug 2017 10:28:42 -0700 (PDT) Subject: [pypy-commit] pypy default: cextension types should have the short names and long module for pickling Message-ID: <599c69ca.3198df0a.49111.bcb1@mx.google.com> Author: Matti Picus Branch: Changeset: r92210:0087987731d8 Date: 2017-08-22 20:27 +0300 http://bitbucket.org/pypy/pypy/changeset/0087987731d8/ Log: cextension types should have the short names and long module for pickling diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -536,23 +536,27 @@ space = self.space if self.is_heaptype(): return self.getdictvalue(space, '__module__') + elif self.is_cpytype(): + dot = self.name.rfind('.') else: dot = self.name.find('.') - if dot >= 0: - mod = self.name[:dot] - else: - mod = "__builtin__" - return space.newtext(mod) + if dot >= 0: + mod = self.name[:dot] + else: + mod = "__builtin__" + return space.newtext(mod) def getname(self, space): if self.is_heaptype(): return self.name + elif self.is_cpytype(): + dot = self.name.rfind('.') else: dot = self.name.find('.') - if dot >= 0: - return self.name[dot+1:] - else: - return self.name + if dot >= 0: + return self.name[dot+1:] + else: + return self.name def add_subclass(self, w_subclass): space = self.space From pypy.commits at gmail.com Tue Aug 22 15:21:33 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 12:21:33 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: choose the fixed register if it is available Message-ID: <599c843d.08e61c0a.e6ed1.0459@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92211:bb3d99077725 Date: 2017-08-22 21:07 +0200 http://bitbucket.org/pypy/pypy/changeset/bb3d99077725/ Log: choose the fixed register if it is available diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ 
b/rpython/jit/backend/llsupport/regalloc.py @@ -381,24 +381,27 @@ loc = self.reg_bindings.get(v, None) if loc is not None and loc not in self.no_lower_byte_regs: return loc - for i in range(len(self.free_regs) - 1, -1, -1): - reg = self.free_regs[i] - if reg not in self.no_lower_byte_regs: - if loc is not None: - self.free_regs[i] = loc - else: - del self.free_regs[i] - self.reg_bindings[v] = reg - return reg - return None + free_regs = [reg for reg in self.free_regs + if reg not in self.no_lower_byte_regs] + newloc = self.longevity.try_pick_free_reg( + self.position, v, free_regs) + if newloc is None: + return None + self.free_regs.remove(newloc) + if loc is not None: + self.free_regs.append(loc) + self.reg_bindings[v] = newloc + return newloc try: return self.reg_bindings[v] except KeyError: - # YYY here we should chose the free variable a bit more carefully - if self.free_regs: - loc = self.free_regs.pop() - self.reg_bindings[v] = loc - return loc + loc = self.longevity.try_pick_free_reg( + self.position, v, self.free_regs) + if loc is None: + return None + self.reg_bindings[v] = loc + self.free_regs.remove(loc) + return loc def _spill_var(self, v, forbidden_vars, selected_reg, need_lower_byte=False): @@ -421,6 +424,8 @@ # if that doesn't exist, spill the variable that has a real_usage that # is the furthest away from the current position + # YYY check for fixed variable usages + cur_max_use_distance = -1 position = self.position candidate = None @@ -579,9 +584,9 @@ if v not in self.reg_bindings: # v not in a register. 
allocate one for result_v and move v there prev_loc = self.frame_manager.loc(v) - loc = self.force_allocate_reg(v, forbidden_vars) + loc = self.force_allocate_reg(result_v, forbidden_vars) self.assembler.regalloc_mov(prev_loc, loc) - assert v in self.reg_bindings + return loc if self.longevity[v].last_usage > self.position: # we need to find a new place for variable v and # store result in the same place @@ -865,6 +870,14 @@ self.fixed_positions.append((position, reg)) return res + def find_fixed_register(self, opindex): + # XXX could use binary search + if self.fixed_positions is None: + return None + for (index, reg) in self.fixed_positions: + if opindex <= index: + return reg + def _check_invariants(self): assert self.definition_pos <= self.last_usage if self.real_usages is not None: @@ -928,9 +941,11 @@ fixed registers. Find the register that is free the longest. Return a tuple (reg, free_until_pos). """ free_until_pos = {} - max_free_pos = -1 + max_free_pos = position best_reg = None - for reg in free_regs: + # reverse for compatibility with old code + for i in range(len(free_regs) - 1, -1, -1): + reg = free_regs[i] fixed_reg_pos = self.fixed_register_use.get(reg, None) if fixed_reg_pos is None: return reg, sys.maxint @@ -941,6 +956,26 @@ max_free_pos = free_until_pos return best_reg, max_free_pos + def try_pick_free_reg(self, position, v, free_regs): + if not free_regs: + return None + longevityvar = self[v] + reg = longevityvar.find_fixed_register(position) + if reg is not None and reg in free_regs: + return reg + return free_regs[-1] + # more advanced stuff below, needs tests + + + + loc, free_until = self.longevity.longest_free_reg( + self.position, free_regs) + if loc is None: + return None + # YYY could check whether it's best to spill v here, but hard + # to do in the current system + return loc + def __contains__(self, var): return var in self.longevity diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py 
b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -56,6 +56,12 @@ class RegisterManager(BaseRegMan): all_regs = regs + + def __init__(self, longevity, frame_manager=None, assembler=None): + if isinstance(longevity, dict): + longevity = LifetimeManager(longevity) + BaseRegMan.__init__(self, longevity, frame_manager, assembler) + def convert_to_imm(self, v): return v @@ -840,6 +846,7 @@ # note: we need to make a copy of inputargs because possibly_free_vars # is also used on op args, which is a non-resizable list self.possibly_free_vars(list(inputargs)) + self._add_fixed_registers() return operations def _prepare(self, inputargs, operations, allgcrefs): @@ -916,6 +923,16 @@ emit((opname, locs)) return self.assembler.emitted + def _add_fixed_registers(self): + for i, op in enumerate(self.operations): + if rop.is_call(op.getopnum()): + # calling convention! + arglist = op.getarglist()[1:] + for arg, reg in zip(arglist + [None] * (3 - len(arglist)), [r1, r2, r3]): + self.longevity.fixed_register(i, reg, arg) + self.longevity.fixed_register(i, r0, op) + + CPU = getcpuclass() class TestFullRegallocFakeCPU(object): # XXX copy-paste from test_regalloc_integration @@ -1006,9 +1023,8 @@ emitted = self.allocate(ops) fp0 = FakeFramePos(0, INT) assert emitted == [ - ("move", r0, fp0), - ("int_mul", r0, [2]), - ("move", r1, r0), + ("move", r1, fp0), + ("int_mul", r1, [2]), ("call_i", r0, [r1]), ("guard_false", r0, []), ] From pypy.commits at gmail.com Tue Aug 22 15:21:35 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 12:21:35 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: a variable that survives a call gets put into a callee-saved register Message-ID: <599c843f.53e81c0a.e51bb.042f@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92212:ce386eba1dfa Date: 2017-08-22 21:20 +0200 
http://bitbucket.org/pypy/pypy/changeset/ce386eba1dfa/ Log: a variable that survives a call gets put into a callee-saved register diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -960,16 +960,13 @@ if not free_regs: return None longevityvar = self[v] + # check whether there is a fixed register and whether it's free reg = longevityvar.find_fixed_register(position) if reg is not None and reg in free_regs: return reg - return free_regs[-1] - # more advanced stuff below, needs tests - - - loc, free_until = self.longevity.longest_free_reg( - self.position, free_regs) + # pick the register that's free the longest + loc, free_until = self.longest_free_reg(position, free_regs) if loc is None: return None # YYY could check whether it's best to spill v here, but hard diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1028,3 +1028,25 @@ ("call_i", r0, [r1]), ("guard_false", r0, []), ] + + def test_call_2(self): + ops = ''' + [i0, i1] + i2 = int_mul(i0, 2) + i3 = int_add(i1, 1) + i4 = call_i(ConstClass(f1ptr), i2, descr=f1_calldescr) + guard_false(i4) [i3] + ''' + emitted = self.allocate(ops) + fp0 = FakeFramePos(0, INT) + fp1 = FakeFramePos(1, INT) + assert emitted == [ + ("move", r1, fp0), + ("int_mul", r1, [2]), + ("move", r4, fp1), # r4 gets picked since it's callee-saved + ("int_add", r4, [1]), + ("call_i", r0, [r1]), + ("guard_false", r0, [r4]), + ] + + From pypy.commits at gmail.com Tue Aug 22 16:55:42 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 13:55:42 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: another heuristic: if there's a fixed register around and the current Message-ID: 
<599c9a4e.248fdf0a.23227.ceb4@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92213:e8eede93629e Date: 2017-08-22 22:54 +0200 http://bitbucket.org/pypy/pypy/changeset/e8eede93629e/ Log: another heuristic: if there's a fixed register around and the current to-be-allocated one fits before the next fixed use, use that (and use the smallest lifetime hole) diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -956,6 +956,30 @@ max_free_pos = free_until_pos return best_reg, max_free_pos + def free_reg_whole_lifetime(self, position, v, free_regs): + """ try to find a register from free_regs for v at position that's + free for the whole lifetime of v. pick the one that is blocked first + *after* the lifetime of v. """ + longevityvar = self[v] + min_fixed_use_after = sys.maxint + best_reg = None + unfixed_reg = None + for reg in free_regs: + fixed_reg_pos = self.fixed_register_use.get(reg, None) + if fixed_reg_pos is None: + unfixed_reg = reg + continue + use_after = fixed_reg_pos.free_until_pos(longevityvar.last_usage) + assert use_after >= longevityvar.last_usage + if use_after < min_fixed_use_after: + best_reg = reg + min_fixed_use_after = use_after + if best_reg is not None: + return best_reg + + # no fitting fixed registers. 
pick a non-fixed one + return unfixed_reg + def try_pick_free_reg(self, position, v, free_regs): if not free_regs: return None @@ -965,13 +989,19 @@ if reg is not None and reg in free_regs: return reg - # pick the register that's free the longest + # try to find a register that's free for the whole lifetime of v + # pick the one that is blocked first *after* the lifetime of v + loc = self.free_reg_whole_lifetime(position, v, free_regs) + if loc is not None: + return loc + + # can't fit v completely, so pick the register that's free the longest loc, free_until = self.longest_free_reg(position, free_regs) - if loc is None: - return None + if loc is not None: + return loc # YYY could check whether it's best to spill v here, but hard # to do in the current system - return loc + return None def __contains__(self, var): return var in self.longevity diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -52,6 +52,8 @@ return 'r%d' % self.n r0, r1, r2, r3 = [FakeReg(i) for i in range(4)] +r4, r5, r6, r7, r8, r9 = [FakeReg(i) for i in range(4, 10)] + regs = [r0, r1, r2, r3] class RegisterManager(BaseRegMan): @@ -270,6 +272,27 @@ assert longevity.longest_free_reg(0, [r0, r1, r2]) == (r1, 2) +def test_try_pick_free_reg(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(0, 4) + l1 = Lifetime(2, 20) + l2 = Lifetime(6, 20) + l3 = Lifetime(8, 20) + l4 = Lifetime(0, 10) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b3: l3, b4: l4}) + longevity.fixed_register(3, r1, b1) + longevity.fixed_register(7, r2, b2) + longevity.fixed_register(9, r3, b3) + + # a best fit + loc = longevity.try_pick_free_reg(0, b0, [r1, r2, r3, r4, r5]) + assert loc is r2 + + # does not fit into any of the fixed regs, use a non-fixed one + loc = longevity.try_pick_free_reg(0, b4, [r5, r2, r3, r4, r1]) + 
assert loc in [r4, r5] + + class TestRegalloc(object): def test_freeing_vars(self): b0, b1, b2 = newboxes(0, 0, 0) @@ -815,7 +838,6 @@ # _____________________________________________________ # tests that assign registers in a mocked way for a fake CPU -r4, r5, r6, r7, r8, r9 = [FakeReg(i) for i in range(4, 10)] class RegisterManager2(BaseRegMan): all_regs = [r0, r1, r2, r3, r4, r5, r6, r7] From pypy.commits at gmail.com Tue Aug 22 16:55:44 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 13:55:44 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: harder test for later Message-ID: <599c9a50.c6141c0a.f616d.1461@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92214:f6f7d81e72e9 Date: 2017-08-22 22:55 +0200 http://bitbucket.org/pypy/pypy/changeset/f6f7d81e72e9/ Log: harder test for later diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1071,4 +1071,21 @@ ("guard_false", r0, [r4]), ] - + def test_coalescing(self): + py.test.skip("hard - later") + ops = ''' + [i0] + i2 = int_mul(i0, 2) + i3 = int_add(i2, 1) # i2 and i3 need to be coalesced + i4 = call_i(ConstClass(f1ptr), i3, descr=f1_calldescr) + guard_false(i4) [] + ''' + emitted = self.allocate(ops) + fp0 = FakeFramePos(0, INT) + assert emitted == [ + ("move", r1, fp0), + ("int_mul", r1, [2]), + ("int_add", r1, [1]), + ("call_i", r0, [r1]), + ("guard_false", r0, []), + ] From pypy.commits at gmail.com Wed Aug 23 02:42:19 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 23:42:19 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: a test about the remaining case, and a comment Message-ID: <599d23cb.db85df0a.c853d.6a56@mx.google.com> Author: Carl Friedrich Bolz Branch: regalloc-playground Changeset: r92215:358d42ebcdea Date: 2017-08-23 07:04 
+0200 http://bitbucket.org/pypy/pypy/changeset/358d42ebcdea/ Log: a test about the remaining case, and a comment diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -700,6 +700,7 @@ if len(move_or_spill) > 0: while len(self.free_regs) > 0: + # YYY here we need to use the new information to pick stuff new_reg = self.free_regs.pop() if new_reg in self.save_around_call_regs: new_free_regs.append(new_reg) # not this register... diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -292,6 +292,11 @@ loc = longevity.try_pick_free_reg(0, b4, [r5, r2, r3, r4, r1]) assert loc in [r4, r5] + # all available are fixed but var doesn't fit completely into any of these. + # pick the biggest interval + loc = longevity.try_pick_free_reg(0, b4, [r1, r2, r3]) + assert loc is r3 + class TestRegalloc(object): def test_freeing_vars(self): From pypy.commits at gmail.com Wed Aug 23 02:42:21 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 23:42:21 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: first stab at coalescing support Message-ID: <599d23cd.45b01c0a.2278b.4565@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92216:5c8cbd8502e7 Date: 2017-08-23 07:41 +0200 http://bitbucket.org/pypy/pypy/changeset/5c8cbd8502e7/ Log: first stab at coalescing support diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -838,6 +838,11 @@ # specific register self.fixed_positions = None + # another Lifetime that lives after the current one that would like to + # 
share a register with this variable + self.share_with = None + + def is_last_real_use_before(self, position): if self.real_usages is None: return True @@ -873,11 +878,12 @@ def find_fixed_register(self, opindex): # XXX could use binary search - if self.fixed_positions is None: - return None - for (index, reg) in self.fixed_positions: - if opindex <= index: - return reg + if self.fixed_positions is not None: + for (index, reg) in self.fixed_positions: + if opindex <= index: + return reg + if self.share_with is not None: + return self.share_with.find_fixed_register(opindex) def _check_invariants(self): assert self.definition_pos <= self.last_usage @@ -936,6 +942,17 @@ self.fixed_register_use[register] = FixedRegisterPositions(register) self.fixed_register_use[register].fixed_register(opindex, definition_pos) + def try_use_same_register(self, v0, v1): + """ Try to arrange things to put v0 and v1 into the same register. + v0 must be defined before v1""" + # only works in limited situations now + longevityvar0 = self[v0] + longevityvar1 = self[v1] + assert longevityvar0.definition_pos < longevityvar1.definition_pos + if longevityvar0.last_usage != longevityvar1.definition_pos: + return # not supported for now + longevityvar0.share_with = longevityvar1 + def longest_free_reg(self, position, free_regs): """ for every register in free_regs, compute how far into the future that register can remain free, according to the constraints of the diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -297,6 +297,18 @@ loc = longevity.try_pick_free_reg(0, b4, [r1, r2, r3]) assert loc is r3 +def test_simple_coalescing(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(0, 4) + l1 = Lifetime(4, 20) + l2 = Lifetime(4, 20) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + 
longevity.fixed_register(10, r1, b1) + longevity.fixed_register(10, r2, b2) + longevity.try_use_same_register(b0, b2) + + loc = longevity.try_pick_free_reg(0, b0, [r0, r1, r2, r3, r4]) + assert loc is r2 class TestRegalloc(object): def test_freeing_vars(self): From pypy.commits at gmail.com Wed Aug 23 02:42:23 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 23:42:23 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: block the fixed register earlier after coalescing Message-ID: <599d23cf.4692df0a.f2949.2b20@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92217:666fb2557e24 Date: 2017-08-23 07:52 +0200 http://bitbucket.org/pypy/pypy/changeset/666fb2557e24/ Log: block the fixed register earlier after coalescing diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -842,6 +842,8 @@ # share a register with this variable self.share_with = None + # the other lifetime will have this variable set to self.definition_pos + self.definition_pos_shared = UNDEF_POS def is_last_real_use_before(self, position): if self.real_usages is None: @@ -869,7 +871,10 @@ assert self.definition_pos <= position <= self.last_usage if self.fixed_positions is None: self.fixed_positions = [] - res = self.definition_pos + if self.definition_pos_shared != UNDEF_POS: + res = self.definition_pos_shared + else: + res = self.definition_pos else: assert position > self.fixed_positions[-1][0] res = self.fixed_positions[-1][0] @@ -952,6 +957,7 @@ if longevityvar0.last_usage != longevityvar1.definition_pos: return # not supported for now longevityvar0.share_with = longevityvar1 + longevityvar1.definition_pos_shared = longevityvar0.definition_pos def longest_free_reg(self, position, free_regs): """ for every register in free_regs, compute how far into the future diff --git 
a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -310,6 +310,26 @@ loc = longevity.try_pick_free_reg(0, b0, [r0, r1, r2, r3, r4]) assert loc is r2 +def test_coalescing_blocks_regs_correctly(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(10, 30) + l1 = Lifetime(30, 40) + l2 = Lifetime(30, 40) + l3 = Lifetime(0, 15) + l4 = Lifetime(0, 5) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b3: l3, b4: l4}) + longevity.try_use_same_register(b0, b1) + longevity.fixed_register(35, r1, b1) + longevity.fixed_register(35, r2, b2) + + loc = longevity.try_pick_free_reg(0, b3, [r1, r2]) + # r2 is picked, otherwise b0 can't end up in r1 + assert loc is r2 + + loc = longevity.try_pick_free_reg(0, b4, [r1, r2]) + # r1 is picked, because b4 fits before b0 + assert loc is r1 + class TestRegalloc(object): def test_freeing_vars(self): b0, b1, b2 = newboxes(0, 0, 0) From pypy.commits at gmail.com Wed Aug 23 02:42:25 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Aug 2017 23:42:25 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: implement chained coalescing. fix a bug in free_reg_whole_lifetime Message-ID: <599d23d1.59451c0a.b46a8.9917@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92218:0c3b2571af1c Date: 2017-08-23 08:41 +0200 http://bitbucket.org/pypy/pypy/changeset/0c3b2571af1c/ Log: implement chained coalescing. 
fix a bug in free_reg_whole_lifetime diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -843,7 +843,7 @@ self.share_with = None # the other lifetime will have this variable set to self.definition_pos - self.definition_pos_shared = UNDEF_POS + self._definition_pos_shared = UNDEF_POS def is_last_real_use_before(self, position): if self.real_usages is None: @@ -864,6 +864,12 @@ low = mid + 1 return l[low] + def definition_pos_shared(self): + if self._definition_pos_shared != UNDEF_POS: + return self._definition_pos_shared + else: + return self.definition_pos + def fixed_register(self, position, reg): """ registers a fixed register use for the variable at position in register reg. returns the position from where on the register should be @@ -871,10 +877,7 @@ assert self.definition_pos <= position <= self.last_usage if self.fixed_positions is None: self.fixed_positions = [] - if self.definition_pos_shared != UNDEF_POS: - res = self.definition_pos_shared - else: - res = self.definition_pos + res = self.definition_pos_shared() else: assert position > self.fixed_positions[-1][0] res = self.fixed_positions[-1][0] @@ -957,7 +960,7 @@ if longevityvar0.last_usage != longevityvar1.definition_pos: return # not supported for now longevityvar0.share_with = longevityvar1 - longevityvar1.definition_pos_shared = longevityvar0.definition_pos + longevityvar1._definition_pos_shared = longevityvar0.definition_pos_shared() def longest_free_reg(self, position, free_regs): """ for every register in free_regs, compute how far into the future @@ -993,7 +996,10 @@ if fixed_reg_pos is None: unfixed_reg = reg continue - use_after = fixed_reg_pos.free_until_pos(longevityvar.last_usage) + use_after = fixed_reg_pos.free_until_pos(position) + if use_after < longevityvar.last_usage: + # can't fit + continue assert use_after >= longevityvar.last_usage if 
use_after < min_fixed_use_after: best_reg = reg diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -297,6 +297,17 @@ loc = longevity.try_pick_free_reg(0, b4, [r1, r2, r3]) assert loc is r3 +def test_try_pick_free_reg_bug(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(10, 30) + l1 = Lifetime(0, 15) + longevity = LifetimeManager({b0: l0, b1: l1}) + longevity.fixed_register(20, r0, b0) + + # does not fit into r0, use r1 + loc = longevity.try_pick_free_reg(0, b1, [r0, r1]) + assert loc == r1 + def test_simple_coalescing(): b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) l0 = Lifetime(0, 4) @@ -330,6 +341,42 @@ # r1 is picked, because b4 fits before b0 assert loc is r1 + +def test_chained_coalescing(): + # 5 + b4 + # | + # 10 + b0 | + # | | + # | 15 + + # | + # + + # 20 + # + b1 + # | + # | + # | + # + + # 30 + # + b2 + # | + # r1 * + # | + # + + # 40 + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(10, 20) + l1 = Lifetime(20, 30) + l2 = Lifetime(30, 40) + l4 = Lifetime(5, 15) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b4: l4}) + longevity.try_use_same_register(b0, b1) + longevity.try_use_same_register(b1, b2) + longevity.fixed_register(35, r1, b2) + + loc = longevity.try_pick_free_reg(5, b4, [r0, r1]) + assert loc is r0 + + class TestRegalloc(object): def test_freeing_vars(self): b0, b1, b2 = newboxes(0, 0, 0) @@ -356,7 +403,7 @@ rm._check_invariants() assert len(rm.free_regs) == 4 assert len(rm.reg_bindings) == 0 - + def test_register_exhaustion(self): boxes, longevity = boxes_and_longevity(5) rm = RegisterManager(longevity) From pypy.commits at gmail.com Wed Aug 23 02:56:41 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 22 Aug 2017 23:56:41 -0700 (PDT) Subject: [pypy-commit] pypy default: dummy implementation to make cython happier 
Message-ID: <599d2729.83671c0a.13bc1.375f@mx.google.com> Author: Matti Picus Branch: Changeset: r92219:bd4073222df1 Date: 2017-08-23 09:55 +0300 http://bitbucket.org/pypy/pypy/changeset/bd4073222df1/ Log: dummy implementation to make cython happier diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -94,6 +94,13 @@ the fields used by the tp_traverse handler become invalid.""" pass + at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL) +def PyType_IS_GC(space, o): + """Return true if the type object includes support for the cycle detector; this + tests the type flag Py_TPFLAGS_HAVE_GC. + """ + return False + @cpython_api([PyObject], PyObjectP, error=CANNOT_FAIL) def _PyObject_GetDictPtr(space, op): return lltype.nullptr(PyObjectP.TO) From pypy.commits at gmail.com Wed Aug 23 03:05:57 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Aug 2017 00:05:57 -0700 (PDT) Subject: [pypy-commit] pypy default: remove implemented function from stubs Message-ID: <599d2955.24addf0a.5f478.4318@mx.google.com> Author: Matti Picus Branch: Changeset: r92220:d91e29b93f3a Date: 2017-08-23 10:04 +0300 http://bitbucket.org/pypy/pypy/changeset/d91e29b93f3a/ Log: remove implemented function from stubs diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -1506,13 +1506,6 @@ """ raise NotImplementedError - at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL) -def PyType_IS_GC(space, o): - """Return true if the type object includes support for the cycle detector; this - tests the type flag Py_TPFLAGS_HAVE_GC. - """ - raise NotImplementedError - @cpython_api([], rffi.INT_real, error=CANNOT_FAIL) def PyUnicode_ClearFreeList(space): """Clear the free list. Return the total number of freed items. 
From pypy.commits at gmail.com Wed Aug 23 09:15:48 2017 From: pypy.commits at gmail.com (exarkun) Date: Wed, 23 Aug 2017 06:15:48 -0700 (PDT) Subject: [pypy-commit] buildbot cleanup-hg-bookmarks: Optionally delete .hg/bookmarks Message-ID: <599d8004.11331c0a.b27fa.9bba@mx.google.com> Author: Jean-Paul Calderone Branch: cleanup-hg-bookmarks Changeset: r1023:aea6e451355f Date: 2017-08-23 09:15 -0400 http://bitbucket.org/pypy/buildbot/changeset/aea6e451355f/ Log: Optionally delete .hg/bookmarks diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -333,11 +333,24 @@ workdir=workdir)) def update_hg(platform, factory, repourl, workdir, use_branch, - force_branch=None): + force_branch=None, wipe_bookmarks=False): if not use_branch: assert force_branch is None update_hg_old_method(platform, factory, repourl, workdir) return + + if wipe_bookmarks: + # We don't use bookmarks at all. If a bookmark accidentally gets + # created and pushed to the server and we pull it down, it gets stuck + # here. Deleting it from the server doesn't seem to delete it from + # the local checkout. So, manually clean it up. + factory.addStep(ShellCmd( + description="cleanup bookmarks", + command=["rm", "-f", ".hg/bookmarks"], + workdir=workdir, + haltOnFailure=False, + )) + factory.addStep( Mercurial( repourl=repourl, @@ -374,7 +387,7 @@ doStepIf=ParseRevision.doStepIf)) # update_hg(platform, factory, repourl, workdir, use_branch=True, - force_branch=force_branch) + force_branch=force_branch, wipe_bookmarks=True) # factory.addStep(CheckGotRevision(workdir=workdir)) @@ -410,7 +423,7 @@ # If target_tmpdir is empty, crash. 
tmp_or_crazy = '%(prop:target_tmpdir:-crazy/name/so/mkdir/fails/)s' pytest = "pytest" - factory.addStep(ShellCmd( + factory.addStep(ShellCmd( description="mkdir for tests", command=['python', '-c', Interpolate("import os; os.mkdir(r'" + \ tmp_or_crazy + pytest + "') if not os.path.exists(r'" + \ @@ -424,7 +437,7 @@ '/D', '-' + nDays, '/c', "cmd /c rmdir /q /s @path"] else: command = ['find', Interpolate(tmp_or_crazy + pytest), '-mtime', - '+' + nDays, '-exec', 'rm', '-r', '{}', ';'] + '+' + nDays, '-exec', 'rm', '-r', '{}', ';'] factory.addStep(SuccessAlways( description="cleanout old test files", command = command, @@ -481,7 +494,7 @@ # If target_tmpdir is empty, crash. tmp_or_crazy = '%(prop:target_tmpdir:-crazy/name/so/mkdir/fails/)s' pytest = "pytest" - self.addStep(ShellCmd( + self.addStep(ShellCmd( description="mkdir for tests", command=['python', '-c', Interpolate("import os; os.mkdir(r'" + \ tmp_or_crazy + pytest + "') if not os.path.exists(r'" + \ @@ -495,7 +508,7 @@ '/D', '-' + nDays, '/c', "cmd /c rmdir /q /s @path"] else: command = ['find', Interpolate(tmp_or_crazy + pytest), '-mtime', - '+' + nDays, '-exec', 'rm', '-r', '{}', ';'] + '+' + nDays, '-exec', 'rm', '-r', '{}', ';'] self.addStep(SuccessAlways( description="cleanout old test files", command = command, @@ -976,7 +989,7 @@ workdir='pypy-c', haltOnFailure=True, )) - + if platform == 'win32': self.addStep(ShellCmd( description='move decompressed dir', @@ -1010,7 +1023,7 @@ # obtain a pypy-compatible branch of numpy numpy_url = 'https://www.bitbucket.org/pypy/numpy' update_git(platform, self, numpy_url, 'numpy_src', branch='master', - alwaysUseLatest=True, # ignore pypy rev number when + alwaysUseLatest=True, # ignore pypy rev number when # triggered by a pypy build ) From pypy.commits at gmail.com Wed Aug 23 09:40:00 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 06:40:00 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Issue #2638 Message-ID: 
<599d85b0.090b1c0a.41761.a788@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92221:b4d19ffb3525 Date: 2017-08-23 15:39 +0200 http://bitbucket.org/pypy/pypy/changeset/b4d19ffb3525/ Log: Issue #2638 Workaround for .python_history files that have non-utf-8 chars. But ideally we should encode/decode the content in some locale-aware format, instead of using UTF-8 all the time. diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py --- a/lib_pypy/pyrepl/readline.py +++ b/lib_pypy/pyrepl/readline.py @@ -314,7 +314,8 @@ # history item: we use \r\n instead of just \n. If the history # file is passed to GNU readline, the extra \r are just ignored. history = self.get_reader().history - f = open(os.path.expanduser(filename), 'r', encoding='utf-8') + f = open(os.path.expanduser(filename), 'r', encoding='utf-8', + errors='replace') buffer = [] for line in f: if line.endswith('\r\n'): From pypy.commits at gmail.com Wed Aug 23 10:49:30 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 07:49:30 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Fix error message Message-ID: <599d95fa.11421c0a.729dd.e4cd@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92222:d459f7e19937 Date: 2017-08-23 16:48 +0200 http://bitbucket.org/pypy/pypy/changeset/d459f7e19937/ Log: Fix error message diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py --- a/pypy/objspace/std/objectobject.py +++ b/pypy/objspace/std/objectobject.py @@ -143,7 +143,7 @@ from pypy.objspace.std.typeobject import W_TypeObject if not isinstance(w_newcls, W_TypeObject): raise oefmt(space.w_TypeError, - "__class__ must be set to new-style class, not '%T' " + "__class__ must be set to a class, not '%T' " "object", w_newcls) if not w_newcls.is_heaptype(): raise oefmt(space.w_TypeError, From pypy.commits at gmail.com Wed Aug 23 11:31:55 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 08:31:55 -0700 (PDT) Subject: [pypy-commit] pypy 
py3.5: Simplification Message-ID: <599d9feb.4b6b1c0a.4ff12.b7a9@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92223:34f222bdfa0b Date: 2017-08-23 17:29 +0200 http://bitbucket.org/pypy/pypy/changeset/34f222bdfa0b/ Log: Simplification diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -130,7 +130,7 @@ return subcls _unique_subclass_cache = {} -def _getusercls(cls, reallywantdict=False): +def _getusercls(cls): from rpython.rlib import objectmodel from pypy.objspace.std.objectobject import W_ObjectObject from pypy.objspace.std.mapdict import (BaseUserClassMapdict, @@ -144,7 +144,7 @@ else: base_mixin = MapdictStorageMixin copy_methods = [BaseUserClassMapdict] - if reallywantdict or not typedef.hasdict: + if not typedef.hasdict: # the type has no dict, mapdict to provide the dict copy_methods.append(MapdictDictSupport) name += "Dict" From pypy.commits at gmail.com Wed Aug 23 11:31:58 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 08:31:58 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Issue #2639 Message-ID: <599d9fee.9aa4df0a.99c3f.9044@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92224:8fabef083b67 Date: 2017-08-23 17:31 +0200 http://bitbucket.org/pypy/pypy/changeset/8fabef083b67/ Log: Issue #2639 Assigning '__class__' between ModuleType and subclasses diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -10,9 +10,10 @@ class Module(W_Root): """A module.""" - _immutable_fields_ = ["w_dict?"] + _immutable_fields_ = ["w_dict?", "w_userclass?"] _frozen = False + w_userclass = None def __init__(self, space, w_name, w_dict=None): self.space = space @@ -148,6 +149,26 @@ self) return space.call_function(space.w_list, w_dict) + # These three methods are needed to implement '__class__' assignment + # between a module and a subclass of module. 
They give every module + # the ability to have its '__class__' set, manually. Note that if + # you instantiate a subclass of ModuleType in the first place, then + # you get an RPython instance of a subclass of Module created in the + # normal way by typedef.py. That instance has got its own + # getclass(), getslotvalue(), etc. but provided it has no __slots__, + # it is compatible with ModuleType for '__class__' assignment. + + def getclass(self, space): + if self.w_userclass is None: + return W_Root.getclass(self, space) + return self.w_userclass + + def setclass(self, space, w_cls): + self.w_userclass = w_cls + + def user_setup(self, space, w_subtype): + self.w_userclass = w_subtype + def init_extra_module_attrs(space, w_mod): w_dict = w_mod.getdict(space) diff --git a/pypy/interpreter/test/test_module.py b/pypy/interpreter/test/test_module.py --- a/pypy/interpreter/test/test_module.py +++ b/pypy/interpreter/test/test_module.py @@ -220,3 +220,45 @@ import sys m = type(sys).__new__(type(sys)) assert not m.__dict__ + + def test_class_assignment_for_module(self): + import sys + modtype = type(sys) + class X(modtype): + _foobar_ = 42 + + m = X("yytest_moduleyy") + assert type(m) is m.__class__ is X + assert m._foobar_ == 42 + m.__class__ = modtype + assert type(m) is m.__class__ is modtype + assert not hasattr(m, '_foobar_') + + m = modtype("xxtest_modulexx") + assert type(m) is m.__class__ is modtype + m.__class__ = X + assert m._foobar_ == 42 + assert type(m) is m.__class__ is X + + sys.__class__ = modtype + assert type(sys) is sys.__class__ is modtype + sys.__class__ = X + assert sys._foobar_ == 42 + sys.__class__ = modtype + + class XX(modtype): + __slots__ = ['a', 'b'] + + x = XX("zztest_modulezz") + assert x.__class__ is XX + raises(AttributeError, "x.a") + x.a = 42 + assert x.a == 42 + x.a = 43 + assert x.a == 43 + assert 'a' not in x.__dict__ + del x.a + raises(AttributeError, "x.a") + raises(AttributeError, "del x.a") + raises(TypeError, "x.__class__ = X") + 
raises(TypeError, "sys.__class__ = XX") diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py --- a/pypy/objspace/std/objectobject.py +++ b/pypy/objspace/std/objectobject.py @@ -141,13 +141,17 @@ def descr_set___class__(space, w_obj, w_newcls): from pypy.objspace.std.typeobject import W_TypeObject + from pypy.interpreter.module import Module + # if not isinstance(w_newcls, W_TypeObject): raise oefmt(space.w_TypeError, "__class__ must be set to a class, not '%T' " "object", w_newcls) - if not w_newcls.is_heaptype(): + if not (w_newcls.is_heaptype() or + w_newcls is space.gettypeobject(Module.typedef)): raise oefmt(space.w_TypeError, - "__class__ assignment: only for heap types") + "__class__ assignment only supported for heap types " + "or ModuleType subclasses") w_oldcls = space.type(w_obj) assert isinstance(w_oldcls, W_TypeObject) if (w_oldcls.get_full_instance_layout() == From pypy.commits at gmail.com Wed Aug 23 11:56:45 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 08:56:45 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Test and fix (lib-python/3/test/test_descr) Message-ID: <599da5bd.c35c1c0a.55d8.b680@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92225:c1fb69da92dc Date: 2017-08-23 17:56 +0200 http://bitbucket.org/pypy/pypy/changeset/c1fb69da92dc/ Log: Test and fix (lib-python/3/test/test_descr) diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py --- a/pypy/objspace/std/test/test_typeobject.py +++ b/pypy/objspace/std/test/test_typeobject.py @@ -1284,6 +1284,25 @@ raises(ValueError, type, 'A\x00B', (), {}) raises(TypeError, type, b'A', (), {}) + def test_incomplete_extend(self): """ + # Extending an unitialized type with type.__mro__ is None must + # throw a reasonable TypeError exception, instead of failing + # with a segfault. 
+ class M(type): + def mro(cls): + if cls.__mro__ is None and cls.__name__ != 'X': + try: + class X(cls): + pass + except TypeError: + found.append(1) + return type.mro(cls) + found = [] + class A(metaclass=M): + pass + assert found == [1] + """ + class AppTestWithMethodCacheCounter: spaceconfig = {"objspace.std.withmethodcachecounter": True} diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -1055,6 +1055,9 @@ if w_bestbase is None: raise oefmt(space.w_TypeError, "a new-style class can't have only classic bases") + if not w_bestbase.hasmro: + raise oefmt(space.w_TypeError, + "Cannot extend an incomplete type '%N'", w_bestbase) if not w_bestbase.layout.typedef.acceptable_as_base_class: raise oefmt(space.w_TypeError, "type '%N' is not an acceptable base class", w_bestbase) From pypy.commits at gmail.com Wed Aug 23 12:01:34 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 09:01:34 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Refinement of c1fb69da92dc Message-ID: <599da6de.1db7df0a.98fc4.897f@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92226:e8a566e9fcd9 Date: 2017-08-23 18:00 +0200 http://bitbucket.org/pypy/pypy/changeset/e8a566e9fcd9/ Log: Refinement of c1fb69da92dc diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py --- a/pypy/objspace/std/test/test_typeobject.py +++ b/pypy/objspace/std/test/test_typeobject.py @@ -1303,6 +1303,27 @@ assert found == [1] """ + def test_incomplete_extend_2(self): """ + # Same as test_incomplete_extend, with multiple inheritance + class M(type): + def mro(cls): + if cls.__mro__ is None and cls.__name__ == 'Second': + try: + class X(First, cls): + pass + except TypeError: + found.append(1) + return type.mro(cls) + found = [] + class Base(metaclass=M): + pass + class First(Base): + pass + class Second(Base): + pass + assert found == [1] + """ + 
class AppTestWithMethodCacheCounter: spaceconfig = {"objspace.std.withmethodcachecounter": True} diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -1036,6 +1036,9 @@ for w_candidate in bases_w: if not isinstance(w_candidate, W_TypeObject): continue + if not w_candidate.hasmro: + raise oefmt(w_candidate.space.w_TypeError, + "Cannot extend an incomplete type '%N'", w_candidate) if w_bestbase is None: w_bestbase = w_candidate # for now continue @@ -1055,9 +1058,6 @@ if w_bestbase is None: raise oefmt(space.w_TypeError, "a new-style class can't have only classic bases") - if not w_bestbase.hasmro: - raise oefmt(space.w_TypeError, - "Cannot extend an incomplete type '%N'", w_bestbase) if not w_bestbase.layout.typedef.acceptable_as_base_class: raise oefmt(space.w_TypeError, "type '%N' is not an acceptable base class", w_bestbase) From pypy.commits at gmail.com Wed Aug 23 12:10:45 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 09:10:45 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: This case "works", but gives a slightly strange error message Message-ID: <599da905.491a1c0a.b390c.1e46@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92227:c30f2d4e721f Date: 2017-08-23 18:10 +0200 http://bitbucket.org/pypy/pypy/changeset/c30f2d4e721f/ Log: This case "works", but gives a slightly strange error message on both CPython and PyPy diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py --- a/pypy/objspace/std/test/test_typeobject.py +++ b/pypy/objspace/std/test/test_typeobject.py @@ -1324,6 +1324,25 @@ assert found == [1] """ + def test_incomplete_extend_3(self): """ + # this case "works", but gives a slightly strange error message + # on both CPython and PyPy + class M(type): + def mro(cls): + if cls.__mro__ is None and cls.__name__ == 'A': + try: + Base.__new__(cls) + except TypeError: + 
found.append(1) + return type.mro(cls) + found = [] + class Base(metaclass=M): + pass + class A(Base): + pass + assert found == [1] + """ + class AppTestWithMethodCacheCounter: spaceconfig = {"objspace.std.withmethodcachecounter": True} From pypy.commits at gmail.com Wed Aug 23 12:23:47 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 09:23:47 -0700 (PDT) Subject: [pypy-commit] pypy default: (fijal, arigo) Message-ID: <599dac13.4ad61c0a.c9047.bb68@mx.google.com> Author: Armin Rigo Branch: Changeset: r92228:18b10a6743c4 Date: 2017-08-23 18:21 +0200 http://bitbucket.org/pypy/pypy/changeset/18b10a6743c4/ Log: (fijal, arigo) Rename _pypy_dll to __pypy_dll__, so that the custom __getattr__ would raise if the attribute doesn't exist diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -364,7 +364,7 @@ pypy_dll = _ffi.CDLL(name, mode) else: pypy_dll = _ffi.WinDLL(name, mode) - self._pypy_dll = pypy_dll + self.__pypy_dll__ = pypy_dll handle = int(pypy_dll) if _sys.maxint > 2 ** 32: handle = int(handle) # long -> int diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -82,7 +82,7 @@ return False def in_dll(self, dll, name): - return self.from_address(dll._pypy_dll.getaddressindll(name)) + return self.from_address(dll.__pypy_dll__.getaddressindll(name)) def from_buffer(self, obj, offset=0): size = self._sizeofinstances() diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -430,7 +430,7 @@ ffires = restype.get_ffi_argtype() return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_) - cdll = self.dll._pypy_dll + cdll = self.dll.__pypy_dll__ try: ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes] ffi_restype = restype.get_ffi_argtype() From 
pypy.commits at gmail.com Wed Aug 23 12:23:49 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 09:23:49 -0700 (PDT) Subject: [pypy-commit] pypy default: (fijal, arigo) Message-ID: <599dac15.81131c0a.37075.e37e@mx.google.com> Author: Armin Rigo Branch: Changeset: r92229:b02f6ce0d05b Date: 2017-08-23 18:23 +0200 http://bitbucket.org/pypy/pypy/changeset/b02f6ce0d05b/ Log: (fijal, arigo) Improve the explanation a bit diff --git a/pypy/module/test_lib_pypy/README.txt b/pypy/module/test_lib_pypy/README.txt --- a/pypy/module/test_lib_pypy/README.txt +++ b/pypy/module/test_lib_pypy/README.txt @@ -1,4 +1,7 @@ This directory contains app-level tests are supposed to be run *after* translation. So you run them by saying: -pypy pytest.py +../../goal/pypy-c pytest.py + +Note that if you run it with a PyPy from elsewhere, it will not pick +up the changes to lib-python and lib_pypy. From pypy.commits at gmail.com Wed Aug 23 12:34:31 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 23 Aug 2017 09:34:31 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: more realistic jump faking Message-ID: <599dae97.6ea9df0a.97a44.4572@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92231:3ba71498fe77 Date: 2017-08-23 11:07 +0200 http://bitbucket.org/pypy/pypy/changeset/3ba71498fe77/ Log: more realistic jump faking diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -997,8 +997,8 @@ def fake_allocate(self, loop): from rpython.jit.backend.x86.jump import remap_frame_layout - - emit = self.assembler.emitted.append + def emit(*args): + self.assembler.emitted.append(args) for i, op in enumerate(loop.operations): self.rm.position = i opnum = op.getopnum() @@ -1006,15 +1006,15 @@ if rop.is_comparison(opnum): locs = [self.loc(x) for x in 
op.getarglist()] loc = self.force_allocate_reg_or_cc(op) - emit((opname, loc, locs)) + emit(opname, loc, locs) elif opname.startswith("int_"): locs = [self.loc(x) for x in op.getarglist()] loc = self.rm.force_result_in_reg( op, op.getarg(0), op.getarglist()) - emit((opname, loc, locs[1:])) + emit(opname, loc, locs[1:]) elif op.is_guard(): fail_locs = [self.loc(x) for x in op.getfailargs()] - emit((opname, self.loc(op.getarg(0)), fail_locs)) + emit(opname, self.loc(op.getarg(0)), fail_locs) elif rop.is_call(opnum): # calling convention! src_locs = [self.loc(x) for x in op.getarglist()[1:]] @@ -1022,19 +1022,24 @@ loc = self.rm.after_call(op) dst_locs = [r1, r2, r3][:len(src_locs)] remap_frame_layout(self.assembler, src_locs, dst_locs, r8) - emit((opname, loc, dst_locs)) + emit(opname, loc, dst_locs) elif opname == "label": descr = op.getdescr() locs = [self.loc(x) for x in op.getarglist()] - emit((opname, locs)) + emit(opname, locs) descr._fake_arglocs = locs + elif opname == "jump": + src_locs = [self.loc(x) for x in op.getarglist()] + dst_locs = op.getdescr()._fake_arglocs + remap_frame_layout(self.assembler, src_locs, dst_locs, r8) + emit("jump", dst_locs) else: locs = [self.loc(x) for x in op.getarglist()] if op.type != "v": loc = self.rm.force_allocate_reg(op) - emit((opname, loc, locs)) + emit(opname, loc, locs) else: - emit((opname, locs)) + emit(opname, locs) self.possibly_free_vars_for_op(op) return self.assembler.emitted @@ -1131,7 +1136,8 @@ ("int_add", r0, [1]), ("int_lt", r8, [r0, 20]), ("guard_true", r8, [r0]), - ("jump", [r0]), + ("move", fp0, r0), + ("jump", [fp0]), ] def test_call(self): From pypy.commits at gmail.com Wed Aug 23 12:34:32 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 23 Aug 2017 09:34:32 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: make it possible to specify the locs of the inputargs in the tests. 
a skipped Message-ID: <599dae98.6ea9df0a.97a44.4579@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92232:3cde3ad8b828 Date: 2017-08-23 11:53 +0200 http://bitbucket.org/pypy/pypy/changeset/3cde3ad8b828/ Log: make it possible to specify the locs of the inputargs in the tests. a skipped test that's messy to fix. diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -945,12 +945,15 @@ def __init__(self): self.assembler = MockAsm() - def prepare_loop(self, inputargs, operations, looptoken, allgcrefs): - operations = self._prepare(inputargs, operations, allgcrefs) + def fake_prepare_loop(self, inputargs, operations, looptoken, inputarg_locs=None): + operations = self._prepare(inputargs, operations, []) self.operations = operations - self._set_initial_bindings(inputargs, looptoken) - # note: we need to make a copy of inputargs because possibly_free_vars - # is also used on op args, which is a non-resizable list + if inputarg_locs is None: + self._set_initial_bindings(inputargs, looptoken) + else: + for v, loc in zip(inputargs, inputarg_locs): + self.rm.reg_bindings[v] = loc + self.rm.free_regs.remove(loc) self.possibly_free_vars(list(inputargs)) self._add_fixed_registers() return operations @@ -1105,12 +1108,12 @@ return parse(s, self.cpu, namespace or self.namespace, boxkinds=boxkinds) - def allocate(self, s): + def allocate(self, s, inputarg_locs=None): loop = self.parse(s) self.loop = loop regalloc = FakeRegalloc() - regalloc.prepare_loop(loop.inputargs, loop.operations, - loop.original_jitcell_token, []) + regalloc.fake_prepare_loop(loop.inputargs, loop.operations, + loop.original_jitcell_token, inputarg_locs) self.regalloc = regalloc return regalloc.fake_allocate(loop) @@ -1200,3 +1203,36 @@ ('call_i', r0, [r1]), ('guard_false', r0, []) ] + + 
def test_specify_inputarg_locs(self): + ops = ''' + [i0] + i1 = int_mul(i0, 5) + i5 = int_is_true(i1) + guard_true(i5) [] + ''' + emitted = self.allocate(ops, [r0]) + assert emitted == [ + ('int_mul', r0, [5]), + ('int_is_true', r8, [r0]), + ('guard_true', r8, []) + ] + + def test_coalescing_first_var_already_in_different_reg(self): + py.test.skip("messy - later") + ops = ''' + [i0] + i2 = int_mul(i0, 2) + i3 = int_add(i2, 1) # i2 and i3 need to be coalesced + i4 = call_i(ConstClass(f1ptr), i3, descr=f1_calldescr) + guard_false(i4) [i0] + ''' + emitted = self.allocate(ops, [r5]) + assert emitted == [ + ('move', r1, r5), + ('int_mul', r1, [2]), + ('int_add', r1, [1]), + ('call_i', r0, [r1]), + ('guard_false', r0, []) + ] + From pypy.commits at gmail.com Wed Aug 23 12:34:29 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 23 Aug 2017 09:34:29 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: now the coalescing integration test works Message-ID: <599dae95.54871c0a.dcbf8.e94d@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92230:c02b584c919d Date: 2017-08-23 10:42 +0200 http://bitbucket.org/pypy/pypy/changeset/c02b584c919d/ Log: now the coalescing integration test works diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -972,6 +972,14 @@ if var is not None: # xxx kludgy self.possibly_free_var(var) + def possibly_free_vars_for_op(self, op): + for i in range(op.numargs()): + var = op.getarg(i) + if var is not None: # xxx kludgy + self.possibly_free_var(var) + if op.type != 'v': + self.possibly_free_var(op) + def loc(self, x): return self.rm.loc(x) @@ -1027,16 +1035,23 @@ emit((opname, loc, locs)) else: emit((opname, locs)) + self.possibly_free_vars_for_op(op) return self.assembler.emitted def _add_fixed_registers(self): for i, 
op in enumerate(self.operations): - if rop.is_call(op.getopnum()): + opnum = op.getopnum() + opname = op.getopname() + args = op.getarglist() + if rop.is_call(opnum): # calling convention! arglist = op.getarglist()[1:] for arg, reg in zip(arglist + [None] * (3 - len(arglist)), [r1, r2, r3]): self.longevity.fixed_register(i, reg, arg) self.longevity.fixed_register(i, r0, op) + elif opname.startswith("int_"): + if not args[0].is_constant(): + self.longevity.try_use_same_register(args[0], op) CPU = getcpuclass() @@ -1156,9 +1171,11 @@ ] def test_coalescing(self): - py.test.skip("hard - later") ops = ''' [i0] + i1 = int_mul(i0, 5) + i5 = int_is_true(i1) + guard_true(i5) [] i2 = int_mul(i0, 2) i3 = int_add(i2, 1) # i2 and i3 need to be coalesced i4 = call_i(ConstClass(f1ptr), i3, descr=f1_calldescr) @@ -1167,9 +1184,13 @@ emitted = self.allocate(ops) fp0 = FakeFramePos(0, INT) assert emitted == [ - ("move", r1, fp0), - ("int_mul", r1, [2]), - ("int_add", r1, [1]), - ("call_i", r0, [r1]), - ("guard_false", r0, []), + ('move', r1, fp0), + ('int_mul', r1, [5]), + ('int_is_true', r8, [r1]), + ('guard_true', r8, []), + ('move', r1, fp0), + ('int_mul', r1, [2]), + ('int_add', r1, [1]), + ('call_i', r0, [r1]), + ('guard_false', r0, []) ] From pypy.commits at gmail.com Wed Aug 23 12:34:34 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 23 Aug 2017 09:34:34 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: use the regular spill heuristics to chose the variables to spill before a call Message-ID: <599dae9a.c7181c0a.7d20a.321d@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92233:f68f729a80cf Date: 2017-08-23 18:33 +0200 http://bitbucket.org/pypy/pypy/changeset/f68f729a80cf/ Log: use the regular spill heuristics to chose the variables to spill before a call diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ 
b/rpython/jit/backend/llsupport/regalloc.py @@ -418,20 +418,24 @@ return loc def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None, - need_lower_byte=False): + need_lower_byte=False, vars=None): + # YYY v is unused, remove + # try to spill a variable that has no further real usages, ie that only # appears in failargs or in a jump # if that doesn't exist, spill the variable that has a real_usage that # is the furthest away from the current position # YYY check for fixed variable usages + if vars is None: + vars = self.reg_bindings.keys() cur_max_use_distance = -1 position = self.position candidate = None cur_max_age_failargs = -1 candidate_from_failargs = None - for next in self.reg_bindings: + for next in vars: reg = self.reg_bindings[next] if next in forbidden_vars: continue @@ -696,46 +700,31 @@ else: # this is a register like eax/rax, which needs either # spilling or moving. - move_or_spill.append((v, max_age)) + move_or_spill.append(v) if len(move_or_spill) > 0: - while len(self.free_regs) > 0: - # YYY here we need to use the new information to pick stuff - new_reg = self.free_regs.pop() - if new_reg in self.save_around_call_regs: - new_free_regs.append(new_reg) # not this register... - continue - # This 'new_reg' is suitable for moving a candidate to. - # Pick the one with the smallest max_age. (This - # is one step of a naive sorting algo, slow in theory, - # but the list should always be very small so it - # doesn't matter.) 
- best_i = 0 - smallest_max_age = move_or_spill[0][1] - for i in range(1, len(move_or_spill)): - max_age = move_or_spill[i][1] - if max_age < smallest_max_age: - best_i = i - smallest_max_age = max_age - v, max_age = move_or_spill.pop(best_i) - # move from 'reg' to 'new_reg' + free_regs = [reg for reg in self.free_regs + if reg not in self.save_around_call_regs] + # chose which to spill using the usual spill heuristics + while len(move_or_spill) > len(free_regs): + v = self._pick_variable_to_spill(None, [], vars=move_or_spill) + self._bc_spill(v, new_free_regs) + move_or_spill.remove(v) + assert len(move_or_spill) <= len(free_regs) + for v in move_or_spill: + # search next good reg + new_reg = None + while True: + new_reg = self.free_regs.pop() + if new_reg in self.save_around_call_regs: + new_free_regs.append(new_reg) # not this register... + continue + break + assert new_reg is not None # must succeed reg = self.reg_bindings[v] - if not we_are_translated(): - if move_or_spill: - assert max_age <= min([_a for _, _a in move_or_spill]) - assert reg in save_sublist - assert reg in self.save_around_call_regs - assert new_reg not in self.save_around_call_regs self.assembler.regalloc_mov(reg, new_reg) self.reg_bindings[v] = new_reg # change the binding new_free_regs.append(reg) - # - if len(move_or_spill) == 0: - break - else: - # no more free registers to move to, spill the rest - for v, max_age in move_or_spill: - self._bc_spill(v, new_free_regs) # re-add registers in 'new_free_regs', but in reverse order, # so that the last ones (added just above, from diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -617,6 +617,7 @@ assembler=asm) for b in boxes[:-1]: rm.force_allocate_reg(b) + rm.position = 0 rm.before_call() assert len(rm.reg_bindings) == 2 assert fm.get_frame_depth() == 2 @@ 
-1236,3 +1237,35 @@ ('guard_false', r0, []) ] + def test_call_spill_furthest_use(self): + # here, i2 should be spilled, because its use is farther away + ops = ''' + [i0, i1, i2, i3, i4, i5, i6] + i8 = call_i(ConstClass(f2ptr), i0, i1, descr=f2_calldescr) + escape_i(i3) + escape_i(i2) + guard_false(i8) [i2, i3, i4, i5, i6] + ''' + emitted = self.allocate(ops, [r1, r2, r0, r3, r4, r5, r6]) + fp0 = FakeFramePos(0, INT) + assert emitted == [ + ('move', fp0, r0), + ('move', r7, r3), + ('call_i', r0, [r1, r2]), + ('escape_i', r1, [r7]), + ('escape_i', r1, [fp0]), + ('guard_false', r0, [fp0, r7, r4, r5, r6]) + ] + + def test_call_spill(self): + py.test.skip("also messy") + # i0 dies, i1 is the argument, the other fight for caller-saved regs + # all_regs = [r0, r1, r2, r3, r4, r5, r6, r7] + # save_around_call_regs = [r0, r1, r2, r3] + ops = ''' + [i0, i1, i2, i3, i4, i5, i6] + i8 = call_i(ConstClass(f2ptr), i1, i0, descr=f2_calldescr) + guard_false(i8) [i2, i3, i4, i5, i6] + ''' + emitted = self.allocate(ops, [r5, r1, r0, r2, r3, r6, r7]) + assert emitted == ["???"] From pypy.commits at gmail.com Wed Aug 23 12:45:26 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 23 Aug 2017 09:45:26 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: just to check that jump hinting would work too Message-ID: <599db126.6ea0df0a.5700f.a1e1@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92234:ac35a22e47df Date: 2017-08-23 18:44 +0200 http://bitbucket.org/pypy/pypy/changeset/ac35a22e47df/ Log: just to check that jump hinting would work too diff --git a/pytest.ini b/pytest.ini --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +0,0 @@ -[pytest] -addopts = --assert=reinterp -rf diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1032,6 +1032,14 @@ locs = 
[self.loc(x) for x in op.getarglist()] emit(opname, locs) descr._fake_arglocs = locs + lastop = loop.operations[-1] + if lastop.getopname() == "jump" and lastop.getdescr() is descr: + # now we know the places, add hints + for i, r in enumerate(locs): + if isinstance(r, FakeReg): + self.longevity.fixed_register( + len(loop.operations) - 1, r, lastop.getarg(i)) + elif opname == "jump": src_locs = [self.loc(x) for x in op.getarglist()] dst_locs = op.getdescr()._fake_arglocs @@ -1269,3 +1277,24 @@ ''' emitted = self.allocate(ops, [r5, r1, r0, r2, r3, r6, r7]) assert emitted == ["???"] + + def test_jump_hinting(self): + ops = ''' + [i0, i1] + i2 = escape_i() + i3 = escape_i() + label(i2, i3, descr=targettoken) + i4 = escape_i() + i5 = escape_i() + jump(i4, i5, descr=targettoken) + ''' + self.targettoken._fake_arglocs = [r5, r6] + emitted = self.allocate(ops) + assert emitted == [ + ('escape_i', r0, []), + ('escape_i', r1, []), + ('label', [r0, r1]), + ('escape_i', r0, []), + ('escape_i', r1, []), + ('jump', [r0, r1]) + ] diff --git a/rpython/pytest.ini b/rpython/pytest.ini --- a/rpython/pytest.ini +++ b/rpython/pytest.ini @@ -1,2 +0,0 @@ -[pytest] -addopts = --assert=reinterp -rf From pypy.commits at gmail.com Wed Aug 23 12:55:53 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 09:55:53 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <599db399.91a5df0a.9f412.9f24@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92235:d4535b1f44cd Date: 2017-08-23 18:55 +0200 http://bitbucket.org/pypy/pypy/changeset/d4535b1f44cd/ Log: hg merge default diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -364,7 +364,7 @@ pypy_dll = _ffi.CDLL(name, mode) else: pypy_dll = _ffi.WinDLL(name, mode) - self._pypy_dll = pypy_dll + self.__pypy_dll__ = pypy_dll handle = int(pypy_dll) if _sys.maxint > 2 ** 32: handle = int(handle) 
# long -> int diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -82,7 +82,7 @@ return False def in_dll(self, dll, name): - return self.from_address(dll._pypy_dll.getaddressindll(name)) + return self.from_address(dll.__pypy_dll__.getaddressindll(name)) def from_buffer(self, obj, offset=0): size = self._sizeofinstances() diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -430,7 +430,7 @@ ffires = restype.get_ffi_argtype() return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_) - cdll = self.dll._pypy_dll + cdll = self.dll.__pypy_dll__ try: ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes] ffi_restype = restype.get_ffi_argtype() diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -311,7 +311,7 @@ PyErr_BadInternalCall(space) @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1) -def PyObject_RichCompareBool(space, ref1, ref2, opid_int): +def PyObject_RichCompareBool(space, w_o1, w_o2, opid_int): """Compare the values of o1 and o2 using the operation specified by opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE, corresponding to <, @@ -321,13 +321,13 @@ opid.""" # Quick result when objects are the same. # Guarantees that identity implies equality. 
- if ref1 is ref2: + if space.is_w(w_o1, w_o2): opid = rffi.cast(lltype.Signed, opid_int) if opid == Py_EQ: return 1 if opid == Py_NE: return 0 - w_res = PyObject_RichCompare(space, ref1, ref2, opid_int) + w_res = PyObject_RichCompare(space, w_o1, w_o2, opid_int) return int(space.is_true(w_res)) @cpython_api([PyObject], PyObject, result_is_ll=True) diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py --- a/pypy/module/cpyext/sequence.py +++ b/pypy/module/cpyext/sequence.py @@ -294,6 +294,23 @@ def getitems_fixedsize(self, w_list): return self.getitems_unroll(w_list) + def copy_into(self, w_list, w_other): + w_other.strategy = self + w_other.lstorage = self.getstorage_copy(w_list) + + def clone(self, w_list): + storage = self.getstorage_copy(w_list) + w_clone = W_ListObject.from_storage_and_strategy(self.space, storage, + self) + return w_clone + + def getitems_copy(self, w_list): + return self.getitems(w_list) # getitems copies anyway + + def getstorage_copy(self, w_list): + lst = self.getitems(w_list) + return self.erase(CPyListStorage(w_list.space, lst)) + #------------------------------------------ # all these methods fail or switch strategy and then call ListObjectStrategy's method @@ -301,23 +318,9 @@ w_list.switch_to_object_strategy() w_list.strategy.setslice(w_list, start, stop, step, length) - def get_sizehint(self): - return -1 - def init_from_list_w(self, w_list, list_w): raise NotImplementedError - def clone(self, w_list): - storage = w_list.lstorage # lstorage is tuple, no need to clone - w_clone = W_ListObject.from_storage_and_strategy(self.space, storage, - self) - w_clone.switch_to_object_strategy() - return w_clone - - def copy_into(self, w_list, w_other): - w_list.switch_to_object_strategy() - w_list.strategy.copy_into(w_list, w_other) - def _resize_hint(self, w_list, hint): pass @@ -325,13 +328,6 @@ w_list.switch_to_object_strategy() return w_list.strategy.find(w_list, w_item, start, stop) - def getitems_copy(self, w_list): 
- w_list.switch_to_object_strategy() - return w_list.strategy.getitems_copy(w_list) - - def getstorage_copy(self, w_list): - raise NotImplementedError - def append(self, w_list, w_item): w_list.switch_to_object_strategy() w_list.strategy.append(w_list, w_item) diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -1516,13 +1516,6 @@ raise NotImplementedError - at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL) -def PyType_IS_GC(space, o): - """Return true if the type object includes support for the cycle detector; this - tests the type flag Py_TPFLAGS_HAVE_GC.""" - raise NotImplementedError - - @cpython_api([], rffi.INT_real, error=-1) def PyUnicode_ClearFreeList(space, ): """Clear the free list. Return the total number of freed items.""" diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -416,7 +416,7 @@ Py_buffer passed to it. """ module = self.import_extension('foo', [ - ("fillinfo", "METH_VARARGS", + ("fillinfo", "METH_NOARGS", """ Py_buffer buf; PyObject *str = PyBytes_FromString("hello, world."); @@ -468,7 +468,7 @@ object. """ module = self.import_extension('foo', [ - ("fillinfo", "METH_VARARGS", + ("fillinfo", "METH_NOARGS", """ Py_buffer buf; PyObject *str = PyBytes_FromString("hello, world."); @@ -514,7 +514,7 @@ PyBuffer_FillInfo fails if WRITABLE is passed but object is readonly. """ module = self.import_extension('foo', [ - ("fillinfo", "METH_VARARGS", + ("fillinfo", "METH_NOARGS", """ Py_buffer buf; PyObject *str = PyBytes_FromString("hello, world."); @@ -541,7 +541,7 @@ decremented by PyBuffer_Release. 
""" module = self.import_extension('foo', [ - ("release", "METH_VARARGS", + ("release", "METH_NOARGS", """ Py_buffer buf; buf.obj = PyBytes_FromString("release me!"); @@ -560,3 +560,20 @@ Py_RETURN_NONE; """)]) assert module.release() is None + + +class AppTestPyBuffer_Release(AppTestCpythonExtensionBase): + def test_richcomp_nan(self): + module = self.import_extension('foo', [ + ("comp_eq", "METH_VARARGS", + """ + PyObject *a = PyTuple_GetItem(args, 0); + PyObject *b = PyTuple_GetItem(args, 1); + int res = PyObject_RichCompareBool(a, b, Py_EQ); + return PyLong_FromLong(res); + """),]) + a = float('nan') + b = float('nan') + assert a is b + res = module.comp_eq(a, b) + assert res == 1 diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py --- a/pypy/module/cpyext/test/test_sequence.py +++ b/pypy/module/cpyext/test/test_sequence.py @@ -226,6 +226,15 @@ w_l.inplace_mul(2) assert space.int_w(space.len(w_l)) == 10 + def test_getstorage_copy(self, space, api): + w = space.wrap + w_l = w([1, 2, 3, 4]) + api.PySequence_Fast(w_l, "foo") # converts + + w_l1 = w([]) + space.setitem(w_l1, space.newslice(w(0), w(0), w(1)), w_l) + assert map(space.unwrap, space.unpackiterable(w_l1)) == [1, 2, 3, 4] + class AppTestSequenceObject(AppTestCpythonExtensionBase): def test_fast(self): diff --git a/pypy/module/test_lib_pypy/README.txt b/pypy/module/test_lib_pypy/README.txt --- a/pypy/module/test_lib_pypy/README.txt +++ b/pypy/module/test_lib_pypy/README.txt @@ -1,4 +1,7 @@ This directory contains app-level tests are supposed to be run *after* translation. So you run them by saying: -pypy pytest.py +../../goal/pypy-c pytest.py + +Note that if you run it with a PyPy from elsewhere, it will not pick +up the changes to lib-python and lib_pypy. 
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -546,19 +546,24 @@ space = self.space if self.is_heaptype(): return self.getdictvalue(space, '__module__') + elif self.is_cpytype(): + dot = self.name.rfind('.') else: dot = self.name.find('.') - if dot >= 0: - mod = self.name[:dot] - else: - mod = "builtins" - return space.newtext(mod) + if dot >= 0: + mod = self.name[:dot] + else: + mod = "builtins" + return space.newtext(mod) def getname(self, space): if self.is_heaptype(): result = self.name else: - dot = self.name.find('.') + if self.is_cpytype(): + dot = self.name.rfind('.') + else: + dot = self.name.find('.') if dot >= 0: result = self.name[dot+1:] else: diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -552,10 +552,11 @@ self.reg_bindings[result_v] = loc return loc if v not in self.reg_bindings: + # v not in a register. 
allocate one for result_v and move v there prev_loc = self.frame_manager.loc(v) - loc = self.force_allocate_reg(v, forbidden_vars) + loc = self.force_allocate_reg(result_v, forbidden_vars) self.assembler.regalloc_mov(prev_loc, loc) - assert v in self.reg_bindings + return loc if self.longevity[v][1] > self.position: # we need to find a new place for variable v and # store result in the same place diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -504,7 +504,7 @@ clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info) clt.frame_info.clear() # for now - if log: + if log or self._debug: number = looptoken.number operations = self._inject_debugging_code(looptoken, operations, 'e', number) @@ -589,7 +589,7 @@ faildescr.adr_jump_offset) self.mc.force_frame_size(DEFAULT_FRAME_BYTES) descr_number = compute_unique_id(faildescr) - if log: + if log or self._debug: operations = self._inject_debugging_code(faildescr, operations, 'b', descr_number) arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs) @@ -1618,18 +1618,6 @@ else: not_implemented("save_into_mem size = %d" % size) - def _genop_getfield(self, op, arglocs, resloc): - base_loc, ofs_loc, size_loc, sign_loc = arglocs - assert isinstance(size_loc, ImmedLoc) - source_addr = AddressLoc(base_loc, ofs_loc) - self.load_from_mem(resloc, source_addr, size_loc, sign_loc) - - genop_getfield_gc_i = _genop_getfield - genop_getfield_gc_r = _genop_getfield - genop_getfield_gc_f = _genop_getfield - genop_getfield_raw_i = _genop_getfield - genop_getfield_raw_f = _genop_getfield - def _genop_gc_load(self, op, arglocs, resloc): base_loc, ofs_loc, size_loc, sign_loc = arglocs assert isinstance(size_loc, ImmedLoc) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1305,7 
+1305,7 @@ self.rm.possibly_free_var(tmpbox_high) def compute_hint_frame_locations(self, operations): - # optimization only: fill in the 'hint_frame_locations' dictionary + # optimization only: fill in the 'hint_frame_pos' dictionary # of 'fm' based on the JUMP at the end of the loop, by looking # at where we would like the boxes to be after the jump. op = operations[-1] @@ -1320,7 +1320,7 @@ self._compute_hint_frame_locations_from_descr(descr) #else: # The loop ends in a JUMP going back to a LABEL in the same loop. - # We cannot fill 'hint_frame_locations' immediately, but we can + # We cannot fill 'hint_frame_pos' immediately, but we can # wait until the corresponding consider_label() to know where the # we would like the boxes to be after the jump. diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -205,6 +205,18 @@ if not is_valid_fd(fd): from errno import EBADF raise OSError(EBADF, 'Bad file descriptor') + + def _bound_for_write(fd, count): + if count > 32767 and c_isatty(fd): + # CPython Issue #11395, PyPy Issue #2636: the Windows console + # returns an error (12: not enough space error) on writing into + # stdout if stdout mode is binary and the length is greater than + # 66,000 bytes (or less, depending on heap usage). Can't easily + # test that, because we need 'fd' to be non-redirected... + count = 32767 + elif count > 0x7fffffff: + count = 0x7fffffff + return count else: def is_valid_fd(fd): return 1 @@ -213,6 +225,9 @@ def validate_fd(fd): pass + def _bound_for_write(fd, count): + return count + def closerange(fd_low, fd_high): # this behaves like os.closerange() from Python 2.6. 
for fd in xrange(fd_low, fd_high): @@ -449,6 +464,7 @@ def write(fd, data): count = len(data) validate_fd(fd) + count = _bound_for_write(fd, count) with rffi.scoped_nonmovingbuffer(data) as buf: return handle_posix_error('write', c_write(fd, buf, count)) diff --git a/rpython/rtyper/tool/rffi_platform.py b/rpython/rtyper/tool/rffi_platform.py --- a/rpython/rtyper/tool/rffi_platform.py +++ b/rpython/rtyper/tool/rffi_platform.py @@ -710,7 +710,8 @@ size, _ = expected_size_and_sign return lltype.FixedSizeArray(fieldtype.OF, size/_sizeof(fieldtype.OF)) raise TypeError("conflict between translating python and compiler field" - " type %r for %r" % (fieldtype, fieldname)) + " type %r for symbol %r, expected size+sign %r" % ( + fieldtype, fieldname, expected_size_and_sign)) def expose_value_as_rpython(value): if intmask(value) == value: From pypy.commits at gmail.com Wed Aug 23 13:00:28 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 10:00:28 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Manual merge of 141ba627dc5f+18b10a6743c4 Message-ID: <599db4ac.84a3df0a.55d2e.5b0c@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92236:9b4bd629bb44 Date: 2017-08-23 18:59 +0200 http://bitbucket.org/pypy/pypy/changeset/9b4bd629bb44/ Log: Manual merge of 141ba627dc5f+18b10a6743c4 diff --git a/lib-python/3/ctypes/__init__.py b/lib-python/3/ctypes/__init__.py --- a/lib-python/3/ctypes/__init__.py +++ b/lib-python/3/ctypes/__init__.py @@ -346,16 +346,18 @@ if handle is None: if flags & _FUNCFLAG_CDECL: - self._handle = _ffi.CDLL(name, mode) + pypy_dll = _ffi.CDLL(name, mode) else: - self._handle = _ffi.WinDLL(name, mode) - else: - self._handle = handle + pypy_dll = _ffi.WinDLL(name, mode) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + self._handle = handle def __repr__(self): - return "<%s '%s', handle %r at 0x%x>" % ( - self.__class__.__name__, self._name, self._handle, - id(self) & (_sys.maxsize * 2 + 1)) + return "<%s '%s', handle %x at 0x%x>" % 
\ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxsize*2 + 1)), + id(self) & (_sys.maxsize*2 + 1)) def __getattr__(self, name): if name.startswith('__') and name.endswith('__'): From pypy.commits at gmail.com Wed Aug 23 13:05:08 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 10:05:08 -0700 (PDT) Subject: [pypy-commit] pypy default: (fijal, arigo) Message-ID: <599db5c4.12badf0a.92cf.057b@mx.google.com> Author: Armin Rigo Branch: Changeset: r92237:8cff23e10359 Date: 2017-08-23 19:04 +0200 http://bitbucket.org/pypy/pypy/changeset/8cff23e10359/ Log: (fijal, arigo) Turn functions that do nothing into macros that do the same diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -277,6 +277,15 @@ char dummy; } PyGC_Head; +/* dummy GC macros */ +#define _PyGC_FINALIZED(o) 1 +#define PyType_IS_GC(tp) 1 + +#define PyObject_GC_Track(o) do { } while(0) +#define PyObject_GC_UnTrack(o) do { } while(0) +#define _PyObject_GC_TRACK(o) do { } while(0) +#define _PyObject_GC_UNTRACK(o) do { } while(0) + /* Utility macro to help write tp_traverse functions. * To use this macro, the tp_traverse function must name its arguments * "visit" and "arg". This is intended to keep tp_traverse functions diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -76,31 +76,6 @@ def PyObject_GC_Del(space, obj): PyObject_Free(space, obj) - at cpython_api([rffi.VOIDP], lltype.Void) -def PyObject_GC_Track(space, op): - """Adds the object op to the set of container objects tracked by the - collector. The collector can run at unexpected times so objects must be - valid while being tracked. 
This should be called once all the fields - followed by the tp_traverse handler become valid, usually near the - end of the constructor.""" - pass - - at cpython_api([rffi.VOIDP], lltype.Void) -def PyObject_GC_UnTrack(space, op): - """Remove the object op from the set of container objects tracked by the - collector. Note that PyObject_GC_Track() can be called again on - this object to add it back to the set of tracked objects. The deallocator - (tp_dealloc handler) should call this for the object before any of - the fields used by the tp_traverse handler become invalid.""" - pass - - at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL) -def PyType_IS_GC(space, o): - """Return true if the type object includes support for the cycle detector; this - tests the type flag Py_TPFLAGS_HAVE_GC. - """ - return False - @cpython_api([PyObject], PyObjectP, error=CANNOT_FAIL) def _PyObject_GetDictPtr(space, op): return lltype.nullptr(PyObjectP.TO) diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -655,18 +655,6 @@ require changes in your code for properly supporting 64-bit systems.""" raise NotImplementedError - at cpython_api([PyObject], lltype.Void) -def _PyObject_GC_TRACK(space, op): - """A macro version of PyObject_GC_Track(). It should not be used for - extension modules.""" - raise NotImplementedError - - at cpython_api([PyObject], lltype.Void) -def _PyObject_GC_UNTRACK(space, op): - """A macro version of PyObject_GC_UnTrack(). It should not be used for - extension modules.""" - raise NotImplementedError - @cpython_api([PyFrameObject], PyObject) def PyGen_New(space, frame): """Create and return a new generator object based on the frame object. 
A From pypy.commits at gmail.com Wed Aug 23 13:06:36 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 23 Aug 2017 10:06:36 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <599db61c.8d6d1c0a.dc421.1e22@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r92238:89b8fc097575 Date: 2017-08-23 19:05 +0200 http://bitbucket.org/pypy/pypy/changeset/89b8fc097575/ Log: hg merge default diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -269,6 +269,11 @@ #define _PyGC_FINALIZED(o) 1 #define PyType_IS_GC(tp) 1 +#define PyObject_GC_Track(o) do { } while(0) +#define PyObject_GC_UnTrack(o) do { } while(0) +#define _PyObject_GC_TRACK(o) do { } while(0) +#define _PyObject_GC_UNTRACK(o) do { } while(0) + /* Utility macro to help write tp_traverse functions. * To use this macro, the tp_traverse function must name its arguments * "visit" and "arg". This is intended to keep tp_traverse functions diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -80,24 +80,6 @@ def PyObject_GC_Del(space, obj): PyObject_Free(space, obj) - at cpython_api([rffi.VOIDP], lltype.Void) -def PyObject_GC_Track(space, op): - """Adds the object op to the set of container objects tracked by the - collector. The collector can run at unexpected times so objects must be - valid while being tracked. This should be called once all the fields - followed by the tp_traverse handler become valid, usually near the - end of the constructor.""" - pass - - at cpython_api([rffi.VOIDP], lltype.Void) -def PyObject_GC_UnTrack(space, op): - """Remove the object op from the set of container objects tracked by the - collector. Note that PyObject_GC_Track() can be called again on - this object to add it back to the set of tracked objects. 
The deallocator - (tp_dealloc handler) should call this for the object before any of - the fields used by the tp_traverse handler become invalid.""" - pass - @cpython_api([PyObject], PyObjectP, error=CANNOT_FAIL) def _PyObject_GetDictPtr(space, op): return lltype.nullptr(PyObjectP.TO) diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -625,18 +625,6 @@ resized object or NULL on failure.""" raise NotImplementedError - at cpython_api([PyObject], lltype.Void) -def _PyObject_GC_TRACK(space, op): - """A macro version of PyObject_GC_Track(). It should not be used for - extension modules.""" - raise NotImplementedError - - at cpython_api([PyObject], lltype.Void) -def _PyObject_GC_UNTRACK(space, op): - """A macro version of PyObject_GC_UnTrack(). It should not be used for - extension modules.""" - raise NotImplementedError - @cpython_api([PyFrameObject], PyObject) def PyGen_New(space, frame): """Create and return a new generator object based on the frame object. 
A From pypy.commits at gmail.com Wed Aug 23 15:54:41 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Aug 2017 12:54:41 -0700 (PDT) Subject: [pypy-commit] pypy default: test, fix for missing userslot tp_iter, tp_iternext, this time via PyObject_Call Message-ID: <599ddd81.85921c0a.d803d.14f3@mx.google.com> Author: Matti Picus Branch: Changeset: r92239:eb3baada82b7 Date: 2017-08-23 21:28 +0300 http://bitbucket.org/pypy/pypy/changeset/eb3baada82b7/ Log: test, fix for missing userslot tp_iter, tp_iternext, this time via PyObject_Call diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -362,3 +362,56 @@ assert 'while calling recurse' in str(e) else: assert False, "expected RuntimeError" + + def test_build_class(self): + # make sure PyObject_Call generates a proper PyTypeObject, + # along the way verify that userslot has iter and next + module = self.import_extension('foo', [ + ("object_call", "METH_O", + ''' + return PyObject_Call((PyObject*)&PyType_Type, args, NULL); + '''), + ('iter', "METH_O", + ''' + if (NULL == args->ob_type->tp_iter) + { + PyErr_SetString(PyExc_TypeError, "NULL tp_iter"); + return NULL; + } + return args->ob_type->tp_iter(args); + '''), + ('next', "METH_O", + ''' + if (NULL == args->ob_type->tp_iternext) + { + PyErr_SetString(PyExc_TypeError, "NULL tp_iternext"); + return NULL; + } + return args->ob_type->tp_iternext(args); + '''),]) + def __init__(self, N): + self.N = N + self.i = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.i < self.N: + i = self.i + self.i += 1 + return i + raise StopIteration + + d = {'__init__': __init__, '__iter__': __iter__, 'next': __next__, + '__next__': next} + C = module.object_call(('Iterable', (object,), d)) + c = C(5) + i = module.iter(c) + out = [] + try: + while 1: + out.append(module.next(i)) + except StopIteration: + pass + assert out == [0, 1, 2, 3, 
4] diff --git a/pypy/module/cpyext/userslot.py b/pypy/module/cpyext/userslot.py --- a/pypy/module/cpyext/userslot.py +++ b/pypy/module/cpyext/userslot.py @@ -122,3 +122,11 @@ else: space.delete(w_self, w_obj) return 0 + + at slot_function([PyObject], PyObject) +def slot_tp_iter(space, w_self): + return space.iter(w_self) + + at slot_function([PyObject], PyObject) +def slot_tp_iternext(space, w_self): + return space.next(w_self) From pypy.commits at gmail.com Thu Aug 24 04:49:03 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 01:49:03 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: Backed out changeset 3e38274ddd35 Message-ID: <599e92ff.491a1c0a.ebdea.2740@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92240:7655999bf67f Date: 2017-08-24 10:48 +0200 http://bitbucket.org/pypy/pypy/changeset/7655999bf67f/ Log: Backed out changeset 3e38274ddd35 (fijal) diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -295,7 +295,7 @@ i += 1 bits |= ord(ch) if ch == '"': - if 1 or bits & 0x80: + if bits & 0x80: # the 8th bit is set, it's an utf8 strnig content_utf8 = self.getslice(start, i-1) content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -20,7 +20,6 @@ from pypy.objspace.std.formatting import mod_format from pypy.objspace.std.stringmethods import StringMethods from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT -from pypy.objspace.std.sliceobject import unwrap_start_stop __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode', 'encode_object', 'decode_object', 'unicode_from_object', @@ -76,13 +75,6 @@ uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL return space.newint(uid) - def 
_convert_idx_params_unicode(self, space, w_start, w_end): - """ Specialcase this for unicode - one less element in the tuple - """ - lenself = self._len() - start, end = unwrap_start_stop(space, lenself, w_start, w_end) - return start, end - def str_w(self, space): return space.text_w(space.str(self)) @@ -133,8 +125,8 @@ return rutf8.compute_length_utf8(self._utf8) def _val(self, space): - import pdb - pdb.set_trace() + #import pdb + #pdb.set_trace() return self._utf8.decode('utf8') @staticmethod @@ -454,6 +446,9 @@ i = rutf8.next_codepoint_pos(val, i) return space.newbool(cased) + def _starts_ends_overflow(self, prefix): + return len(prefix) == 0 + def descr_add(self, space, w_other): try: w_other = self.convert_arg_to_w_unicode(space, w_other) @@ -727,26 +722,6 @@ assert rpos >= lpos # annotator hint, don't remove return self._utf8_sliced(lpos, rpos, lgt) - def descr_startswith(self, space, w_prefix, w_start=None, w_end=None): - (start, end) = self._convert_idx_params_unicode(space, w_start, w_end) - if space.isinstance_w(w_prefix, space.w_tuple): - return self._startswith_tuple(space, w_prefix, start, end) - return space.newbool(self._startswith(space, w_prefix, start, end)) - - def _startswith_tuple(self, space, w_prefix, start, end): - for w_prefix in space.fixedview(w_prefix): - if self._startswith(space, w_prefix, start, end): - return space.w_True - return space.w_False - - def _startswith(self, space, w_prefix, start, end): - prefix = self.convert_arg_to_w_unicode(space, w_prefix)._utf8 - if start > self._len(): - return len(prefix) == 0 # bug-to-bug cpython compatibility - xxx - return startswith(self._utf8, prefix, start, end) - - def descr_getnewargs(self, space): return space.newtuple([W_UnicodeObject(self._utf8, self._length)]) From pypy.commits at gmail.com Thu Aug 24 05:11:12 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 02:11:12 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: (fijal, arigo) Message-ID: 
<599e9830.a799df0a.9ed31.3fbd@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92241:41e0c8d46641 Date: 2017-08-24 11:10 +0200 http://bitbucket.org/pypy/pypy/changeset/41e0c8d46641/ Log: (fijal, arigo) Fix diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -44,7 +44,8 @@ string, len(string), "strict", final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, unicodedata_handler=unicodedata_handler) - return result_u.encode('utf8'), len(result_u) + # XXX argh. we want each surrogate to be encoded separately + return ''.join([u.encode('utf8') for u in result_u]), len(result_u) def decode_raw_unicode_escape(space, string): # XXX pick better length, maybe @@ -52,7 +53,8 @@ result_u, consumed = runicode.str_decode_raw_unicode_escape( string, len(string), "strict", final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle) - return result_u.encode('utf8'), len(result_u) + # XXX argh. we want each surrogate to be encoded separately + return ''.join([u.encode('utf8') for u in result_u]), len(result_u) def check_utf8(space, string): # Surrogates are accepted and not treated specially at all. 
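[Editorial note: the changeset above joins per-character encodings — `''.join([u.encode('utf8') for u in result_u])` — instead of encoding the whole string at once, so that each surrogate code point gets its own 3-byte sequence. A minimal sketch of the distinction in plain CPython 3 (an assumption: the changeset itself is RPython code using `runicode`; here the stdlib `surrogatepass` error handler stands in for what the commit does by hand):]

```python
# Hypothetical illustration, not the RPython code from the changeset.
# A surrogate pair encoded code-point-by-code-point yields two independent
# 3-byte sequences; the astral character the pair would denote has a single
# 4-byte UTF-8 form. The commit wants the former behaviour.

pair = "\ud800\udc00"  # high surrogate U+D800 followed by low surrogate U+DC00

# 'surrogatepass' encodes each surrogate code point on its own:
separate = pair.encode("utf-8", "surrogatepass")
assert separate == b"\xed\xa0\x80\xed\xb0\x80"  # 3 bytes per surrogate

# The character U+10000, which that pair would represent in UTF-16,
# encodes to one 4-byte sequence instead:
combined = "\U00010000".encode("utf-8")
assert combined == b"\xf0\x90\x80\x80"

assert separate != combined
```

[Encoding the string in one call would be free to treat the pair as a unit, which is exactly what the "XXX argh" comment in the diff above is avoiding.]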
From pypy.commits at gmail.com Thu Aug 24 05:44:54 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 02:44:54 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <599ea016.05371c0a.c2fa0.a95e@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92242:367afaf4ad3a Date: 2017-08-24 11:43 +0200 http://bitbucket.org/pypy/pypy/changeset/367afaf4ad3a/ Log: hg merge default Manual merges may go wrong diff too long, truncating to 2000 out of 88216 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -1,6 +1,6 @@ syntax: glob *.py[co] -*.sw[po] +*.sw[pon] *~ .*.swp .idea @@ -8,6 +8,8 @@ .pydevproject __pycache__ +.cache/ +.gdb_history syntax: regexp ^testresult$ ^site-packages$ @@ -23,16 +25,17 @@ ^pypy/module/cpyext/test/.+\.manifest$ ^pypy/module/test_lib_pypy/ctypes_tests/.+\.o$ ^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$ -^pypy/module/cppyy/src/.+\.o$ -^pypy/module/cppyy/bench/.+\.so$ -^pypy/module/cppyy/bench/.+\.root$ -^pypy/module/cppyy/bench/.+\.d$ -^pypy/module/cppyy/src/.+\.errors$ -^pypy/module/cppyy/test/.+_rflx\.cpp$ -^pypy/module/cppyy/test/.+\.so$ -^pypy/module/cppyy/test/.+\.rootmap$ -^pypy/module/cppyy/test/.+\.exe$ -^pypy/module/cppyy/test/.+_cint.h$ +^pypy/module/_cppyy/src/.+\.o$ +^pypy/module/_cppyy/bench/.+\.so$ +^pypy/module/_cppyy/bench/.+\.root$ +^pypy/module/_cppyy/bench/.+\.d$ +^pypy/module/_cppyy/src/.+\.errors$ +^pypy/module/_cppyy/test/.+_rflx\.cpp$ +^pypy/module/_cppyy/test/.+\.so$ +^pypy/module/_cppyy/test/.+\.rootmap$ +^pypy/module/_cppyy/test/.+\.exe$ +^pypy/module/_cppyy/test/.+_cint.h$ +^pypy/module/_cppyy/.+/*\.pcm$ ^pypy/module/test_lib_pypy/cffi_tests/__pycache__.+$ ^pypy/doc/.+\.html$ ^pypy/doc/config/.+\.rst$ @@ -49,6 +52,11 @@ ^rpython/translator/goal/target.+-c$ ^rpython/translator/goal/.+\.exe$ ^rpython/translator/goal/.+\.dll$ +^rpython/rlib/rvmprof/src/shared/libbacktrace/Makefile$ 
+^rpython/rlib/rvmprof/src/shared/libbacktrace/config.guess$ +^rpython/rlib/rvmprof/src/shared/libbacktrace/config.h$ +^rpython/rlib/rvmprof/src/shared/libbacktrace/config.log$ +^rpython/rlib/rvmprof/src/shared/libbacktrace/config.status$ ^pypy/goal/pypy-translation-snapshot$ ^pypy/goal/pypy-c ^pypy/goal/.+\.exe$ @@ -60,6 +68,9 @@ ^lib_pypy/ctypes_config_cache/_.+_cache\.py$ ^lib_pypy/ctypes_config_cache/_.+_.+_\.py$ ^lib_pypy/_libmpdec/.+.o$ +^lib_pypy/.+.c$ +^lib_pypy/.+.o$ +^lib_pypy/.+.so$ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ @@ -74,8 +85,7 @@ ^rpython/doc/_build/.*$ ^compiled ^.git/ -^.hypothesis/ +.hypothesis/ ^release/ ^rpython/_cache$ -pypy/module/cppyy/.+/*\.pcm diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -34,3 +34,9 @@ 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 0e2d9a73f5a1818d0245d75daccdbe21b2d5c3ef release-pypy2.7-v5.4.1 aff251e543859ce4508159dd9f1a82a2f553de00 release-pypy2.7-v5.6.0 +fa3249d55d15b9829e1be69cdf45b5a44cec902d release-pypy2.7-v5.7.0 +b16a4363e930f6401bceb499b9520955504c6cb0 release-pypy3.5-v5.7.0 +1aa2d8e03cdfab54b7121e93fda7e98ea88a30bf release-pypy2.7-v5.7.1 +2875f328eae2216a87f3d6f335092832eb031f56 release-pypy3.5-v5.7.1 +c925e73810367cd960a32592dd7f728f436c125c release-pypy2.7-v5.8.0 +a37ecfe5f142bc971a86d17305cc5d1d70abec64 release-pypy3.5-v5.8.0 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,5 @@ +#encoding utf-8 + License ======= @@ -37,14 +39,14 @@ Armin Rigo Maciej Fijalkowski - Carl Friedrich Bolz + Carl Friedrich Bolz-Tereick Amaury Forgeot d'Arc Antonio Cuni + Matti Picus Samuele Pedroni - Matti Picus + Ronan Lamy Alex Gaynor Philip Jenvey - Ronan Lamy Brian Kearns Richard Plangger Michael Hudson @@ -55,12 +57,12 @@ Hakan Ardo Benjamin Peterson Anders Chrigstrom + Wim Lavrijsen Eric van Riet Paap - Wim Lavrijsen Richard Emslie Alexander Schremmer + Remi Meier Dan Villiom Podlaski Christiansen - Remi Meier Lukas Diekmann Sven Hager Anders 
Lehmann @@ -83,8 +85,8 @@ Lawrence Oluyede Bartosz Skowron Daniel Roberts + Adrien Di Mascio Niko Matsakis - Adrien Di Mascio Alexander Hesse Ludovic Aubry Jacob Hallen @@ -99,278 +101,288 @@ Vincent Legoll Michael Foord Stephan Diehl + Stefano Rivera Stefan Schwarzer + Tomek Meka Valentino Volonghi - Tomek Meka - Stefano Rivera Patrick Maupin Devin Jeanpierre Bob Ippolito Bruno Gola David Malcolm Jean-Paul Calderone + Squeaky + Edd Barrett Timo Paulssen - Edd Barrett - Squeaky Marius Gedminas Alexandre Fayolle Simon Burton + Nicolas Truessel Martin Matusiak - Nicolas Truessel + Laurence Tratt + Wenzhu Man Konstantin Lopuhin - Wenzhu Man John Witulski - Laurence Tratt + Greg Price Ivan Sichmann Freitas - Greg Price Dario Bertini + Jeremy Thurgood Mark Pearse Simon Cross - Jeremy Thurgood + Tobias Pape Andreas Stührk - Tobias Pape Jean-Philippe St. Pierre Guido van Rossum Pavel Vinogradov Paweł Piotr Przeradowski + William Leslie + marky1991 + Ilya Osadchiy + Tobias Oberstein Paul deGrandis - Ilya Osadchiy - marky1991 - Tobias Oberstein + Boris Feigin + Taavi Burns Adrian Kuhn - Boris Feigin tav - Taavi Burns Georg Brandl Bert Freudenberg Stian Andreassen Wanja Saatkamp + Mike Blume + Joannah Nanjekye Gerald Klix - Mike Blume Oscar Nierstrasz + Rami Chowdhury Stefan H. 
Muller - Rami Chowdhury + Tim Felgentreff Eugene Oden + Jeff Terrace Henry Mason Vasily Kuznetsov Preston Timmons David Ripton - Jeff Terrace - Tim Felgentreff Dusty Phillips Lukas Renggli Guenter Jantzen - William Leslie + Jasper Schulz Ned Batchelder + Amit Regmi Anton Gulenko - Amit Regmi - Ben Young - Jasper Schulz + Sergey Matyunin + Andrew Chambers Nicolas Chauvat Andrew Durdin - Andrew Chambers - Sergey Matyunin + Ben Young Michael Schneider Nicholas Riley Jason Chu Igor Trindade Oliveira Yichao Yu + Michael Twomey Rocco Moretti Gintautas Miliauskas - Michael Twomey Lucian Branescu Mihaila anatoly techtonik + Dodan Mihai + Karl Bartel Gabriel Lavoie + Jared Grubb Olivier Dormond - Jared Grubb - Karl Bartel Wouter van Heyst + Sebastian Pawluś Brian Dorsey Victor Stinner Andrews Medina - Sebastian Pawluś - Stuart Williams - Daniel Patrick Aaron Iles Toby Watson + Daniel Patrick + Stuart Williams Antoine Pitrou Christian Hudon + Justas Sadzevicius + Neil Shepperd Michael Cheng - Justas Sadzevicius + Mikael Schönenberg + Stanislaw Halik + Berkin Ilbeyi Gasper Zejn - Neil Shepperd - Stanislaw Halik - Mikael Schönenberg - Berkin Ilbeyi Faye Zhao Elmo Mäntynen - Jonathan David Riehl Anders Qvist Corbin Simpson Chirag Jadwani + Jonathan David Riehl Beatrice During Alex Perry + p_zieschang at yahoo.de + Robert Zaremba + Alan McIntyre + Alexander Sedov Vaibhav Sood - Alan McIntyre Reuben Cummings - Alexander Sedov - p_zieschang at yahoo.de Attila Gobi + Alecsandru Patrascu Christopher Pope - Aaron Gallagher + Tristan Arthur + Christian Tismer + Dan Stromberg + Carl Meyer Florin Papa - Christian Tismer - Marc Abramowitz - Dan Stromberg - Arjun Naik + Jens-Uwe Mager Valentina Mukhamedzhanova Stefano Parmesan touilleMan + Marc Abramowitz + Arjun Naik + Aaron Gallagher Alexis Daboville - Jens-Uwe Mager - Carl Meyer + Pieter Zieschang Karl Ramm - Pieter Zieschang - Gabriel Lukas Vacek - Kunal Grover - Andrew Dalke + Omer Katz + Jacek Generowicz Sylvain Thenault Jakub 
Stasiak + Stefan Beyer + Andrew Dalke + Alejandro J. Cura + Vladimir Kryachko + Gabriel + Mark Williams + Kunal Grover Nathan Taylor - Vladimir Kryachko - Omer Katz - Mark Williams - Jacek Generowicz - Alejandro J. Cura + Travis Francis Athougies + Yasir Suhail + Sergey Kishchenko + Martin Blais + Lutz Paelike + Ian Foote + Philipp Rustemeuer + Catalin Gabriel Manciu Jacob Oscarson - Travis Francis Athougies Ryan Gonzalez - Ian Foote Kristjan Valur Jonsson + Lucio Torre + Richard Lancaster + Dan Buch + Lene Wagner + Tomo Cocoa David Lievens Neil Blakey-Milner - Lutz Paelike - Lucio Torre + Henrik Vendelbo Lars Wassermann - Philipp Rustemeuer - Henrik Vendelbo - Richard Lancaster - Yasir Suhail - Dan Buch + Ignas Mikalajunas + Christoph Gerum Miguel de Val Borro Artur Lisiecki - Sergey Kishchenko - Ignas Mikalajunas - Alecsandru Patrascu - Christoph Gerum - Martin Blais - Lene Wagner - Catalin Gabriel Manciu - Tomo Cocoa - Kim Jin Su - rafalgalczynski at gmail.com Toni Mattis - Amber Brown + Laurens Van Houtven + Bobby Impollonia + Roberto De Ioris + Jeong YunWon + Christopher Armstrong + Aaron Tubbs + Vasantha Ganesh K + Jason Michalski + Markus Holtermann + Andrew Thompson + Yusei Tahara + Ruochen Huang + Fabio Niephaus + Akira Li + Gustavo Niemeyer + Rafał Gałczyński + Logan Chien Lucas Stadler - Julian Berman - Markus Holtermann roberto at goyle + Matt Bogosian Yury V. 
Zaytsev - Anna Katrina Dominguez - Bobby Impollonia - Vasantha Ganesh K - Andrew Thompson florinpapa - Yusei Tahara - Aaron Tubbs - Ben Darnell - Roberto De Ioris - Logan Chien - Juan Francisco Cantero Hurtado - Ruochen Huang - Jeong YunWon - Godefroid Chappelle - Joshua Gilbert - Dan Colish - Christopher Armstrong - Michael Hudson-Doyle Anders Sigfridsson Nikolay Zinov - Jason Michalski + rafalgalczynski at gmail.com + Joshua Gilbert + Anna Katrina Dominguez + Kim Jin Su + Amber Brown + Nate Bragg + Ben Darnell + Juan Francisco Cantero Hurtado + Godefroid Chappelle + Julian Berman + Michael Hudson-Doyle Floris Bruynooghe - Laurens Van Houtven - Akira Li - Gustavo Niemeyer Stephan Busemann - Rafał Gałczyński - Matt Bogosian + Dan Colish timo - Christian Muirhead - Berker Peksag - James Lan Volodymyr Vladymyrov - shoma hosaka - Ben Mather - Niclas Olofsson - Matthew Miller - Rodrigo Araújo + Daniel Neuhäuser + Flavio Percoco halgari - Boglarka Vezer - Chris Pressey - Buck Golemon - Diana Popa - Konrad Delong - Dinu Gherman + Jim Baker Chris Lambacher coolbutuseless at gmail.com + Mike Bayer + Rodrigo Araújo Daniil Yarancev - Jim Baker + OlivierBlanvillain + Jonas Pfannschmidt + Zearin + Andrey Churin Dan Crosta - Nikolaos-Digenis Karagiannis - James Robert - Armin Ronacher - Brett Cannon - Donald Stufft - yrttyr - aliceinwire - OlivierBlanvillain - Dan Sanders - Zooko Wilcox-O Hearn + reubano at gmail.com + Julien Phalip + Roman Podoliaka + Eli Stevens + Boglarka Vezer + PavloKapyshin Tomer Chachamu Christopher Groskopf Asmo Soinio - jiaaro - Mads Kiilerich Antony Lee - Jason Madden - Daniel Neuh�user - reubano at gmail.com - Yaroslav Fedevych Jim Hunziker - Markus Unterwaditzer - Even Wiik Thomassen - jbs - squeaky - soareschen - Jonas Pfannschmidt - Kurt Griffiths - Mike Bayer - Stefan Marr - Flavio Percoco - Kristoffer Kleine + shoma hosaka + Buck Golemon + Iraklis D. 
+ JohnDoe + yrttyr Michael Chermside Anna Ravencroft + remarkablerocket + Petre Vijiac + Berker Peksag + Christian Muirhead + soareschen + Matthew Miller + Konrad Delong + Dinu Gherman pizi - remarkablerocket - Andrey Churin - Zearin - Eli Stevens - Tobias Diaz - Julien Phalip - Roman Podoliaka + James Robert + Armin Ronacher + Diana Popa + Mads Kiilerich + Brett Cannon + aliceinwire + Zooko Wilcox-O Hearn + James Lan + jiaaro + Markus Unterwaditzer + Kristoffer Kleine + Graham Markall Dan Loewenherz werat + Niclas Olofsson + Chris Pressey + Tobias Diaz + Nikolaos-Digenis Karagiannis + Kurt Griffiths + Ben Mather + Donald Stufft + Dan Sanders + Jason Madden + Yaroslav Fedevych + Even Wiik Thomassen + Stefan Marr Heinrich-Heine University, Germany Open End AB (formerly AB Strakt), Sweden diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ RUNINTERP = $(PYPY_EXECUTABLE) endif -.PHONY: cffi_imports +.PHONY: pypy-c cffi_imports pypy-c: @echo @@ -32,7 +32,7 @@ @echo "====================================================================" @echo @sleep 5 - $(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py + cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py # Note: the -jN option, or MAKEFLAGS=-jN, are not usable. They are # replaced with an opaque --jobserver option by the time this Makefile @@ -40,4 +40,4 @@ # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html cffi_imports: pypy-c - PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true + PYTHONPATH=. pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -27,14 +27,19 @@ Building ======== -build with: +First switch to or download the correct branch. The basic choices are +``default`` for Python 2.7 and, for Python 3.X, the corresponding py3.X +branch (e.g. ``py3.5``). + +Build with: .. 
code-block:: console $ rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py -This ends up with ``pypy-c`` binary in the main pypy directory. We suggest -to use virtualenv with the resulting pypy-c as the interpreter; you can -find more details about various installation schemes here: +This ends up with a ``pypy-c`` or ``pypy3-c`` binary in the main pypy +directory. We suggest to use virtualenv with the resulting +pypy-c/pypy3-c as the interpreter; you can find more details about +various installation schemes here: http://doc.pypy.org/en/latest/install.html diff --git a/extra_tests/README.txt b/extra_tests/README.txt new file mode 100644 --- /dev/null +++ b/extra_tests/README.txt @@ -0,0 +1,5 @@ +The tests in this directory are a complement to lib-python/3/test/. + +They are meant to run on top of a compiled pypy3 or CPython3.5 in an +environment containing at least pytest and hypothesis, using a command like +'pytest extra_tests/'. diff --git a/extra_tests/pytest.ini b/extra_tests/pytest.ini new file mode 100644 diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_unicode.py @@ -0,0 +1,34 @@ +import pytest +from hypothesis import strategies as st +from hypothesis import given, settings, example + +from unicodedata import normalize + +# For every (n1, n2, n3) triple, applying n1 then n2 must be the same +# as applying n3. 
+# Reference: http://unicode.org/reports/tr15/#Design_Goals +compositions = [ + ('NFC', 'NFC', 'NFC'), + ('NFC', 'NFD', 'NFD'), + ('NFC', 'NFKC', 'NFKC'), + ('NFC', 'NFKD', 'NFKD'), + ('NFD', 'NFC', 'NFC'), + ('NFD', 'NFD', 'NFD'), + ('NFD', 'NFKC', 'NFKC'), + ('NFD', 'NFKD', 'NFKD'), + ('NFKC', 'NFC', 'NFKC'), + ('NFKC', 'NFD', 'NFKD'), + ('NFKC', 'NFKC', 'NFKC'), + ('NFKC', 'NFKD', 'NFKD'), + ('NFKD', 'NFC', 'NFKC'), + ('NFKD', 'NFD', 'NFKD'), + ('NFKD', 'NFKC', 'NFKC'), + ('NFKD', 'NFKD', 'NFKD'), +] + + at pytest.mark.parametrize('norm1, norm2, norm3', compositions) + at settings(max_examples=1000) + at example(s=u'---\uafb8\u11a7---') # issue 2289 + at given(s=st.text()) +def test_composition(s, norm1, norm2, norm3): + assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) diff --git a/include/README b/include/README --- a/include/README +++ b/include/README @@ -1,7 +1,11 @@ This directory contains all the include files needed to build cpython extensions with PyPy. Note that these are just copies of the original headers -that are in pypy/module/cpyext/include: they are automatically copied from -there during translation. +that are in pypy/module/cpyext/{include,parse}: they are automatically copied +from there during translation. -Moreover, pypy_decl.h and pypy_macros.h are automatically generated, also -during translation. +Moreover, some pypy-specific files are automatically generated, also during +translation. 
Currently they are: +* pypy_decl.h +* pypy_macros.h +* pypy_numpy.h +* pypy_structmember_decl.h diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -361,17 +361,20 @@ if handle is None: if flags & _FUNCFLAG_CDECL: - self._handle = _ffi.CDLL(name, mode) + pypy_dll = _ffi.CDLL(name, mode) else: - self._handle = _ffi.WinDLL(name, mode) - else: - self._handle = handle + pypy_dll = _ffi.WinDLL(name, mode) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int + self._handle = handle def __repr__(self): - return "<%s '%s', handle %r at 0x%x>" % ( - self.__class__.__name__, self._name, self._handle, - id(self) & (_sys.maxint * 2 + 1)) - + return "<%s '%s', handle %x at %x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxint*2 + 1)), + id(self) & (_sys.maxint*2 + 1)) def __getattr__(self, name): if name.startswith('__') and name.endswith('__'): diff --git a/lib-python/2.7/ctypes/test/test_unaligned_structures.py b/lib-python/2.7/ctypes/test/test_unaligned_structures.py --- a/lib-python/2.7/ctypes/test/test_unaligned_structures.py +++ b/lib-python/2.7/ctypes/test/test_unaligned_structures.py @@ -37,7 +37,10 @@ for typ in byteswapped_structures: ## print >> sys.stderr, typ.value self.assertEqual(typ.value.offset, 1) - o = typ() + try: + o = typ() + except NotImplementedError as e: + self.skipTest(str(e)) # for PyPy o.value = 4 self.assertEqual(o.value, 4) diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py --- a/lib-python/2.7/distutils/sysconfig_pypy.py +++ b/lib-python/2.7/distutils/sysconfig_pypy.py @@ -61,12 +61,12 @@ def _init_posix(): """Initialize the module as appropriate for POSIX systems.""" g = {} - g['CC'] = "gcc -pthread" - g['CXX'] = "g++ -pthread" + g['CC'] = "cc -pthread" + g['CXX'] = "c++ -pthread" g['OPT'] = 
"-DNDEBUG -O2" g['CFLAGS'] = "-DNDEBUG -O2" g['CCSHARED'] = "-fPIC" - g['LDSHARED'] = "gcc -pthread -shared" + g['LDSHARED'] = "cc -pthread -shared" g['SO'] = [s[0] for s in imp.get_suffixes() if s[2] == imp.C_EXTENSION][0] g['AR'] = "ar" g['ARFLAGS'] = "rc" @@ -218,6 +218,10 @@ compiler.shared_lib_extension = so_ext +def get_config_h_filename(): + """Returns the path of pyconfig.h.""" + inc_dir = get_python_inc(plat_specific=1) + return os.path.join(inc_dir, 'pyconfig.h') from sysconfig_cpython import ( parse_makefile, _variable_rx, expand_makefile_vars) diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. 
See Python bug #445902: diff --git a/lib-python/2.7/sysconfig.py b/lib-python/2.7/sysconfig.py --- a/lib-python/2.7/sysconfig.py +++ b/lib-python/2.7/sysconfig.py @@ -29,8 +29,8 @@ 'pypy': { 'stdlib': '{base}/lib-{implementation_lower}/{py_version_short}', 'platstdlib': '{base}/lib-{implementation_lower}/{py_version_short}', - 'purelib': '{base}/lib-{implementation_lower}/{py_version_short}', - 'platlib': '{base}/lib-{implementation_lower}/{py_version_short}', + 'purelib': '{base}/site-packages', + 'platlib': '{base}/site-packages', 'include': '{base}/include', 'platinclude': '{base}/include', 'scripts': '{base}/bin', @@ -369,11 +369,8 @@ def _init_posix(vars): """Initialize the module as appropriate for POSIX systems.""" - # in cPython, _sysconfigdata is generated at build time, see _generate_posix_vars() - # in PyPy no such module exists - #from _sysconfigdata import build_time_vars - #vars.update(build_time_vars) - return + from _sysconfigdata import build_time_vars + vars.update(build_time_vars) def _init_non_posix(vars): """Initialize the module as appropriate for NT""" @@ -529,7 +526,9 @@ for suffix, mode, type_ in imp.get_suffixes(): if type_ == imp.C_EXTENSION: _CONFIG_VARS['SOABI'] = suffix.split('.')[1] - break + break + _CONFIG_VARS['INCLUDEPY'] = os.path.join(_CONFIG_VARS['prefix'], + 'include') if args: vals = [] diff --git a/lib-python/2.7/test/test_os.py b/lib-python/2.7/test/test_os.py --- a/lib-python/2.7/test/test_os.py +++ b/lib-python/2.7/test/test_os.py @@ -580,6 +580,7 @@ "getentropy() does not use a file descriptor") class URandomFDTests(unittest.TestCase): @unittest.skipUnless(resource, "test requires the resource module") + @test_support.impl_detail(pypy=False) # on Linux, may use getrandom() def test_urandom_failure(self): # Check urandom() failing when it is not able to open /dev/random. 
# We spawn a new process to make the test more robust (if getrlimit() diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -309,9 +309,12 @@ def __init__(self, message, category, filename, lineno, file=None, line=None): - local_values = locals() - for attr in self._WARNING_DETAILS: - setattr(self, attr, local_values[attr]) + self.message = message + self.category = category + self.filename = filename + self.lineno = lineno + self.file = file + self.line = line self._category_name = category.__name__ if category else None def __str__(self): diff --git a/lib-python/2.7/weakref.py b/lib-python/2.7/weakref.py --- a/lib-python/2.7/weakref.py +++ b/lib-python/2.7/weakref.py @@ -36,9 +36,9 @@ except ImportError: def _delitem_if_value_is(d, key, value): try: - if self.data[key] is value: # fall-back: there is a potential + if d[key] is value: # fall-back: there is a potential # race condition in multithreaded programs HERE - del self.data[key] + del d[key] except KeyError: pass diff --git a/lib-python/2.7/zipfile.py b/lib-python/2.7/zipfile.py --- a/lib-python/2.7/zipfile.py +++ b/lib-python/2.7/zipfile.py @@ -622,19 +622,23 @@ """Read and return up to n bytes. If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. """ - buf = '' + # PyPy modification: don't do repeated string concatenation + buf = [] + lenbuf = 0 if n is None: n = -1 while True: if n < 0: data = self.read1(n) - elif n > len(buf): - data = self.read1(n - len(buf)) + elif n > lenbuf: + data = self.read1(n - lenbuf) else: - return buf + break if len(data) == 0: - return buf - buf += data + break + lenbuf += len(data) + buf.append(data) + return "".join(buf) def _update_crc(self, newdata, eof): # Update the CRC using the given data. 
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -76,17 +76,22 @@ return self._type_._alignmentofinstances() def _CData_output(self, resarray, base=None, index=-1): - # this seems to be a string if we're array of char, surprise! - from ctypes import c_char, c_wchar - if self._type_ is c_char: - return _rawffi.charp2string(resarray.buffer, self._length_) - if self._type_ is c_wchar: - return _rawffi.wcharp2unicode(resarray.buffer, self._length_) + from _rawffi.alt import types + # If a char_p or unichar_p is received, skip the string interpretation + if base._ffiargtype != types.Pointer(types.char_p) and \ + base._ffiargtype != types.Pointer(types.unichar_p): + # this seems to be a string if we're array of char, surprise! + from ctypes import c_char, c_wchar + if self._type_ is c_char: + return _rawffi.charp2string(resarray.buffer, self._length_) + if self._type_ is c_wchar: + return _rawffi.wcharp2unicode(resarray.buffer, self._length_) res = self.__new__(self) ffiarray = self._ffiarray.fromaddress(resarray.buffer, self._length_) res._buffer = ffiarray - res._base = base - res._index = index + if base is not None: + res._base = base + res._index = index return res def _CData_retval(self, resbuffer): diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -64,8 +64,9 @@ res = object.__new__(self) res.__class__ = self res.__dict__['_buffer'] = resbuffer - res.__dict__['_base'] = base - res.__dict__['_index'] = index + if base is not None: + res.__dict__['_base'] = base + res.__dict__['_index'] = index return res def _CData_retval(self, resbuffer): @@ -81,7 +82,7 @@ return False def in_dll(self, dll, name): - return self.from_address(dll._handle.getaddressindll(name)) + return self.from_address(dll.__pypy_dll__.getaddressindll(name)) def from_buffer(self, obj, offset=0): size = self._sizeofinstances() 
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -1,4 +1,3 @@ - from _ctypes.basics import _CData, _CDataMeta, cdata_from_address from _ctypes.primitive import SimpleType, _SimpleCData from _ctypes.basics import ArgumentError, keepalive_key @@ -9,13 +8,16 @@ import sys import traceback -try: from __pypy__ import builtinify -except ImportError: builtinify = lambda f: f + +try: + from __pypy__ import builtinify +except ImportError: + builtinify = lambda f: f # XXX this file needs huge refactoring I fear -PARAMFLAG_FIN = 0x1 -PARAMFLAG_FOUT = 0x2 +PARAMFLAG_FIN = 0x1 +PARAMFLAG_FOUT = 0x2 PARAMFLAG_FLCID = 0x4 PARAMFLAG_COMBINED = PARAMFLAG_FIN | PARAMFLAG_FOUT | PARAMFLAG_FLCID @@ -24,9 +26,9 @@ PARAMFLAG_FIN, PARAMFLAG_FIN | PARAMFLAG_FOUT, PARAMFLAG_FIN | PARAMFLAG_FLCID - ) +) -WIN64 = sys.platform == 'win32' and sys.maxint == 2**63 - 1 +WIN64 = sys.platform == 'win32' and sys.maxint == 2 ** 63 - 1 def get_com_error(errcode, riid, pIunk): @@ -35,6 +37,7 @@ from _ctypes import COMError return COMError(errcode, None, None) + @builtinify def call_function(func, args): "Only for debugging so far: So that we can call CFunction instances" @@ -94,14 +97,9 @@ "item %d in _argtypes_ has no from_param method" % ( i + 1,)) self._argtypes_ = list(argtypes) - self._check_argtypes_for_fastpath() + argtypes = property(_getargtypes, _setargtypes) - def _check_argtypes_for_fastpath(self): - if all([hasattr(argtype, '_ffiargshape_') for argtype in self._argtypes_]): - fastpath_cls = make_fastpath_subclass(self.__class__) - fastpath_cls.enable_fastpath_maybe(self) - def _getparamflags(self): return self._paramflags @@ -126,27 +124,26 @@ raise TypeError( "paramflags must be a sequence of (int [,string [,value]]) " "tuples" - ) + ) if not isinstance(flag, int): raise TypeError( "paramflags must be a sequence of (int [,string [,value]]) " "tuples" - ) + ) _flag = flag & PARAMFLAG_COMBINED 
if _flag == PARAMFLAG_FOUT: typ = self._argtypes_[idx] if getattr(typ, '_ffiargshape_', None) not in ('P', 'z', 'Z'): raise TypeError( "'out' parameter %d must be a pointer type, not %s" - % (idx+1, type(typ).__name__) - ) + % (idx + 1, type(typ).__name__) + ) elif _flag not in VALID_PARAMFLAGS: raise TypeError("paramflag value %d not supported" % flag) self._paramflags = paramflags paramflags = property(_getparamflags, _setparamflags) - def _getrestype(self): return self._restype_ @@ -156,7 +153,7 @@ from ctypes import c_int restype = c_int if not (isinstance(restype, _CDataMeta) or restype is None or - callable(restype)): + callable(restype)): raise TypeError("restype must be a type, a callable, or None") self._restype_ = restype @@ -168,15 +165,18 @@ def _geterrcheck(self): return getattr(self, '_errcheck_', None) + def _seterrcheck(self, errcheck): if not callable(errcheck): raise TypeError("The errcheck attribute must be callable") self._errcheck_ = errcheck + def _delerrcheck(self): try: del self._errcheck_ except AttributeError: pass + errcheck = property(_geterrcheck, _seterrcheck, _delerrcheck) def _ffishapes(self, args, restype): @@ -188,7 +188,7 @@ raise TypeError("invalid result type for callback function") restype = restype._ffiargshape_ else: - restype = 'O' # void + restype = 'O' # void return argtypes, restype def _set_address(self, address): @@ -201,7 +201,7 @@ def __init__(self, *args): self.name = None - self._objects = {keepalive_key(0):self} + self._objects = {keepalive_key(0): self} self._needs_free = True # Empty function object -- this is needed for casts @@ -222,10 +222,8 @@ if self._argtypes_ is None: self._argtypes_ = [] self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype) - self._check_argtypes_for_fastpath() return - # A callback into python if callable(argument) and not argsl: self.callable = argument @@ -259,7 +257,7 @@ if (sys.platform == 'win32' and isinstance(argument, (int, long)) and argsl): ffiargs, ffires = 
self._ffishapes(self._argtypes_, self._restype_) - self._com_index = argument + 0x1000 + self._com_index = argument + 0x1000 self.name = argsl.pop(0) if argsl: self.paramflags = argsl.pop(0) @@ -281,6 +279,7 @@ except SystemExit as e: handle_system_exit(e) raise + return f def __call__(self, *args, **kwargs): @@ -317,7 +316,7 @@ except: exc_info = sys.exc_info() traceback.print_tb(exc_info[2], file=sys.stderr) - print >>sys.stderr, "%s: %s" % (exc_info[0].__name__, exc_info[1]) + print >> sys.stderr, "%s: %s" % (exc_info[0].__name__, exc_info[1]) return 0 if self._restype_ is not None: return res @@ -328,7 +327,7 @@ # really slow". Now we don't worry that much about slowness # of ctypes, and it's strange to get warnings for perfectly- # legal code. - #warnings.warn('C function without declared arguments called', + # warnings.warn('C function without declared arguments called', # RuntimeWarning, stacklevel=2) argtypes = [] @@ -337,7 +336,7 @@ if not args: raise ValueError( "native COM method call without 'this' parameter" - ) + ) thisvalue = args[0] thisarg = cast(thisvalue, POINTER(POINTER(c_void_p))) keepalives, newargs, argtypes, outargs, errcheckargs = ( @@ -366,7 +365,6 @@ return tuple(outargs) def _call_funcptr(self, funcptr, *newargs): - if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO: tmp = _rawffi.get_errno() _rawffi.set_errno(get_errno()) @@ -431,8 +429,8 @@ ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes] ffires = restype.get_ffi_argtype() return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_) - - cdll = self.dll._handle + + cdll = self.dll.__pypy_dll__ try: ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes] ffi_restype = restype.get_ffi_argtype() @@ -450,7 +448,7 @@ # funcname -> _funcname@ # where n is 0, 4, 8, 12, ..., 128 for i in range(33): - mangled_name = "_%s@%d" % (self.name, i*4) + mangled_name = "_%s@%d" % (self.name, i * 4) try: return cdll.getfunc(mangled_name, ffi_argtypes, ffi_restype, @@ -492,7 
+490,7 @@ for argtype, arg in zip(argtypes, args): param = argtype.from_param(arg) _type_ = getattr(argtype, '_type_', None) - if _type_ == 'P': # special-case for c_void_p + if _type_ == 'P': # special-case for c_void_p param = param._get_buffer_value() elif self._is_primitive(argtype): param = param.value @@ -668,69 +666,11 @@ self._needs_free = False -def make_fastpath_subclass(CFuncPtr): - if CFuncPtr._is_fastpath: - return CFuncPtr - # - try: - return make_fastpath_subclass.memo[CFuncPtr] - except KeyError: - pass - - class CFuncPtrFast(CFuncPtr): - - _is_fastpath = True - _slowpath_allowed = True # set to False by tests - - @classmethod - def enable_fastpath_maybe(cls, obj): - if (obj.callable is None and - obj._com_index is None): - obj.__class__ = cls - - def __rollback(self): - assert self._slowpath_allowed - self.__class__ = CFuncPtr - - # disable the fast path if we reset argtypes - def _setargtypes(self, argtypes): - self.__rollback() - self._setargtypes(argtypes) - argtypes = property(CFuncPtr._getargtypes, _setargtypes) - - def _setcallable(self, func): - self.__rollback() - self.callable = func - callable = property(lambda x: None, _setcallable) - - def _setcom_index(self, idx): - self.__rollback() - self._com_index = idx - _com_index = property(lambda x: None, _setcom_index) - - def __call__(self, *args): - thisarg = None - argtypes = self._argtypes_ - restype = self._restype_ - funcptr = self._getfuncptr(argtypes, restype, thisarg) - try: - result = self._call_funcptr(funcptr, *args) - result, _ = self._do_errcheck(result, args) - except (TypeError, ArgumentError, UnicodeDecodeError): - assert self._slowpath_allowed - return CFuncPtr.__call__(self, *args) - return result - - make_fastpath_subclass.memo[CFuncPtr] = CFuncPtrFast - return CFuncPtrFast -make_fastpath_subclass.memo = {} - - def handle_system_exit(e): # issue #1194: if we get SystemExit here, then exit the interpreter. # Highly obscure imho but some people seem to depend on it. 
if sys.flags.inspect: - return # Don't exit if -i flag was given. + return # Don't exit if -i flag was given. else: code = e.code if isinstance(code, int): diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} @@ -234,6 +250,9 @@ if ('_abstract_' in cls.__dict__ or cls is Structure or cls is union.Union): raise TypeError("abstract class") + if hasattr(cls, '_swappedbytes_'): + raise NotImplementedError("missing in PyPy: structure/union with " + "swapped (non-native) byte ordering") if hasattr(cls, '_ffistruct_'): self.__dict__['_buffer'] = self._ffistruct_(autofree=True) return self diff --git a/lib_pypy/_curses.py b/lib_pypy/_curses.py --- a/lib_pypy/_curses.py +++ b/lib_pypy/_curses.py @@ -8,6 +8,9 @@ from _curses_cffi import ffi, lib +version = b"2.2" +__version__ = b"2.2" + def _copy_to_globals(name): globals()[name] = getattr(lib, name) @@ -60,10 +63,6 @@ _setup() -# Do we want this? -# version = "2.2" -# __version__ = "2.2" - # ____________________________________________________________ @@ -913,101 +912,29 @@ return None -# XXX: Do something about the following? 
-# /* Internal helper used for updating curses.LINES, curses.COLS, _curses.LINES -# * and _curses.COLS */ -# #if defined(HAVE_CURSES_RESIZETERM) || defined(HAVE_CURSES_RESIZE_TERM) -# static int -# update_lines_cols(void) -# { -# PyObject *o; -# PyObject *m = PyImport_ImportModuleNoBlock("curses"); +# Internal helper used for updating curses.LINES, curses.COLS, _curses.LINES +# and _curses.COLS +def update_lines_cols(): + globals()["LINES"] = lib.LINES + globals()["COLS"] = lib.COLS + try: + m = sys.modules["curses"] + m.LINES = lib.LINES + m.COLS = lib.COLS + except (KeyError, AttributeError): + pass -# if (!m) -# return 0; -# o = PyInt_FromLong(LINES); -# if (!o) { -# Py_DECREF(m); -# return 0; -# } -# if (PyObject_SetAttrString(m, "LINES", o)) { -# Py_DECREF(m); -# Py_DECREF(o); -# return 0; -# } -# if (PyDict_SetItemString(ModDict, "LINES", o)) { -# Py_DECREF(m); -# Py_DECREF(o); -# return 0; -# } -# Py_DECREF(o); -# o = PyInt_FromLong(COLS); -# if (!o) { -# Py_DECREF(m); -# return 0; -# } -# if (PyObject_SetAttrString(m, "COLS", o)) { -# Py_DECREF(m); -# Py_DECREF(o); -# return 0; -# } -# if (PyDict_SetItemString(ModDict, "COLS", o)) { -# Py_DECREF(m); -# Py_DECREF(o); -# return 0; -# } -# Py_DECREF(o); -# Py_DECREF(m); -# return 1; -# } -# #endif +def resizeterm(lines, columns): + _ensure_initialised() + _check_ERR(lib.resizeterm(lines, columns), "resizeterm") + update_lines_cols() -# #ifdef HAVE_CURSES_RESIZETERM -# static PyObject * -# PyCurses_ResizeTerm(PyObject *self, PyObject *args) -# { -# int lines; -# int columns; -# PyObject *result; -# PyCursesInitialised; - -# if (!PyArg_ParseTuple(args,"ii:resizeterm", &lines, &columns)) -# return NULL; - -# result = PyCursesCheckERR(resizeterm(lines, columns), "resizeterm"); -# if (!result) -# return NULL; -# if (!update_lines_cols()) -# return NULL; -# return result; -# } - -# #endif - -# #ifdef HAVE_CURSES_RESIZE_TERM -# static PyObject * -# PyCurses_Resize_Term(PyObject *self, PyObject *args) -# { -# int 
lines; -# int columns; - -# PyObject *result; - -# PyCursesInitialised; - -# if (!PyArg_ParseTuple(args,"ii:resize_term", &lines, &columns)) -# return NULL; - -# result = PyCursesCheckERR(resize_term(lines, columns), "resize_term"); -# if (!result) -# return NULL; -# if (!update_lines_cols()) -# return NULL; -# return result; -# } -# #endif /* HAVE_CURSES_RESIZE_TERM */ +def resize_term(lines, columns): + _ensure_initialised() + _check_ERR(lib.resize_term(lines, columns), "resize_term") + update_lines_cols() def setsyx(y, x): diff --git a/lib_pypy/_curses_build.py b/lib_pypy/_curses_build.py --- a/lib_pypy/_curses_build.py +++ b/lib_pypy/_curses_build.py @@ -87,6 +87,13 @@ static const chtype A_CHARTEXT; static const chtype A_COLOR; +static const chtype A_HORIZONTAL; +static const chtype A_LEFT; +static const chtype A_LOW; +static const chtype A_RIGHT; +static const chtype A_TOP; +static const chtype A_VERTICAL; + static const int BUTTON1_RELEASED; static const int BUTTON1_PRESSED; static const int BUTTON1_CLICKED; @@ -202,6 +209,8 @@ int resetty(void); int reset_prog_mode(void); int reset_shell_mode(void); +int resizeterm(int, int); +int resize_term(int, int); int savetty(void); int scroll(WINDOW *); int scrollok(WINDOW *, bool); diff --git a/lib_pypy/_pypy_winbase_build.py b/lib_pypy/_pypy_winbase_build.py --- a/lib_pypy/_pypy_winbase_build.py +++ b/lib_pypy/_pypy_winbase_build.py @@ -79,10 +79,20 @@ BOOL WINAPI CreateProcessA(char *, char *, void *, void *, BOOL, DWORD, char *, char *, LPSTARTUPINFO, LPPROCESS_INFORMATION); +BOOL WINAPI CreateProcessW(wchar_t *, wchar_t *, void *, + void *, BOOL, DWORD, wchar_t *, + wchar_t *, LPSTARTUPINFO, LPPROCESS_INFORMATION); DWORD WINAPI WaitForSingleObject(HANDLE, DWORD); BOOL WINAPI GetExitCodeProcess(HANDLE, LPDWORD); BOOL WINAPI TerminateProcess(HANDLE, UINT); HANDLE WINAPI GetStdHandle(DWORD); +DWORD WINAPI GetModuleFileNameW(HANDLE, wchar_t *, DWORD); + +UINT WINAPI SetErrorMode(UINT); +#define 
SEM_FAILCRITICALERRORS 0x0001 +#define SEM_NOGPFAULTERRORBOX 0x0002 +#define SEM_NOALIGNMENTFAULTEXCEPT 0x0004 +#define SEM_NOOPENFILEERRORBOX 0x8000 """) # -------------------- diff --git a/lib_pypy/_pypy_winbase_cffi.py b/lib_pypy/_pypy_winbase_cffi.py --- a/lib_pypy/_pypy_winbase_cffi.py +++ b/lib_pypy/_pypy_winbase_cffi.py @@ -3,8 +3,8 @@ ffi = _cffi_backend.FFI('_pypy_winbase_cffi', _version = 0x2601, - _types = b'\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x09\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x19\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x50\x03\x00\x00\x13\x11\x00\x00\x53\x03\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x13\x11\x00\x00\x13\x11\x00\x00\x4F\x03\x00\x00\x4E\x03\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x03\x00\x00\x1F\x11\x00\x00\x15\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x08\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x18\x03\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x1F\x11\x00\x00\x0A\x01\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x0D\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x18\x0D\x00\x00\x15\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x18\x0D\x00\x00\x02\x0F\x00\x00\x42\x0D\x00\x00\x06\x01\x00\x00\x00\x0F\x00\x00\x42\x0D\x00\x00\x00\x0F\x00\x00\x42\x0D\x00\x00\x10\x01\x00\x00\x00\x0F\x00\x00\x15\x0D\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x15\x0D\x00\x00\x02\x0F\x00\x00\x00\x09\x00\x00\x01\x09\x00\x00\x02\x01\x00\x00\x52\x03\x00\x00\x04\x01\x00\x00\x00\x01', - _globals = 
(b'\x00\x00\x24\x23CloseHandle',0,b'\x00\x00\x1E\x23CreatePipe',0,b'\x00\x00\x12\x23CreateProcessA',0,b'\x00\x00\x2F\x23DuplicateHandle',0,b'\x00\x00\x4C\x23GetCurrentProcess',0,b'\x00\x00\x2B\x23GetExitCodeProcess',0,b'\x00\x00\x49\x23GetStdHandle',0,b'\x00\x00\x3F\x23GetVersion',0,b'\x00\x00\x27\x23TerminateProcess',0,b'\x00\x00\x3B\x23WaitForSingleObject',0,b'\x00\x00\x38\x23_get_osfhandle',0,b'\x00\x00\x10\x23_getch',0,b'\x00\x00\x10\x23_getche',0,b'\x00\x00\x44\x23_getwch',0,b'\x00\x00\x44\x23_getwche',0,b'\x00\x00\x10\x23_kbhit',0,b'\x00\x00\x07\x23_locking',0,b'\x00\x00\x0C\x23_open_osfhandle',0,b'\x00\x00\x00\x23_putch',0,b'\x00\x00\x46\x23_putwch',0,b'\x00\x00\x03\x23_setmode',0,b'\x00\x00\x00\x23_ungetch',0,b'\x00\x00\x41\x23_ungetwch',0), - _struct_unions = ((b'\x00\x00\x00\x4E\x00\x00\x00\x02$PROCESS_INFORMATION',b'\x00\x00\x15\x11hProcess',b'\x00\x00\x15\x11hThread',b'\x00\x00\x18\x11dwProcessId',b'\x00\x00\x18\x11dwThreadId'),(b'\x00\x00\x00\x4F\x00\x00\x00\x02$STARTUPINFO',b'\x00\x00\x18\x11cb',b'\x00\x00\x13\x11lpReserved',b'\x00\x00\x13\x11lpDesktop',b'\x00\x00\x13\x11lpTitle',b'\x00\x00\x18\x11dwX',b'\x00\x00\x18\x11dwY',b'\x00\x00\x18\x11dwXSize',b'\x00\x00\x18\x11dwYSize',b'\x00\x00\x18\x11dwXCountChars',b'\x00\x00\x18\x11dwYCountChars',b'\x00\x00\x18\x11dwFillAttribute',b'\x00\x00\x18\x11dwFlags',b'\x00\x00\x42\x11wShowWindow',b'\x00\x00\x42\x11cbReserved2',b'\x00\x00\x51\x11lpReserved2',b'\x00\x00\x15\x11hStdInput',b'\x00\x00\x15\x11hStdOutput',b'\x00\x00\x15\x11hStdError')), - _typenames = (b'\x00\x00\x00\x1CLPPROCESS_INFORMATION',b'\x00\x00\x00\x1BLPSTARTUPINFO',b'\x00\x00\x00\x4EPROCESS_INFORMATION',b'\x00\x00\x00\x4FSTARTUPINFO',b'\x00\x00\x00\x42wint_t'), + _types = 
b'\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x09\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x19\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x64\x03\x00\x00\x13\x11\x00\x00\x67\x03\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x13\x11\x00\x00\x13\x11\x00\x00\x63\x03\x00\x00\x62\x03\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x03\x00\x00\x1F\x11\x00\x00\x15\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x08\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x18\x03\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x1F\x11\x00\x00\x0A\x01\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x5B\x03\x00\x00\x39\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x39\x11\x00\x00\x39\x11\x00\x00\x1B\x11\x00\x00\x1C\x11\x00\x00\x02\x0F\x00\x00\x0D\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x29\x0D\x00\x00\x08\x01\x00\x00\x02\x0F\x00\x00\x18\x0D\x00\x00\x15\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x18\x0D\x00\x00\x15\x11\x00\x00\x39\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x18\x0D\x00\x00\x02\x0F\x00\x00\x56\x0D\x00\x00\x06\x01\x00\x00\x00\x0F\x00\x00\x56\x0D\x00\x00\x00\x0F\x00\x00\x56\x0D\x00\x00\x10\x01\x00\x00\x00\x0F\x00\x00\x15\x0D\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x15\x0D\x00\x00\x02\x0F\x00\x00\x00\x09\x00\x00\x01\x09\x00\x00\x02\x01\x00\x00\x66\x03\x00\x00\x04\x01\x00\x00\x00\x01', + _globals = 
(b'\x00\x00\x24\x23CloseHandle',0,b'\x00\x00\x1E\x23CreatePipe',0,b'\x00\x00\x12\x23CreateProcessA',0,b'\x00\x00\x38\x23CreateProcessW',0,b'\x00\x00\x2F\x23DuplicateHandle',0,b'\x00\x00\x60\x23GetCurrentProcess',0,b'\x00\x00\x2B\x23GetExitCodeProcess',0,b'\x00\x00\x4E\x23GetModuleFileNameW',0,b'\x00\x00\x5D\x23GetStdHandle',0,b'\x00\x00\x53\x23GetVersion',0,b'\xFF\xFF\xFF\x1FSEM_FAILCRITICALERRORS',1,b'\xFF\xFF\xFF\x1FSEM_NOALIGNMENTFAULTEXCEPT',4,b'\xFF\xFF\xFF\x1FSEM_NOGPFAULTERRORBOX',2,b'\xFF\xFF\xFF\x1FSEM_NOOPENFILEERRORBOX',32768,b'\x00\x00\x47\x23SetErrorMode',0,b'\x00\x00\x27\x23TerminateProcess',0,b'\x00\x00\x4A\x23WaitForSingleObject',0,b'\x00\x00\x44\x23_get_osfhandle',0,b'\x00\x00\x10\x23_getch',0,b'\x00\x00\x10\x23_getche',0,b'\x00\x00\x58\x23_getwch',0,b'\x00\x00\x58\x23_getwche',0,b'\x00\x00\x10\x23_kbhit',0,b'\x00\x00\x07\x23_locking',0,b'\x00\x00\x0C\x23_open_osfhandle',0,b'\x00\x00\x00\x23_putch',0,b'\x00\x00\x5A\x23_putwch',0,b'\x00\x00\x03\x23_setmode',0,b'\x00\x00\x00\x23_ungetch',0,b'\x00\x00\x55\x23_ungetwch',0), + _struct_unions = ((b'\x00\x00\x00\x62\x00\x00\x00\x02$PROCESS_INFORMATION',b'\x00\x00\x15\x11hProcess',b'\x00\x00\x15\x11hThread',b'\x00\x00\x18\x11dwProcessId',b'\x00\x00\x18\x11dwThreadId'),(b'\x00\x00\x00\x63\x00\x00\x00\x02$STARTUPINFO',b'\x00\x00\x18\x11cb',b'\x00\x00\x13\x11lpReserved',b'\x00\x00\x13\x11lpDesktop',b'\x00\x00\x13\x11lpTitle',b'\x00\x00\x18\x11dwX',b'\x00\x00\x18\x11dwY',b'\x00\x00\x18\x11dwXSize',b'\x00\x00\x18\x11dwYSize',b'\x00\x00\x18\x11dwXCountChars',b'\x00\x00\x18\x11dwYCountChars',b'\x00\x00\x18\x11dwFillAttribute',b'\x00\x00\x18\x11dwFlags',b'\x00\x00\x56\x11wShowWindow',b'\x00\x00\x56\x11cbReserved2',b'\x00\x00\x65\x11lpReserved2',b'\x00\x00\x15\x11hStdInput',b'\x00\x00\x15\x11hStdOutput',b'\x00\x00\x15\x11hStdError')), + _typenames = 
(b'\x00\x00\x00\x1CLPPROCESS_INFORMATION',b'\x00\x00\x00\x1BLPSTARTUPINFO',b'\x00\x00\x00\x62PROCESS_INFORMATION',b'\x00\x00\x00\x63STARTUPINFO',b'\x00\x00\x00\x56wint_t'), ) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -31,10 +31,11 @@ import weakref from threading import _get_ident as _thread_get_ident try: - from __pypy__ import newlist_hint + from __pypy__ import newlist_hint, add_memory_pressure except ImportError: assert '__pypy__' not in sys.builtin_module_names newlist_hint = lambda sizehint: [] + add_memory_pressure = lambda size: None if sys.version_info[0] >= 3: StandardError = Exception @@ -150,6 +151,9 @@ def connect(database, timeout=5.0, detect_types=0, isolation_level="", check_same_thread=True, factory=None, cached_statements=100): factory = Connection if not factory else factory + # an sqlite3 db seems to be around 100 KiB at least (doesn't matter if + # backed by :memory: or a file) + add_memory_pressure(100 * 1024) return factory(database, timeout, detect_types, isolation_level, check_same_thread, factory, cached_statements) diff --git a/lib_pypy/_sysconfigdata.py b/lib_pypy/_sysconfigdata.py new file mode 100644 --- /dev/null +++ b/lib_pypy/_sysconfigdata.py @@ -0,0 +1,5 @@ +import imp + +build_time_vars = { + "SO": [s[0] for s in imp.get_suffixes() if s[2] == imp.C_EXTENSION][0] +} diff --git a/lib_pypy/_tkinter/tklib_build.py b/lib_pypy/_tkinter/tklib_build.py --- a/lib_pypy/_tkinter/tklib_build.py +++ b/lib_pypy/_tkinter/tklib_build.py @@ -22,12 +22,27 @@ linklibs = ['tcl', 'tk'] libdirs = [] else: - for _ver in ['', '8.6', '8.5', '']: + # On some Linux distributions, the tcl and tk libraries are + # stored in /usr/include, so we must check this case also + libdirs = [] + found = False + for _ver in ['', '8.6', '8.5']: incdirs = ['/usr/include/tcl' + _ver] linklibs = ['tcl' + _ver, 'tk' + _ver] - libdirs = [] if os.path.isdir(incdirs[0]): + found = True break + if not 
found: + for _ver in ['8.6', '8.5', '']: + incdirs = [] + linklibs = ['tcl' + _ver, 'tk' + _ver] + if os.path.isfile(''.join(['/usr/lib/lib', linklibs[1], '.so'])): + found = True + break + if not found: + sys.stderr.write("*** TCL libraries not found! Falling back...\n") + incdirs = [] + linklibs = ['tcl', 'tk'] config_ffi = FFI() config_ffi.cdef(""" diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py --- a/lib_pypy/cPickle.py +++ b/lib_pypy/cPickle.py @@ -116,10 +116,20 @@ @builtinify def dump(obj, file, protocol=None): + if protocol > HIGHEST_PROTOCOL: + # use cPickle error message, not pickle.py one + raise ValueError("pickle protocol %d asked for; " + "the highest available protocol is %d" % ( + protocol, HIGHEST_PROTOCOL)) Pickler(file, protocol).dump(obj) @builtinify def dumps(obj, protocol=None): + if protocol > HIGHEST_PROTOCOL: + # use cPickle error message, not pickle.py one + raise ValueError("pickle protocol %d asked for; " + "the highest available protocol is %d" % ( + protocol, HIGHEST_PROTOCOL)) file = StringIO() Pickler(file, protocol).dump(obj) return file.getvalue() @@ -431,7 +441,14 @@ self.append(obj) def find_class(self, module, name): - # Subclasses may override this + if self.find_global is None: + raise UnpicklingError( + "Global and instance pickles are not supported.") + return self.find_global(module, name) + + def find_global(self, module, name): + # This can officially be patched directly in the Unpickler + # instance, according to the docs __import__(module) mod = sys.modules[module] klass = getattr(mod, name) diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.10.0 +Version: 1.11.0 Summary: Foreign Function Interface for Python calling C code. 
Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -4,8 +4,8 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing -__version__ = "1.10.0" -__version_info__ = (1, 10, 0) +__version__ = "1.11.0" +__version_info__ = (1, 11, 0) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_errors.h b/lib_pypy/cffi/_cffi_errors.h new file mode 100644 --- /dev/null +++ b/lib_pypy/cffi/_cffi_errors.h @@ -0,0 +1,145 @@ +#ifndef CFFI_MESSAGEBOX +# ifdef _MSC_VER +# define CFFI_MESSAGEBOX 1 +# else +# define CFFI_MESSAGEBOX 0 +# endif +#endif + + +#if CFFI_MESSAGEBOX +/* Windows only: logic to take the Python-CFFI embedding logic + initialization errors and display them in a background thread + with MessageBox. The idea is that if the whole program closes + as a result of this problem, then likely it is already a console + program and you can read the stderr output in the console too. + If it is not a console program, then it will likely show its own + dialog to complain, or generally not abruptly close, and for this + case the background thread should stay alive. 
+*/ +static void *volatile _cffi_bootstrap_text; + +static PyObject *_cffi_start_error_capture(void) +{ + PyObject *result = NULL; + PyObject *x, *m, *bi; + + if (InterlockedCompareExchangePointer(&_cffi_bootstrap_text, + (void *)1, NULL) != NULL) + return (PyObject *)1; + + m = PyImport_AddModule("_cffi_error_capture"); + if (m == NULL) + goto error; + + result = PyModule_GetDict(m); + if (result == NULL) + goto error; + +#if PY_MAJOR_VERSION >= 3 + bi = PyImport_ImportModule("builtins"); +#else + bi = PyImport_ImportModule("__builtin__"); +#endif + if (bi == NULL) + goto error; + PyDict_SetItemString(result, "__builtins__", bi); + Py_DECREF(bi); + + x = PyRun_String( + "import sys\n" + "class FileLike:\n" + " def write(self, x):\n" + " of.write(x)\n" + " self.buf += x\n" + "fl = FileLike()\n" + "fl.buf = ''\n" + "of = sys.stderr\n" + "sys.stderr = fl\n" + "def done():\n" + " sys.stderr = of\n" + " return fl.buf\n", /* make sure the returned value stays alive */ + Py_file_input, + result, result); + Py_XDECREF(x); + + error: + if (PyErr_Occurred()) + { + PyErr_WriteUnraisable(Py_None); + PyErr_Clear(); + } + return result; +} + +#pragma comment(lib, "user32.lib") + +static DWORD WINAPI _cffi_bootstrap_dialog(LPVOID ignored) +{ + Sleep(666); /* may be interrupted if the whole process is closing */ +#if PY_MAJOR_VERSION >= 3 + MessageBoxW(NULL, (wchar_t *)_cffi_bootstrap_text, + L"Python-CFFI error", + MB_OK | MB_ICONERROR); +#else + MessageBoxA(NULL, (char *)_cffi_bootstrap_text, + "Python-CFFI error", + MB_OK | MB_ICONERROR); +#endif + _cffi_bootstrap_text = NULL; + return 0; +} + +static void _cffi_stop_error_capture(PyObject *ecap) +{ + PyObject *s; + void *text; + + if (ecap == (PyObject *)1) + return; + + if (ecap == NULL) + goto error; + + s = PyRun_String("done()", Py_eval_input, ecap, ecap); + if (s == NULL) + goto error; + + /* Show a dialog box, but in a background thread, and + never show multiple dialog boxes at once. 
*/ +#if PY_MAJOR_VERSION >= 3 + text = PyUnicode_AsWideCharString(s, NULL); +#else + text = PyString_AsString(s); +#endif + + _cffi_bootstrap_text = text; + + if (text != NULL) + { + HANDLE h; + h = CreateThread(NULL, 0, _cffi_bootstrap_dialog, + NULL, 0, NULL); + if (h != NULL) + CloseHandle(h); + } + /* decref the string, but it should stay alive as 'fl.buf' + in the small module above. It will really be freed only if + we later get another similar error. So it's a leak of at + most one copy of the small module. That's fine for this + situation which is usually a "fatal error" anyway. */ + Py_DECREF(s); + PyErr_Clear(); + return; + + error: + _cffi_bootstrap_text = NULL; + PyErr_Clear(); +} + +#else + +static PyObject *_cffi_start_error_capture(void) { return NULL; } +static void _cffi_stop_error_capture(PyObject *ecap) { } + +#endif diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -8,7 +8,7 @@ the same works for the other two macros. Py_DEBUG implies them, but not the other way around. 
*/ -#ifndef _CFFI_USE_EMBEDDING +#if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API) # include # if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) # define Py_LIMITED_API @@ -95,6 +95,7 @@ #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define _cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong +#define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble @@ -159,9 +160,9 @@ #define _cffi_from_c_struct \ ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18]) #define _cffi_to_c_wchar_t \ - ((wchar_t(*)(PyObject *))_cffi_exports[19]) + ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19]) #define _cffi_from_c_wchar_t \ - ((PyObject *(*)(wchar_t))_cffi_exports[20]) + ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20]) #define _cffi_to_c_long_double \ ((long double(*)(PyObject *))_cffi_exports[21]) #define _cffi_to_c__Bool \ @@ -174,7 +175,11 @@ #define _CFFI_CPIDX 25 #define _cffi_call_python \ ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX]) -#define _CFFI_NUM_EXPORTS 26 +#define _cffi_to_c_wchar3216_t \ + ((int(*)(PyObject *))_cffi_exports[26]) +#define _cffi_from_c_wchar3216_t \ + ((PyObject *(*)(int))_cffi_exports[27]) +#define _CFFI_NUM_EXPORTS 28 struct _cffi_ctypedescr; @@ -215,6 +220,46 @@ return NULL; } + +#ifdef HAVE_WCHAR_H +typedef wchar_t _cffi_wchar_t; +#else +typedef uint16_t _cffi_wchar_t; /* same random pick as _cffi_backend.c */ +#endif + +_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o) +{ + if (sizeof(_cffi_wchar_t) == 2) + return (uint16_t)_cffi_to_c_wchar_t(o); + else + return (uint16_t)_cffi_to_c_wchar3216_t(o); +} + +_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) +{ + if (sizeof(_cffi_wchar_t) == 2) + return _cffi_from_c_wchar_t(x); + else + return _cffi_from_c_wchar3216_t(x); +} + +_CFFI_UNUSED_FN static int 
_cffi_to_c_char32_t(PyObject *o) +{ + if (sizeof(_cffi_wchar_t) == 4) + return (int)_cffi_to_c_wchar_t(o); + else + return (int)_cffi_to_c_wchar3216_t(o); +} + +_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) +{ + if (sizeof(_cffi_wchar_t) == 4) + return _cffi_from_c_wchar_t(x); + else + return _cffi_from_c_wchar3216_t(x); +} + + /********** end CPython-specific section **********/ #else _CFFI_UNUSED_FN diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -1,7 +1,12 @@ /***** Support code for embedding *****/ -#if defined(_MSC_VER) +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(_WIN32) # define CFFI_DLLEXPORT __declspec(dllexport) #elif defined(__GNUC__) # define CFFI_DLLEXPORT __attribute__((visibility("default"))) @@ -109,6 +114,8 @@ /********** CPython-specific section **********/ #ifndef PYPY_VERSION +#include "_cffi_errors.h" + #define _cffi_call_python_org _cffi_exports[_CFFI_CPIDX] @@ -220,8 +227,16 @@ /* Print as much information as potentially useful. 
Debugging load-time failures with embedding is not fun */ + PyObject *ecap; PyObject *exception, *v, *tb, *f, *modules, *mod; PyErr_Fetch(&exception, &v, &tb); + ecap = _cffi_start_error_capture(); + f = PySys_GetObject((char *)"stderr"); + if (f != NULL && f != Py_None) { + PyFile_WriteString( + "Failed to initialize the Python-CFFI embedding logic:\n\n", f); + } + if (exception != NULL) { PyErr_NormalizeException(&exception, &v, &tb); PyErr_Display(exception, v, tb); @@ -230,10 +245,9 @@ Py_XDECREF(v); Py_XDECREF(tb); - f = PySys_GetObject((char *)"stderr"); if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.10.0" + "\ncompiled with cffi version: 1.11.0" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); @@ -249,6 +263,7 @@ PyFile_WriteObject(PySys_GetObject((char *)"path"), f, 0); PyFile_WriteString("\n\n", f); } + _cffi_stop_error_capture(ecap); } result = -1; goto done; @@ -515,3 +530,7 @@ #undef cffi_compare_and_swap #undef cffi_write_barrier #undef cffi_read_barrier + +#ifdef __cplusplus +} +#endif diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -75,9 +75,10 @@ self._init_once_cache = {} self._cdef_version = None self._embedding = None + self._typecache = model.get_typecache(backend) if hasattr(backend, 'set_ffi'): backend.set_ffi(self) - for name in backend.__dict__: + for name in list(backend.__dict__): if name.startswith('RTLD_'): setattr(self, name, getattr(backend, name)) # @@ -393,12 +394,17 @@ From pypy.commits at gmail.com Thu Aug 24 05:57:00 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 24 Aug 2017 02:57:00 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: improve allocation choice for coalesced variables Message-ID: <599ea2ec.4692df0a.f2949.d204@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground 
Changeset: r92244:2e31b6e3f902 Date: 2017-08-24 10:19 +0200 http://bitbucket.org/pypy/pypy/changeset/2e31b6e3f902/ Log: improve allocation choice for coalesced variables diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -482,7 +482,7 @@ need_lower_byte=need_lower_byte) if loc: return loc - loc = self._spill_var(v, forbidden_vars, selected_reg, + loc = self._spill_var(forbidden_vars, selected_reg, need_lower_byte=need_lower_byte) prev_loc = self.reg_bindings.get(v, None) if prev_loc is not None: @@ -707,7 +707,7 @@ if reg not in self.save_around_call_regs] # choose which to spill using the usual spill heuristics while len(move_or_spill) > len(free_regs): - v = self._pick_variable_to_spill(None, [], vars=move_or_spill) + v = self._pick_variable_to_spill([], vars=move_or_spill) self._bc_spill(v, new_free_regs) move_or_spill.remove(v) assert len(move_or_spill) <= len(free_regs) @@ -807,6 +807,11 @@ assert op.numargs() == 1 return [self.loc(op.getarg(0))] + +# ____________________________________________________________ + + + UNDEF_POS = -42 class Lifetime(object): @@ -834,6 +839,11 @@ # the other lifetime will have this variable set to self.definition_pos self._definition_pos_shared = UNDEF_POS + def last_usage_including_sharing(self): + while self.share_with is not None: + self = self.share_with + return self.last_usage + def is_last_real_use_before(self, position): if self.real_usages is None: return True @@ -918,6 +928,9 @@ return index return sys.maxint + def __repr__(self): + return "%s: fixed at %s" % (self.register, self.index_lifetimes) + class LifetimeManager(object): def __init__(self, longevity): @@ -986,10 +999,10 @@ unfixed_reg = reg continue use_after = fixed_reg_pos.free_until_pos(position) - if use_after < longevityvar.last_usage: + if use_after < longevityvar.last_usage_including_sharing(): # can't fit 
continue - assert use_after >= longevityvar.last_usage + assert use_after >= longevityvar.last_usage_including_sharing() if use_after < min_fixed_use_after: best_reg = reg min_fixed_use_after = use_after diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -341,6 +341,21 @@ # r1 is picked, because b4 fits before b0 assert loc is r1 +def test_coalescing_non_fixed_regs(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(0, 10) + l1 = Lifetime(10, 20) + l2 = Lifetime(25, 40) + l3 = Lifetime(15, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b3: l3}) + longevity.try_use_same_register(b0, b1) + longevity.fixed_register(35, r2, b2) + longevity.fixed_register(35, r3, b3) + + loc = longevity.try_pick_free_reg(0, b0, [r1, r2, r3]) + # r2 is picked, otherwise b1 can't end up in the same reg as b0 + assert loc is r2 + def test_chained_coalescing(): # 5 + b4 @@ -419,7 +434,7 @@ class XRegisterManager(RegisterManager): no_lower_byte_regs = [r2, r3] - + rm = XRegisterManager(longevity) rm.next_instruction() loc0 = rm.try_allocate_reg(b0, need_lower_byte=True) @@ -454,7 +469,7 @@ class XRegisterManager(RegisterManager): no_lower_byte_regs = [r2, r3] - + rm = XRegisterManager(longevity, frame_manager=fm, assembler=MockAsm()) @@ -649,7 +664,7 @@ rm.after_call(boxes[-1]) assert len(rm.reg_bindings) == 1 rm._check_invariants() - + def test_different_frame_width(self): class XRegisterManager(RegisterManager): From pypy.commits at gmail.com Thu Aug 24 05:57:02 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 24 Aug 2017 02:57:02 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: cleanups Message-ID: <599ea2ee.08e61c0a.e6ed1.7a78@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92245:f19f3e988faf Date: 2017-08-24 10:20 
+0200 http://bitbucket.org/pypy/pypy/changeset/f19f3e988faf/ Log: cleanups diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -403,23 +403,18 @@ self.free_regs.remove(loc) return loc - def _spill_var(self, v, forbidden_vars, selected_reg, + def _spill_var(self, forbidden_vars, selected_reg, need_lower_byte=False): - v_to_spill = self._pick_variable_to_spill(v, forbidden_vars, + v_to_spill = self._pick_variable_to_spill(forbidden_vars, selected_reg, need_lower_byte=need_lower_byte) loc = self.reg_bindings[v_to_spill] + self.assembler.num_spills += 1 + self._sync_var(v_to_spill) del self.reg_bindings[v_to_spill] - self.assembler.num_spills += 1 - if self.frame_manager.get(v_to_spill) is None: - newloc = self.frame_manager.loc(v_to_spill) - self.assembler.regalloc_mov(loc, newloc) - else: - self.assembler.num_spills_to_existing += 1 return loc - def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None, + def _pick_variable_to_spill(self, forbidden_vars, selected_reg=None, need_lower_byte=False, vars=None): - # YYY v is unused, remove # try to spill a variable that has no further real usages, ie that only # appears in failargs or in a jump @@ -529,7 +524,7 @@ if selected_reg in self.free_regs: self.assembler.regalloc_mov(immloc, selected_reg) return selected_reg - loc = self._spill_var(v, forbidden_vars, selected_reg) + loc = self._spill_var(forbidden_vars, selected_reg) self.free_regs.append(loc) self.assembler.regalloc_mov(immloc, loc) return loc @@ -578,12 +573,8 @@ self._check_type(result_v) self._check_type(v) if isinstance(v, Const): - if self.free_regs: - loc = self.free_regs.pop() - else: - loc = self._spill_var(v, forbidden_vars, None) + loc = self.force_allocate_reg(result_v, forbidden_vars) self.assembler.regalloc_mov(self.convert_to_imm(v), loc) - self.reg_bindings[result_v] = loc return loc if v 
not in self.reg_bindings: # v not in a register. allocate one for result_v and move v there @@ -605,11 +596,13 @@ return loc def _sync_var(self, v): + self.assembler.num_spills += 1 if not self.frame_manager.get(v): - self.assembler.num_moves_calls += 1 reg = self.reg_bindings[v] to = self.frame_manager.loc(v) self.assembler.regalloc_mov(reg, to) + else: + self.assembler.num_spills_to_existing += 1 # otherwise it's clean def _bc_spill(self, v, new_free_regs): @@ -1102,6 +1095,7 @@ return LifetimeManager(longevity) +# YYY unused? def is_comparison_or_ovf_op(opnum): return rop.is_comparison(opnum) or rop.is_ovf(opnum) diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -334,7 +334,7 @@ longevity.fixed_register(35, r2, b2) loc = longevity.try_pick_free_reg(0, b3, [r1, r2]) - # r2 is picked, otherwise b0 can't b0 can't end up in r1 + # r2 is picked, otherwise b0 can't end up in r1 assert loc is r2 loc = longevity.try_pick_free_reg(0, b4, [r1, r2]) @@ -1303,7 +1303,6 @@ i5 = escape_i() jump(i4, i5, descr=targettoken) ''' - self.targettoken._fake_arglocs = [r5, r6] emitted = self.allocate(ops) assert emitted == [ ('escape_i', r0, []), From pypy.commits at gmail.com Thu Aug 24 05:56:58 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 24 Aug 2017 02:56:58 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: oops Message-ID: <599ea2ea.85961c0a.140bf.68d5@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92243:0600554dc7ef Date: 2017-08-24 09:36 +0200 http://bitbucket.org/pypy/pypy/changeset/0600554dc7ef/ Log: oops diff --git a/pytest.ini b/pytest.ini --- a/pytest.ini +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --assert=reinterp -rf diff --git a/rpython/pytest.ini b/rpython/pytest.ini --- a/rpython/pytest.ini +++ 
b/rpython/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --assert=reinterp -rf From pypy.commits at gmail.com Thu Aug 24 05:57:04 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 24 Aug 2017 02:57:04 -0700 (PDT) Subject: [pypy-commit] pypy regalloc-playground: rename argument Message-ID: <599ea2f0.c6581c0a.5283d.668a@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92246:7a0afd3f4cdd Date: 2017-08-24 11:08 +0200 http://bitbucket.org/pypy/pypy/changeset/7a0afd3f4cdd/ Log: rename argument diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -414,7 +414,7 @@ return loc def _pick_variable_to_spill(self, forbidden_vars, selected_reg=None, - need_lower_byte=False, vars=None): + need_lower_byte=False, regs=None): # try to spill a variable that has no further real usages, ie that only # appears in failargs or in a jump @@ -422,15 +422,15 @@ # is the furthest away from the current position # YYY check for fixed variable usages - if vars is None: - vars = self.reg_bindings.keys() + if regs is None: + regs = self.reg_bindings.keys() cur_max_use_distance = -1 position = self.position candidate = None cur_max_age_failargs = -1 candidate_from_failargs = None - for next in vars: + for next in regs: reg = self.reg_bindings[next] if next in forbidden_vars: continue @@ -700,7 +700,7 @@ if reg not in self.save_around_call_regs] # choose which to spill using the usual spill heuristics while len(move_or_spill) > len(free_regs): - v = self._pick_variable_to_spill([], vars=move_or_spill) + v = self._pick_variable_to_spill([], regs=move_or_spill) self._bc_spill(v, new_free_regs) move_or_spill.remove(v) assert len(move_or_spill) <= len(free_regs) From pypy.commits at gmail.com Thu Aug 24 05:57:06 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 24 Aug 2017 02:57:06 -0700 (PDT) Subject: 
[pypy-commit] pypy regalloc-playground: move tests for force_result_in_regs to their own class, since there are so Message-ID: <599ea2f2.862e1c0a.a65c7.100b@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r92247:52e02755ad6a Date: 2017-08-24 11:08 +0200 http://bitbucket.org/pypy/pypy/changeset/52e02755ad6a/ Log: move tests for force_result_in_regs to their own class, since there are so many cases (and I am about to add more) diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -507,72 +507,6 @@ assert isinstance(loc, FakeReg) rm._check_invariants() - def test_force_result_in_reg_1(self): - b0, b1 = newboxes(0, 0) - longevity = {b0: Lifetime(0, 1), b1: Lifetime(1, 3)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - # first path, var is already in reg and dies - loc0 = rm.force_allocate_reg(b0) - rm._check_invariants() - rm.next_instruction() - loc = rm.force_result_in_reg(b1, b0) - assert loc is loc0 - assert len(asm.moves) == 0 - rm._check_invariants() - - def test_force_result_in_reg_2(self): - b0, b1 = newboxes(0, 0) - longevity = {b0: Lifetime(0, 2), b1: Lifetime(1, 3)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - loc0 = rm.force_allocate_reg(b0) - rm._check_invariants() - rm.next_instruction() - loc = rm.force_result_in_reg(b1, b0) - assert loc is loc0 - assert rm.loc(b0) is not loc0 - assert len(asm.moves) == 1 - rm._check_invariants() - - def test_force_result_in_reg_3(self): - b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) - longevity = {b0: Lifetime(0, 2), b1: Lifetime(0, 2), - b3: Lifetime(0, 2), b2: Lifetime(0, 2), - b4: Lifetime(1, 3)} - fm = TFrameManager() - 
asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - for b in b0, b1, b2, b3: - rm.force_allocate_reg(b) - assert not len(rm.free_regs) - rm._check_invariants() - rm.next_instruction() - rm.force_result_in_reg(b4, b0) - rm._check_invariants() - assert len(asm.moves) == 1 - - def test_force_result_in_reg_4(self): - b0, b1 = newboxes(0, 0) - longevity = {b0: Lifetime(0, 1), b1: Lifetime(0, 1)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - fm.loc(b0) - rm.force_result_in_reg(b1, b0) - rm._check_invariants() - loc = rm.loc(b1) - assert isinstance(loc, FakeReg) - loc = rm.loc(b0) - assert isinstance(loc, FakeFramePos) - assert len(asm.moves) == 1 - def test_bogus_make_sure_var_in_reg(self): b0, = newboxes(0) longevity = {b0: Lifetime(0, 1)} @@ -602,17 +536,6 @@ assert len(rm.reg_bindings) == 4 rm._check_invariants() - def test_force_result_in_reg_const(self): - boxes, longevity = boxes_and_longevity(2) - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, - assembler=asm) - rm.next_instruction() - c = ConstInt(0) - rm.force_result_in_reg(boxes[0], c) - rm._check_invariants() - def test_loc_of_const(self): rm = RegisterManager({}) rm.next_instruction() @@ -935,6 +858,87 @@ for box in fm.bindings.keys(): fm.mark_as_free(box) + +class TestForceResultInReg(object): + # uses its own class since there are so many cases + + def test_force_result_in_reg_1(self): + b0, b1 = newboxes(0, 0) + longevity = {b0: Lifetime(0, 1), b1: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + # first path, var is already in reg and dies + loc0 = rm.force_allocate_reg(b0) + rm._check_invariants() + rm.next_instruction() + loc = rm.force_result_in_reg(b1, b0) + assert loc is loc0 + assert len(asm.moves) == 0 + 
rm._check_invariants() + + def test_force_result_in_reg_2(self): + b0, b1 = newboxes(0, 0) + longevity = {b0: Lifetime(0, 2), b1: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + loc0 = rm.force_allocate_reg(b0) + rm._check_invariants() + rm.next_instruction() + loc = rm.force_result_in_reg(b1, b0) + assert loc is loc0 + assert rm.loc(b0) is not loc0 + assert len(asm.moves) == 1 + rm._check_invariants() + + def test_force_result_in_reg_3(self): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + longevity = {b0: Lifetime(0, 2), b1: Lifetime(0, 2), + b3: Lifetime(0, 2), b2: Lifetime(0, 2), + b4: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + for b in b0, b1, b2, b3: + rm.force_allocate_reg(b) + assert not len(rm.free_regs) + rm._check_invariants() + rm.next_instruction() + rm.force_result_in_reg(b4, b0) + rm._check_invariants() + assert len(asm.moves) == 1 + + def test_force_result_in_reg_4(self): + b0, b1 = newboxes(0, 0) + longevity = {b0: Lifetime(0, 1), b1: Lifetime(0, 1)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + fm.loc(b0) + rm.force_result_in_reg(b1, b0) + rm._check_invariants() + loc = rm.loc(b1) + assert isinstance(loc, FakeReg) + loc = rm.loc(b0) + assert isinstance(loc, FakeFramePos) + assert len(asm.moves) == 1 + + def test_force_result_in_reg_const(self): + boxes, longevity = boxes_and_longevity(2) + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, + assembler=asm) + rm.next_instruction() + c = ConstInt(0) + rm.force_result_in_reg(boxes[0], c) + rm._check_invariants() + # _____________________________________________________ # tests that assign registers in a mocked way for a fake CPU @@ -1243,7 +1247,6 @@ ] def 
test_coalescing_first_var_already_in_different_reg(self): - py.test.skip("messy - later") ops = ''' [i0] i2 = int_mul(i0, 2) From pypy.commits at gmail.com Thu Aug 24 08:40:00 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 05:40:00 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: (arigo, fijal climbing) Message-ID: <599ec920.e8aedf0a.28f79.7ee0@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92248:bf0d9ddd4a6e Date: 2017-08-24 14:39 +0200 http://bitbucket.org/pypy/pypy/changeset/bf0d9ddd4a6e/ Log: (arigo, fijal climbing) Clean up rutf8.py diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -1,60 +1,70 @@ +""" This file is about supporting unicode strings in RPython, +represented by a byte string that is exactly the UTF-8 version +(for some definition of UTF-8). +This doesn't support Python 2's unicode characters beyond 0x10ffff, +which are theoretically possible to obtain using strange tricks like +the array or ctypes modules. + +Fun comes from surrogates. Various functions don't normally accept +any unicode character betwen 0xd800 and 0xdfff, but do if you give +the 'allow_surrogates = True' flag. 
+""" + +from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rstring import StringBuilder -from rpython.rlib import runicode, jit +from rpython.rlib import jit +from rpython.rlib.rarithmetic import r_uint -def unichr_as_utf8(code): - """ Encode code (numeric value) as utf8 encoded string + +def unichr_as_utf8(code, allow_surrogates=False): + """Encode code (numeric value) as utf8 encoded string """ - if code < 0: - raise ValueError - lgt = 1 - if code >= runicode.MAXUNICODE: - lgt = 2 - if code < 0x80: + code = r_uint(code) + if code <= r_uint(0x7F): # Encode ASCII - return chr(code), 1 - if code < 0x0800: - # Encode Latin-1 - return chr((0xc0 | (code >> 6))) + chr((0x80 | (code & 0x3f))), lgt - if code < 0x10000: + return chr(code) + if code <= r_uint(0x07FF): + return chr((0xc0 | (code >> 6))) + chr((0x80 | (code & 0x3f))) + if code <= r_uint(0xFFFF): + if not allow_surrogates and 0xD800 <= code <= 0xDfff: + raise ValueError return (chr((0xe0 | (code >> 12))) + chr((0x80 | ((code >> 6) & 0x3f))) + - chr((0x80 | (code & 0x3f)))), lgt - if code < 0x10ffff: + chr((0x80 | (code & 0x3f)))) + if code <= r_uint(0x10FFFF): return (chr((0xf0 | (code >> 18))) + chr((0x80 | ((code >> 12) & 0x3f))) + chr((0x80 | ((code >> 6) & 0x3f))) + - chr((0x80 | (code & 0x3f)))), lgt + chr((0x80 | (code & 0x3f)))) raise ValueError -def unichr_as_utf8_append(builder, code): - """ Encode code (numeric value) as utf8 encoded string +def unichr_as_utf8_append(builder, code, allow_surrogates=False): + """Encode code (numeric value) as utf8 encoded string + and emit the result into the given StringBuilder. 
""" - if code < 0: - raise ValueError - lgt = 1 - if code >= runicode.MAXUNICODE: - lgt = 2 - if code < 0x80: + code = r_uint(code) + if code <= r_uint(0x7F): # Encode ASCII builder.append(chr(code)) - return 1 - if code < 0x0800: - # Encode Latin-1 + return + if code <= r_uint(0x07FF): builder.append(chr((0xc0 | (code >> 6)))) builder.append(chr((0x80 | (code & 0x3f)))) - return lgt - if code < 0x10000: + return + if code <= r_uint(0xFFFF): + if not allow_surrogates and 0xd800 <= code <= 0xdfff: + raise ValueError builder.append(chr((0xe0 | (code >> 12)))) builder.append(chr((0x80 | ((code >> 6) & 0x3f)))) builder.append(chr((0x80 | (code & 0x3f)))) - return lgt - if code < 0x10ffff: + return + if code <= r_uint(0x10FFFF): builder.append(chr((0xf0 | (code >> 18)))) builder.append(chr((0x80 | ((code >> 12) & 0x3f)))) builder.append(chr((0x80 | ((code >> 6) & 0x3f)))) builder.append(chr((0x80 | (code & 0x3f)))) - return lgt + return raise ValueError # note - table lookups are really slow. Measured on various elements of obama @@ -62,61 +72,64 @@ # In extreme cases (small, only chinese text), they're 40% slower def next_codepoint_pos(code, pos): - """ Gives the position of the next codepoint after pos, -1 - if it's the last one (assumes valid utf8) + """Gives the position of the next codepoint after pos. + Assumes valid utf8. 'pos' must be before the end of the string. """ chr1 = ord(code[pos]) - if chr1 < 0x80: + if chr1 <= 0x7F: return pos + 1 - if 0xC2 <= chr1 <= 0xDF: + if chr1 <= 0xDF: return pos + 2 - if chr1 >= 0xE0 and chr1 <= 0xEF: + if chr1 <= 0xEF: return pos + 3 return pos + 4 def prev_codepoint_pos(code, pos): - """ Gives the position of the previous codepoint + """Gives the position of the previous codepoint. + 'pos' must not be zero. 
""" pos -= 1 chr1 = ord(code[pos]) - if chr1 < 0x80: + if chr1 <= 0x7F: return pos - while ord(code[pos]) & 0xC0 == 0x80: - pos -= 1 + pos -= 1 + if ord(code[pos]) >= 0xC0: + return pos + pos -= 1 + if ord(code[pos]) >= 0xC0: + return pos + pos -= 1 return pos def compute_length_utf8(s): - pos = 0 - lgt = 0 - while pos < len(s): - pos = next_codepoint_pos(s, pos) - lgt += 1 - return lgt + continuation_bytes = 0 + for i in range(len(s)): + if 0x80 <= ord(s[i]) <= 0xBF: # count the continuation bytes + continuation_bytes += 1 + return len(s) - continuation_bytes def codepoint_at_pos(code, pos): """ Give a codepoint in code at pos - assumes valid utf8, no checking! """ ordch1 = ord(code[pos]) - if ordch1 < 0x80: + if ordch1 <= 0x7F: return ordch1 - n = ord(runicode._utf8_code_length[ordch1 - 0x80]) - if n == 2: - ordch2 = ord(code[pos+1]) + ordch2 = ord(code[pos+1]) + if ordch1 <= 0xDF: # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz return (((ordch1 & 0x1F) << 6) + # 0b00011111 (ordch2 & 0x3F)) # 0b00111111 - elif n == 3: - ordch2 = ord(code[pos+1]) - ordch3 = ord(code[pos+2]) + + ordch3 = ord(code[pos+2]) + if ordch1 <= 0xEF: # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz return (((ordch1 & 0x0F) << 12) + # 0b00001111 ((ordch2 & 0x3F) << 6) + # 0b00111111 (ordch3 & 0x3F)) # 0b00111111 - elif n == 4: - ordch2 = ord(code[pos+1]) - ordch3 = ord(code[pos+2]) - ordch4 = ord(code[pos+3]) + + ordch4 = ord(code[pos+3]) + if True: # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz return (((ordch1 & 0x07) << 18) + # 0b00000111 ((ordch2 & 0x3F) << 12) + # 0b00111111 @@ -124,46 +137,44 @@ (ordch4 & 0x3F)) # 0b00111111 assert False, "unreachable" -class AsciiCheckError(Exception): - def __init__(self, pos): - self.pos = pos +class CheckError(Exception): + pass -def check_ascii(s, size=-1): - if size == -1: - size = len(s) - for i in range(0, size): - if ord(s[i]) & 0x80: - raise AsciiCheckError(i) + at jit.elidable +def check_ascii(s): + for i in 
range(len(s)): + if ord(s[i]) > 0x7F: + raise CheckError -def utf8_encode_ascii(s, errors, encoding, msg, errorhandler): - res = StringBuilder(len(s)) - u_pos = 0 - pos = 0 - while pos < len(s): - chr1 = s[pos] - if ord(chr1) < 0x80: - res.append(chr1) - else: - repl, _, _, _ = errorhandler(errors, encoding, msg, s, u_pos, u_pos + 1) - res.append(repl) - u_pos += 1 - pos = next_codepoint_pos(s, pos) - return res.build() +#def utf8_encode_ascii(s, errors, encoding, msg, errorhandler): +# res = StringBuilder(len(s)) +# u_pos = 0 +# pos = 0 +# while pos < len(s): +# chr1 = s[pos] +# if ord(chr1) < 0x80: +# res.append(chr1) +# else: +# repl, _, _, _ = errorhandler(errors, encoding, msg, s, u_pos, u_pos + 1) +# res.append(repl) +# u_pos += 1 +# pos = next_codepoint_pos(s, pos) +# return res.build() -def str_decode_ascii(s, size, errors, errorhandler): - # ASCII is equivalent to the first 128 ordinals in Unicode. - result = StringBuilder(size) - pos = 0 - while pos < size: - c = s[pos] - if ord(c) < 128: - result.append(c) - else: - r, _, _ = errorhandler(errors, "ascii", "ordinal not in range(128)", - s, pos, pos + 1) - result.append(r) - pos += 1 - return result.build(), pos, -1 +#def str_decode_ascii(s, size, errors, errorhandler): +# # ASCII is equivalent to the first 128 ordinals in Unicode. 
+# result = StringBuilder(size) +# pos = 0 +# while pos < size: +# c = s[pos] +# if ord(c) < 128: +# result.append(c) +# else: +# r, _, _ = errorhandler(errors, "ascii", "ordinal not in range(128)", +# s, pos, pos + 1) +# result.append(r) +# pos += 1 +# return result.build(), pos, -1 def islinebreak(s, pos): chr1 = ord(s[pos]) @@ -217,149 +228,92 @@ return True return False -def utf8_in_chars(value, pos, chars): - """ equivalent of u'x' in u'xyz', just done in utf8 - """ - lgt = next_codepoint_pos(value, pos) - pos - i = 0 - while i < len(chars): - j = next_codepoint_pos(chars, i) - if j - i != lgt: - i = j - continue - for k in range(lgt): - if value[k + pos] != chars[i + k]: - break - else: - return True - i = j - return False -class Utf8CheckError(Exception): - def __init__(self, msg, startpos, endpos): - self.msg = msg - self.startpos = startpos - self.endpos = endpos +def _invalid_cont_byte(ordch): + return ordch>>6 != 0x2 # 0b10 + +_invalid_byte_2_of_2 = _invalid_cont_byte +_invalid_byte_3_of_3 = _invalid_cont_byte +_invalid_byte_3_of_4 = _invalid_cont_byte +_invalid_byte_4_of_4 = _invalid_cont_byte + + at enforceargs(allow_surrogates=bool) +def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): + return (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xe0 and ordch2 < 0xa0) + # surrogates shouldn't be valid UTF-8! + or (ordch1 == 0xed and ordch2 > 0x9f and not allow_surrogates)) + +def _invalid_byte_2_of_4(ordch1, ordch2): + return (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xf0 and ordch2 < 0x90) or + (ordch1 == 0xf4 and ordch2 > 0x8f)) + @jit.elidable -def str_check_utf8(s, size, final=False, - allow_surrogates=runicode.allow_surrogate_by_default): - """ A simplified version of utf8 encoder - it only works with 'strict' - error handling. +def check_utf8(s, allow_surrogates=False): + """Check that 's' is a utf-8-encoded byte string. + Returns the length (number of chars) or raise CheckError. + Note that surrogates are not handled specially here. 
""" - # XXX do the following in a cleaner way, e.g. via signature - # NB. a bit messy because rtyper/rstr.py also calls the same - # function. Make sure we annotate for the args it passes, too - #if NonConstant(False): - # s = NonConstant('?????') - # size = NonConstant(12345) - # errors = NonConstant('strict') - # final = NonConstant(True) - # errorhandler = ll_unicode_error_decode - # allow_surrogates = NonConstant(True) - if size == 0: - return 0, 0 - pos = 0 - lgt = 0 - while pos < size: + continuation_bytes = 0 + while pos < len(s): ordch1 = ord(s[pos]) + pos += 1 # fast path for ASCII - # XXX maybe use a while loop here - if ordch1 < 0x80: - lgt += 1 - pos += 1 + if ordch1 <= 0x7F: continue - n = ord(runicode._utf8_code_length[ordch1 - 0x80]) - if pos + n > size: - if not final: - break - # argh, this obscure block of code is mostly a copy of - # what follows :-( - charsleft = size - pos - 1 # either 0, 1, 2 - # note: when we get the 'unexpected end of data' we need - # to care about the pos returned; it can be lower than size, - # in case we need to continue running this loop - if not charsleft: - # there's only the start byte and nothing else - raise Utf8CheckError('unexpected end of data', pos, pos + 1) - ordch2 = ord(s[pos+1]) - if n == 3: - # 3-bytes seq with only a continuation byte - if runicode._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): - # second byte invalid, take the first and continue - raise Utf8CheckError('invalid continuation byte', pos, - pos + 1) - else: - # second byte valid, but third byte missing - raise Utf8CheckError('unexpected end of data', pos, pos + 2) - elif n == 4: - # 4-bytes seq with 1 or 2 continuation bytes - if runicode._invalid_byte_2_of_4(ordch1, ordch2): - # second byte invalid, take the first and continue - raise Utf8CheckError('invalid continuation byte', pos, - pos + 1) - elif charsleft == 2 and runicode._invalid_byte_3_of_4(ord(s[pos+2])): - # third byte invalid, take the first two and continue - raise 
Utf8CheckError('invalid continuation byte', pos, - pos + 2) - else: - # there's only 1 or 2 valid cb, but the others are missing - raise Utf8CheckError('unexpected end of data', pos, - pos + charsleft + 1) - raise AssertionError("unreachable") + if ordch1 <= 0xC1: + raise CheckError - if n == 0: - raise Utf8CheckError('invalid start byte', pos, pos + 1) - elif n == 1: - assert 0, "ascii should have gone through the fast path" + if ordch1 <= 0xDF: + continuation_bytes += 1 + if pos >= len(s): + raise CheckError + ordch2 = ord(s[pos]) + pos += 1 - elif n == 2: - ordch2 = ord(s[pos+1]) - if runicode._invalid_byte_2_of_2(ordch2): - raise Utf8CheckError('invalid continuation byte', pos, - pos + 2) + if _invalid_byte_2_of_2(ordch2): + raise CheckError # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz - lgt += 1 + continue + + if ordch1 <= 0xEF: + continuation_bytes += 2 + if (pos + 2) > len(s): + raise CheckError + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) pos += 2 - elif n == 3: - ordch2 = ord(s[pos+1]) - ordch3 = ord(s[pos+2]) - if runicode._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): - raise Utf8CheckError('invalid continuation byte', pos, - pos + 1) - elif runicode._invalid_byte_3_of_3(ordch3): - raise Utf8CheckError('invalid continuation byte', pos, - pos + 2) + if _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): + raise CheckError + elif _invalid_byte_3_of_3(ordch3): + raise CheckError # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - lgt += 1 + continue + + if ordch1 <= 0xF4: + continuation_bytes += 3 + if (pos + 3) > len(s): + raise CheckError + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + ordch4 = ord(s[pos + 2]) pos += 3 - elif n == 4: - ordch2 = ord(s[pos+1]) - ordch3 = ord(s[pos+2]) - ordch4 = ord(s[pos+3]) - if runicode._invalid_byte_2_of_4(ordch1, ordch2): - raise Utf8CheckError('invalid continuation byte', pos, - pos + 1) - elif runicode._invalid_byte_3_of_4(ordch3): - raise Utf8CheckError('invalid continuation 
byte', pos, - pos + 2) - elif runicode._invalid_byte_4_of_4(ordch4): - raise Utf8CheckError('invalid continuation byte', pos, - pos + 3) + if _invalid_byte_2_of_4(ordch1, ordch2): + raise CheckError + elif _invalid_byte_3_of_4(ordch3): + raise CheckError + elif _invalid_byte_4_of_4(ordch4): + raise CheckError # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz - c = (((ordch1 & 0x07) << 18) + # 0b00000111 - ((ordch2 & 0x3F) << 12) + # 0b00111111 - ((ordch3 & 0x3F) << 6) + # 0b00111111 - (ordch4 & 0x3F)) # 0b00111111 - if c <= runicode.MAXUNICODE: - lgt += 1 - else: - # append the two surrogates: - lgt += 2 - pos += 4 + continue - return pos, lgt + raise CheckError + + assert pos == len(s) + return pos - continuation_bytes diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -1,14 +1,18 @@ - +import py import sys from hypothesis import given, strategies, settings, example from rpython.rlib import rutf8, runicode - at given(strategies.integers(min_value=0, max_value=runicode.MAXUNICODE)) -def test_unichr_as_utf8(i): - u, lgt = rutf8.unichr_as_utf8(i) - r = runicode.UNICHR(i) - assert u == r.encode('utf8') + + at given(strategies.characters(), strategies.booleans()) +def test_unichr_as_utf8(c, allow_surrogates): + i = ord(c) + if not allow_surrogates and 0xD800 <= i <= 0xDFFF: + py.test.raises(ValueError, rutf8.unichr_as_utf8, i, allow_surrogates) + else: + u = rutf8.unichr_as_utf8(i, allow_surrogates) + assert u == c.encode('utf8') @given(strategies.binary()) def test_check_ascii(s): @@ -19,28 +23,32 @@ raised = True try: rutf8.check_ascii(s) - except rutf8.AsciiCheckError as a: + except rutf8.CheckError: assert raised - assert a.pos == e.start else: assert not raised - at given(strategies.binary()) -def test_str_check_utf8(s): + at given(strategies.binary(), strategies.booleans()) +def test_check_utf8(s, allow_surrogates): + _test_check_utf8(s, 
allow_surrogates) + + at given(strategies.text(), strategies.booleans()) +def test_check_utf8_valid(u, allow_surrogates): + _test_check_utf8(u.encode('utf-8'), allow_surrogates) + +def _test_check_utf8(s, allow_surrogates): try: - u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True) + u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True, + allow_surrogates=allow_surrogates) valid = True except UnicodeDecodeError as e: valid = False try: - consumed, length = rutf8.str_check_utf8(s, len(s), final=True) - except rutf8.Utf8CheckError as a: + length = rutf8.check_utf8(s, allow_surrogates) + except rutf8.CheckError: assert not valid - assert a.startpos == e.start - # assert a.end == e.end, ideally else: assert valid - assert consumed == len(s) assert length == len(u) @given(strategies.characters()) @@ -80,5 +88,5 @@ response = True else: response = False - r = rutf8.utf8_in_chars(unichr(i).encode('utf8'), 0, uni.encode('utf8')) + r = unichr(i).encode('utf8') in uni.encode('utf8') assert r == response From pypy.commits at gmail.com Thu Aug 24 08:50:48 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 05:50:48 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: (fijal, arigo) Message-ID: <599ecba8.db85df0a.c853d.e306@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92249:c9a84142d1e3 Date: 2017-08-24 14:50 +0200 http://bitbucket.org/pypy/pypy/changeset/c9a84142d1e3/ Log: (fijal, arigo) Fix the gateway logic: we can now pass 'utf8' to get just a utf-8-encoded string diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -160,6 +160,9 @@ def visit_text0(self, el, app_sig): self.checked_space_method(el, app_sig) + def visit_utf8(self, el, app_sig): + self.checked_space_method(el, app_sig) + def visit_fsencode(self, el, app_sig): self.checked_space_method(el, app_sig) @@ -244,7 +247,6 @@ def __init__(self): UnwrapSpecEmit.__init__(self) 
self.run_args = [] - self.extracode = [] def scopenext(self): return "scope_w[%d]" % self.succ() @@ -305,6 +307,9 @@ def visit_text0(self, typ): self.run_args.append("space.text0_w(%s)" % (self.scopenext(),)) + def visit_utf8(self, typ): + self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),)) + def visit_fsencode(self, typ): self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),)) @@ -359,9 +364,8 @@ d = {} source = """if 1: def _run(self, space, scope_w): - %s return self.behavior(%s) - \n""" % ("\n".join(self.extracode), ', '.join(self.run_args)) + \n""" % (', '.join(self.run_args),) exec compile2(source) in self.miniglobals, d activation_cls = type("BuiltinActivation_UwS_%s" % label, @@ -402,7 +406,6 @@ UnwrapSpecEmit.__init__(self) self.args = [] self.unwrap = [] - self.extracode = [] self.finger = 0 def dispatch(self, el, *args): @@ -472,6 +475,9 @@ def visit_text0(self, typ): self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),)) + def visit_utf8(self, typ): + self.unwrap.append("space.utf8_w(%s)" % (self.nextarg(),)) + def visit_fsencode(self, typ): self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),)) @@ -526,10 +532,9 @@ unwrap_info.miniglobals['func'] = func source = """if 1: def fastfunc_%s_%d(%s): - %s return func(%s) \n""" % (func.__name__.replace('-', '_'), narg, - ', '.join(args), '\n'.join(unwrap_info.extracode), + ', '.join(args), ', '.join(unwrap_info.unwrap)) exec compile2(source) in unwrap_info.miniglobals, d fastfunc = d['fastfunc_%s_%d' % (func.__name__.replace('-', '_'), narg)] diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -538,8 +538,8 @@ def test_interp2app_unwrap_spec_utf8(self): space = self.space w = space.wrap - def g3_u(space, utf8, utf8len): - return space.newtuple([space.wrap(len(utf8)), space.wrap(utf8len)]) + def g3_u(space, utf8): + return space.wrap(utf8) app_g3_u 
= gateway.interp2app_temp(g3_u, unwrap_spec=[gateway.ObjSpace, 'utf8']) @@ -547,14 +547,20 @@ encoded = u"gęść".encode('utf8') assert self.space.eq_w( space.call_function(w_app_g3_u, w(u"gęść")), - space.newtuple([w(len(encoded)), w(4)])) + w(encoded)) assert self.space.eq_w( space.call_function(w_app_g3_u, w("foo")), - space.newtuple([w(3), w(3)])) + w("foo")) raises(gateway.OperationError, space.call_function, w_app_g3_u, w(None)) raises(gateway.OperationError, space.call_function, w_app_g3_u, w(42)) + w_ascii = space.appexec([], """(): + import sys + return sys.getdefaultencoding() == 'ascii'""") + if space.is_true(w_ascii): + raises(gateway.OperationError, space.call_function, w_app_g3_u, + w("\x80")) def test_interp2app_unwrap_spec_unwrapper(self): space = self.space diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -374,10 +374,10 @@ def make_encoder_wrapper(name): rname = "utf8_encode_%s" % (name.replace("_encode", ""), ) - XXX @unwrap_spec(utf8='utf8', errors='text_or_none') def wrap_encoder(space, utf8, utf8len, errors="strict"): from pypy.interpreter import unicodehelper + XXX if errors is None: errors = 'strict' diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -831,7 +831,8 @@ s = space.charbuf_w(w_obj) try: rutf8.check_ascii(s) - except rutf8.AsciiCheckError as e: + except rutf8.CheckError: + XXX unicodehelper.decode_error_handler(space)(None, 'ascii', "ordinal not in range(128)", s, e.pos, e.pos+1) assert False @@ -842,7 +843,8 @@ try: _, lgt = rutf8.str_check_utf8(s, len(s), final=True, allow_surrogates=True) - except rutf8.Utf8CheckError as e: + except rutf8.CheckError: + XXX eh(None, 'utf8', e.msg, s, e.startpos, e.endpos) assert False, "has to raise" return space.newutf8(s, lgt) From pypy.commits at 
gmail.com Thu Aug 24 08:55:20 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 05:55:20 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: Fix Message-ID: <599eccb8.93b5df0a.b63c6.d03a@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92250:86b689eb4f9f Date: 2017-08-24 14:54 +0200 http://bitbucket.org/pypy/pypy/changeset/86b689eb4f9f/ Log: Fix diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -831,8 +831,7 @@ s = space.charbuf_w(w_obj) try: rutf8.check_ascii(s) - except rutf8.CheckError: - XXX + except rutf8.CheckError as e: unicodehelper.decode_error_handler(space)(None, 'ascii', "ordinal not in range(128)", s, e.pos, e.pos+1) assert False diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -9,6 +9,10 @@ Fun comes from surrogates. Various functions don't normally accept any unicode character betwen 0xd800 and 0xdfff, but do if you give the 'allow_surrogates = True' flag. + +This is a minimal reference implementation. A lot of interpreters +need their own copy-pasted copy of some of the logic here, with +extra code in the middle for error handlers and so on. 
""" from rpython.rlib.objectmodel import enforceargs @@ -138,43 +142,14 @@ assert False, "unreachable" class CheckError(Exception): - pass + def __init__(self, pos): + self.pos = pos @jit.elidable def check_ascii(s): for i in range(len(s)): if ord(s[i]) > 0x7F: - raise CheckError - -#def utf8_encode_ascii(s, errors, encoding, msg, errorhandler): -# res = StringBuilder(len(s)) -# u_pos = 0 -# pos = 0 -# while pos < len(s): -# chr1 = s[pos] -# if ord(chr1) < 0x80: -# res.append(chr1) -# else: -# repl, _, _, _ = errorhandler(errors, encoding, msg, s, u_pos, u_pos + 1) -# res.append(repl) -# u_pos += 1 -# pos = next_codepoint_pos(s, pos) -# return res.build() - -#def str_decode_ascii(s, size, errors, errorhandler): -# # ASCII is equivalent to the first 128 ordinals in Unicode. -# result = StringBuilder(size) -# pos = 0 -# while pos < size: -# c = s[pos] -# if ord(c) < 128: -# result.append(c) -# else: -# r, _, _ = errorhandler(errors, "ascii", "ordinal not in range(128)", -# s, pos, pos + 1) -# result.append(r) -# pos += 1 -# return result.build(), pos, -1 + raise CheckError(i) def islinebreak(s, pos): chr1 = ord(s[pos]) @@ -266,54 +241,51 @@ continue if ordch1 <= 0xC1: - raise CheckError + raise CheckError(pos - 1) if ordch1 <= 0xDF: - continuation_bytes += 1 if pos >= len(s): - raise CheckError + raise CheckError(pos - 1) ordch2 = ord(s[pos]) pos += 1 if _invalid_byte_2_of_2(ordch2): - raise CheckError + raise CheckError(pos - 2) # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + continuation_bytes += 1 continue if ordch1 <= 0xEF: - continuation_bytes += 2 if (pos + 2) > len(s): - raise CheckError + raise CheckError(pos - 1) ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) pos += 2 - if _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): - raise CheckError - elif _invalid_byte_3_of_3(ordch3): - raise CheckError + if (_invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates) or + _invalid_byte_3_of_3(ordch3)): + raise CheckError(pos - 3) # 1110xxxx 10yyyyyy 10zzzzzz -> 
00000000 xxxxyyyy yyzzzzzz + continuation_bytes += 2 continue if ordch1 <= 0xF4: - continuation_bytes += 3 if (pos + 3) > len(s): - raise CheckError + raise CheckError(pos - 1) ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) ordch4 = ord(s[pos + 2]) pos += 3 - if _invalid_byte_2_of_4(ordch1, ordch2): - raise CheckError - elif _invalid_byte_3_of_4(ordch3): - raise CheckError - elif _invalid_byte_4_of_4(ordch4): - raise CheckError + if (_invalid_byte_2_of_4(ordch1, ordch2) or + _invalid_byte_3_of_4(ordch3) or + _invalid_byte_4_of_4(ordch4)): + raise CheckError(pos - 4) # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + continuation_bytes += 3 continue - raise CheckError + raise CheckError(pos - 1) assert pos == len(s) return pos - continuation_bytes From pypy.commits at gmail.com Thu Aug 24 09:03:41 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 24 Aug 2017 06:03:41 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: Tweaks tweaks, test_unicodeobject starts to pass again Message-ID: <599ecead.0b99df0a.cbefd.7193@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r92251:d602bc94d49f Date: 2017-08-24 15:03 +0200 http://bitbucket.org/pypy/pypy/changeset/d602bc94d49f/ Log: Tweaks tweaks, test_unicodeobject starts to pass again diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -63,8 +63,9 @@ # you still get two surrogate unicode characters in the result. # These are the Python2 rules; Python3 differs. 
try: - consumed, length = rutf8.str_check_utf8(string, len(string), True) - except rutf8.Utf8CheckError as e: + length = rutf8.check_utf8(string, allow_surrogates=True) + except rutf8.CheckError as e: + XXX decode_error_handler(space)('strict', 'utf8', e.msg, string, e.startpos, e.endpos) raise False, "unreachable" diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -5,9 +5,8 @@ from pypy.interpreter import gateway from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import unwrap_spec, WrappedDefault -from rpython.rlib.rutf8 import unichr_as_utf8 from rpython.rlib.rfloat import isfinite, isinf, round_double, round_away -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rutf8 import __builtin__ def abs(space, w_val): @@ -25,12 +24,11 @@ @unwrap_spec(code=int) def unichr(space, code): "Return a Unicode string of one character with the given ordinal." - # XXX this assumes unichr would be happy to return you surrogates try: - s, lgt = unichr_as_utf8(code) + s = rutf8.unichr_as_utf8(code, allow_surrogates=True) except ValueError: raise oefmt(space.w_ValueError, "unichr() arg out of range") - return space.newutf8(s, lgt) + return space.newutf8(s, 1) def len(space, w_obj): "len(object) -> integer\n\nReturn the number of items of a sequence or mapping." 
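The `space.newutf8(s, 1)` above works because `unichr_as_utf8` always emits exactly one codepoint, however many bytes it takes. The length bookkeeping behind that comes from `compute_length_utf8` in the rutf8.py patch earlier in this digest; here is a plain Python 3 sketch of the same continuation-byte trick (the RPython original iterates a byte string with `ord`, so the byte-level logic matches but the types differ):

```python
def compute_length_utf8(s: bytes) -> int:
    # Codepoint count of a valid UTF-8 string: every codepoint has
    # exactly one lead byte, so subtract the continuation bytes,
    # which all match 0b10xxxxxx (0x80..0xBF).
    continuation_bytes = sum(1 for b in s if 0x80 <= b <= 0xBF)
    return len(s) - continuation_bytes
```

So a single `unichr` result is always length 1: `compute_length_utf8('\u20ac'.encode('utf8'))` is 1 even though the euro sign takes three bytes.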
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -34,12 +34,13 @@
     @enforceargs(utf8str=str)
     def __init__(self, utf8str, length, ucs4str=None):
         assert isinstance(utf8str, str)
+        assert length >= 0
         if ucs4str is not None:
             assert isinstance(ucs4str, unicode)
         self._utf8 = utf8str
         self._length = length
         self._ucs4 = ucs4str
-        if not we_are_translated() and length != -1:
+        if not we_are_translated():
             assert rutf8.compute_length_utf8(utf8str) == length
 
     def __repr__(self):
@@ -133,8 +134,8 @@
         return W_UnicodeObject.EMPTY
 
     def _len(self):
-        if self._length == -1:
-            self._length = self._compute_length()
+        #if self._length == -1:
+        #    self._length = self._compute_length()
         return self._length
 
     def _compute_length(self):
@@ -902,7 +903,7 @@
     s = space.bytes_w(w_bytes)
     try:
         rutf8.check_ascii(s)
-    except rutf8.AsciiCheckError:
+    except rutf8.CheckError:
         # raising UnicodeDecodeError is messy, "please crash for me"
         return unicode_from_encoded_object(space, w_bytes, "ascii", "strict")
     return W_UnicodeObject(s, len(s))

From pypy.commits at gmail.com  Thu Aug 24 09:14:47 2017
From: pypy.commits at gmail.com (arigo)
Date: Thu, 24 Aug 2017 06:14:47 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: Tweak unicode.splitlines()
Message-ID: <599ed147.f688df0a.2d174.28f3@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92252:2a8ae058f62e
Date: 2017-08-24 15:14 +0200
http://bitbucket.org/pypy/pypy/changeset/2a8ae058f62e/

Log:	Tweak unicode.splitlines()

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -522,17 +522,18 @@
                 lgt += 1
             eol = pos
             if pos < length:
-                pos = rutf8.next_codepoint_pos(value, pos)
                 # read CRLF as one line break
-                if pos < length and value[eol] == '\r' and value[pos] == '\n':
-                    pos += 1
+                if (value[pos] == '\r' and pos + 1 < length
+                        and value[pos + 1] == '\n'):
+                    pos += 2
+                    line_end_chars = 2
+                else:
+                    pos = rutf8.next_codepoint_pos(value, pos)
+                    line_end_chars = 1
                 if keepends:
-                    lgt += 1
-            if keepends:
-                eol = pos
-                lgt += 1
-            # XXX find out why lgt calculation is off
-            strs_w.append(W_UnicodeObject(value[sol:eol], -1))
+                    eol = pos
+                    lgt += line_end_chars
+            strs_w.append(W_UnicodeObject(value[sol:eol], lgt))
         return space.newlist(strs_w)
 
     @unwrap_spec(width=int)

From pypy.commits at gmail.com  Thu Aug 24 09:20:30 2017
From: pypy.commits at gmail.com (arigo)
Date: Thu, 24 Aug 2017 06:20:30 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: Fixes
Message-ID: <599ed29e.50131c0a.a6220.449a@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92253:07a16357501d
Date: 2017-08-24 15:19 +0200
http://bitbucket.org/pypy/pypy/changeset/07a16357501d/

Log:	Fixes

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -375,7 +375,7 @@
 def make_encoder_wrapper(name):
     rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
     @unwrap_spec(utf8='utf8', errors='text_or_none')
-    def wrap_encoder(space, utf8, utf8len, errors="strict"):
+    def wrap_encoder(space, utf8, errors="strict"):
         from pypy.interpreter import unicodehelper
         XXX
@@ -446,7 +446,8 @@
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=True"
 @unwrap_spec(utf8='utf8', errors='text_or_none')
-def utf_8_encode(space, utf8, utf8len, errors="strict"):
+def utf_8_encode(space, utf8, errors="strict"):
+    XXXX
     return space.newtuple([space.newbytes(utf8), space.newint(utf8len)])
 #@unwrap_spec(uni=unicode, errors='text_or_none')
 #def utf_8_encode(space, uni, errors="strict"):
@@ -472,29 +473,17 @@
     state = space.fromcache(CodecState)
     # call the fast version for checking
    try:
-        consumed, lgt = rutf8.str_check_utf8(string, len(string), final)
-    except rutf8.Utf8CheckError as e:
-        if errors == 'strict':
-            # just raise
-            state.decode_error_handler(errors, 'utf8', e.msg, string,
-                                       e.startpos, e.endpos)
-            assert False, "raises"
-        # XXX do the way aroun runicode - we can optimize it later if we
+        lgt = rutf8.check_utf8(string)
+    except rutf8.CheckError as e:
+        # XXX do the way around runicode - we can optimize it later if we
         # decide we care about obscure cases
         res, consumed, lgt = unicodehelper.str_decode_utf8(string,
             len(string), errors, final, state.decode_error_handler)
         return space.newtuple([space.newutf8(res, lgt),
-                               space.newint(consumed)])
-    #result, consumed = runicode.str_decode_utf_8_impl(
-    #    string, len(string), errors,
-    #    final, state.decode_error_handler,
-    #    allow_surrogates=True)
-    if final or consumed == len(string):
+                               space.newint(consumed)])
+    else:
         return space.newtuple([space.newutf8(string, lgt),
-                               space.newint(consumed)])
-
-    return space.newtuple([space.newutf8(string[:consumed], lgt),
-                           space.newint(consumed)])
+                               space.newint(len(string))])
 
 @unwrap_spec(data='bufferstr', errors='text_or_none',
              byteorder=int, w_final=WrappedDefault(False))
@@ -639,8 +628,9 @@
     return space.newtuple([space.newunicode(result), space.newint(consumed)])
 
 @unwrap_spec(utf8='utf8', errors='text_or_none')
-def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None):
+def charmap_encode(space, utf8, errors="strict", w_mapping=None):
     from pypy.interpreter.unicodehelper import EncodeWrapper
+    XXXXX
     if errors is None:
         errors = 'strict'
@@ -658,8 +648,9 @@
 
 @unwrap_spec(chars='utf8')
-def charmap_build(space, chars, charslen):
+def charmap_build(space, chars):
     # XXX CPython sometimes uses a three-level trie
+    XXXXXX
     w_charmap = space.newdict()
     pos = 0
     num = 0

From pypy.commits at gmail.com  Thu Aug 24 11:01:38 2017
From: pypy.commits at gmail.com (arigo)
Date: Thu, 24 Aug 2017 08:01:38 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: (fijal, arigo)
Message-ID: <599eea52.830a1c0a.ebb7.78c6@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92254:d4bde635e3a9
Date: 2017-08-24 17:00 +0200
http://bitbucket.org/pypy/pypy/changeset/d4bde635e3a9/

Log:	(fijal, arigo)

	General progress

diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -9,7 +9,7 @@
 from rpython.rlib.debug import make_sure_not_resized
 from rpython.rlib.rarithmetic import base_int, widen, is_valid_int
 from rpython.rlib.objectmodel import import_from_mixin, enforceargs, not_rpython
-from rpython.rlib import jit
+from rpython.rlib import jit, rutf8
 
 # Object imports
 from pypy.objspace.std.basestringtype import basestring_typedef
@@ -312,11 +312,12 @@
             return self.newlist(list_u)
         return W_ListObject.newlist_unicode(self, list_u)
 
-    def newlist_from_unicode(self, lst):
+    def newlist_utf8(self, lst):
         res_w = []
-        for u in lst:
-            assert u is not None
-            res_w.append(self.newutf8(u, -1))
+        for utf in lst:
+            assert utf is not None
+            assert isinstance(utf, str)
+            res_w.append(self.newutf8(utf, rutf8.check_utf8(utf)))
         return self.newlist(res_w)
 
     def newlist_int(self, list_i):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -315,6 +315,16 @@
         assert u'one!two!three!'.replace('x', '@') == u'one!two!three!'
         assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!'
         assert u'abc'.replace('', u'-') == u'-a-b-c-'
+        assert u'\u1234'.replace(u'', '-') == u'-\u1234-'
+        assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678'
+        assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678'
+        assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678'
+        assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-'
         assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c'
         assert u'abc'.replace('', '-', 0) == u'abc'
         assert u''.replace(u'', '') == u''
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -6,7 +6,7 @@
 from rpython.rlib.buffer import StringBuffer
 from rpython.rlib.mutbuffer import MutableStringBuffer
 from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder,\
-     replace
+     replace_count
 from rpython.rlib.runicode import make_unicode_escape_function
 from rpython.rlib import rutf8, jit
@@ -41,7 +41,7 @@
         self._length = length
         self._ucs4 = ucs4str
         if not we_are_translated():
-            assert rutf8.compute_length_utf8(utf8str) == length
+            assert rutf8.check_utf8(utf8str) == length
 
     def __repr__(self):
@@ -561,30 +561,30 @@
         res = []
         value = self._utf8
         if space.is_none(w_sep):
-            res = split(value, maxsplit=maxsplit, isutf8=1)
-            return space.newlist_from_unicode(res)
+            res = split(value, maxsplit=maxsplit, isutf8=True)
+            return space.newlist_utf8(res)
 
         by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
         if len(by) == 0:
             raise oefmt(space.w_ValueError, "empty separator")
-        res = split(value, by, maxsplit, isutf8=1)
+        res = split(value, by, maxsplit, isutf8=True)
 
-        return space.newlist_from_unicode(res)
+        return space.newlist_utf8(res)
 
     @unwrap_spec(maxsplit=int)
     def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
         res = []
         value = self._utf8
         if space.is_none(w_sep):
-            res = rsplit(value, maxsplit=maxsplit, isutf8=1)
-            return space.newlist_from_unicode(res)
+            res = rsplit(value, maxsplit=maxsplit, isutf8=True)
+            return space.newlist_utf8(res)
 
         by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
         if len(by) == 0:
             raise oefmt(space.w_ValueError, "empty separator")
-        res = rsplit(value, by, maxsplit, isutf8=1)
+        res = rsplit(value, by, maxsplit, isutf8=True)
 
-        return space.newlist_from_unicode(res)
+        return space.newlist_utf8(res)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
     def descr_center(self, space, width, w_fillchar):
@@ -622,11 +622,13 @@
         if count >= 0 and len(input) == 0:
             return self._empty()
         try:
-            res = replace(input, w_sub._utf8, w_by._utf8, count)
+            res, replacements = replace_count(input, w_sub._utf8, w_by._utf8,
+                                              count, isutf8=True)
         except OverflowError:
             raise oefmt(space.w_OverflowError, "replace string is too long")
 
-        return W_UnicodeObject(res, -1)
+        newlength = self._length + replacements * (w_by._length - w_sub._length)
+        return W_UnicodeObject(res, newlength)
 
     def descr_mul(self, space, w_times):
         try:
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -16,42 +16,38 @@
 # -------------- public API for string functions -----------------------
 
 @specialize.ll_and_arg(2)
-def _isspace(s, pos, isutf8=0):
+def _isspace(s, pos, isutf8=False):
     if isutf8:
         from rpython.rlib import rutf8
         return rutf8.isspace(s, pos)
+    char = s[pos]
+    if isinstance(char, str):
+        return char.isspace()
     else:
-        char = s[pos]
-        if isinstance(char, str):
-            return char.isspace()
-        else:
-            assert isinstance(char, unicode)
-            return unicodedb.isspace(ord(char))
+        assert isinstance(char, unicode)
+        return unicodedb.isspace(ord(char))
 
 @specialize.ll_and_arg(2)
 def _incr(s, pos, isutf8):
-    from rpython.rlib.rutf8 import next_codepoint_pos
     if isutf8:
-        if pos == -1:
-            return 0
+        from rpython.rlib.rutf8 import next_codepoint_pos
+        assert pos >= 0
         return next_codepoint_pos(s, pos)
     else:
        return pos + 1
 
 @specialize.ll_and_arg(2)
 def _decr(s, pos, isutf8):
-    from rpython.rlib.rutf8 import prev_codepoint_pos
     if isutf8:
-        if pos == 0:
+        from rpython.rlib.rutf8 import prev_codepoint_pos
+        if pos <= 0:
             return -1
         return prev_codepoint_pos(s, pos)
     else:
         return pos - 1
 
 @specialize.ll_and_arg(3)
-def split(value, by=None, maxsplit=-1, isutf8=0):
+def split(value, by=None, maxsplit=-1, isutf8=False):
     if by is None:
         length = len(value)
         i = 0
@@ -83,7 +79,11 @@
             else:
                 break
         return res
+    else:
+        return _split_by(value, by, maxsplit)
 
+@specialize.argtype(0)
+def _split_by(value, by, maxsplit):
     if isinstance(value, unicode):
         assert isinstance(by, unicode)
     if isinstance(value, str):
@@ -133,7 +133,7 @@
 
 @specialize.ll_and_arg(3)
-def rsplit(value, by=None, maxsplit=-1, isutf8=0):
+def rsplit(value, by=None, maxsplit=-1, isutf8=False):
     if by is None:
         res = []
@@ -147,30 +147,34 @@
             else:
                 break   # end of string, finished
 
-            # find the start of the word
-            # (more precisely, 'j' will be the space character before the word)
+            # find the start of the word as 'j1'
             if maxsplit == 0:
-                j = -1   # take all the rest of the string
+                j1 = 0   # take all the rest of the string
+                j = -1
             else:
-                j = _decr(value, i, isutf8)
-                while j >= 0 and not _isspace(value, j, isutf8):
-                    j = _decr(value, j, isutf8)
+                j1 = i
+                while True:
+                    j = _decr(value, j1, isutf8)
+                    if j < 0 or _isspace(value, j, isutf8):
+                        break
+                    j1 = j
                 maxsplit -= 1   # NB. if it's already < 0, it stays < 0
 
-            # the word is value[j+1:i+1]
-            j1 = _incr(value, j, isutf8)
+            # the word is value[j1:i+1]
             assert j1 >= 0
             i1 = _incr(value, i, isutf8)
             res.append(value[j1:i1])
-            if j < 0:
-                break
 
             # continue to look from the character before the space before the word
             i = _decr(value, j, isutf8)
 
         res.reverse()
         return res
+    else:
+        return _rsplit_by(value, by, maxsplit)
 
+@specialize.argtype(0)
+def _rsplit_by(value, by, maxsplit):
     if isinstance(value, unicode):
         assert isinstance(by, unicode)
     if isinstance(value, str):
@@ -203,6 +207,11 @@
 @specialize.argtype(0, 1)
 @jit.elidable
 def replace(input, sub, by, maxsplit=-1):
+    return replace_count(input, sub, by, maxsplit)[0]
+
+@specialize.ll_and_arg(4)
+@jit.elidable
+def replace_count(input, sub, by, maxsplit=-1, isutf8=False):
     if isinstance(input, str):
         Builder = StringBuilder
     elif isinstance(input, unicode):
@@ -211,10 +220,10 @@
         assert isinstance(input, list)
         Builder = ByteListBuilder
     if maxsplit == 0:
-        return input
+        return input, 0
 
-    if not sub:
+    if not sub and not isutf8:
         upper = len(input)
         if maxsplit > 0 and maxsplit < upper + 2:
             upper = maxsplit - 1
@@ -234,9 +243,16 @@
             builder.append(input[i])
             builder.append(by)
         builder.append_slice(input, upper, len(input))
+        replacements = upper + 1
 
     else:
         # First compute the exact result size
-        cnt = count(input, sub, 0, len(input))
+        if sub:
+            cnt = count(input, sub, 0, len(input))
+        else:
+            assert isutf8
+            from rpython.rlib import rutf8
+            cnt = rutf8.compute_length_utf8(input) + 1
+
         if cnt > maxsplit and maxsplit > 0:
             cnt = maxsplit
         diff_len = len(by) - len(sub)
@@ -245,23 +261,36 @@
             result_size = ovfcheck(result_size + len(input))
         except OverflowError:
             raise
+        replacements = cnt
 
         builder = Builder(result_size)
         start = 0
         sublen = len(sub)
 
-        while maxsplit != 0:
-            next = find(input, sub, start, len(input))
-            if next < 0:
-                break
-            builder.append_slice(input, start, next)
-            builder.append(by)
-            start = next + sublen
-            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
+        if sublen == 0:
+            assert isutf8
+            from rpython.rlib import rutf8
+            while True:
+                builder.append(by)
+                maxsplit -= 1
+                if start == len(input) or maxsplit == 0:
+                    break
+                next = rutf8.next_codepoint_pos(input, start)
+                builder.append_slice(input, start, next)
+                start = next
+        else:
+            while maxsplit != 0:
+                next = find(input, sub, start, len(input))
+                if next < 0:
+                    break
+                builder.append_slice(input, start, next)
+                builder.append(by)
+                start = next + sublen
+                maxsplit -= 1   # NB. if it's already < 0, it stays < 0
 
         builder.append_slice(input, start, len(input))
 
-    return builder.build()
+    return builder.build(), replacements
 
 def _normalize_start_end(length, start, end):
     if start < 0:
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -203,6 +203,24 @@
             return True
     return False
 
+def utf8_in_chars(value, pos, chars):
+    """Equivalent of u'x' in u'xyz', where the left-hand side is
+    a single UTF-8 character extracted from the string 'value' at 'pos'.
+    Only works if both 'value' and 'chars' are correctly-formed UTF-8
+    strings.
+    """
+    end = next_codepoint_pos(value, pos)
+    i = 0
+    while i < len(chars):
+        k = pos
+        while value[k] == chars[i]:
+            k += 1
+            i += 1
+            if k == end:
+                return True
+        i += 1
+    return False
+
 def _invalid_cont_byte(ordch):
     return ordch>>6 != 0x2    # 0b10
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -79,14 +79,8 @@
     else:
         assert not rutf8.isspace(unichr(i).encode('utf8'), 0)
 
-@given(strategies.integers(min_value=0, max_value=sys.maxunicode),
-       strategies.characters())
-def test_utf8_in_chars(i, uni):
-    if not uni:
-        return
-    if unichr(i) in uni:
-        response = True
-    else:
-        response = False
-    r = unichr(i).encode('utf8') in uni.encode('utf8')
+@given(strategies.characters(), strategies.text())
+def test_utf8_in_chars(ch, txt):
+    response = rutf8.utf8_in_chars(ch.encode('utf8'), 0, txt.encode('utf8'))
+    r = (ch in txt)
     assert r == response

From pypy.commits at gmail.com  Thu Aug 24 11:22:45 2017
From: pypy.commits at gmail.com (arigo)
Date: Thu, 24 Aug 2017 08:22:45 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: (fijal, arigo)
Message-ID: <599eef45.54871c0a.dcbf8.d7bb@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92255:8e029544ca2f
Date: 2017-08-24 17:21 +0200
http://bitbucket.org/pypy/pypy/changeset/8e029544ca2f/

Log:	(fijal, arigo)

	Fix unicode.translate()

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -365,36 +365,39 @@
         return mod_format(space, w_values, self, do_unicode=True)
 
     def descr_translate(self, space, w_table):
-        selfvalue = self._utf8.decode("utf8")
-        w_sys = space.getbuiltinmodule('sys')
-        maxunicode = space.int_w(space.getattr(w_sys,
-                                               space.newtext("maxunicode")))
-        result = []
-        for unichar in selfvalue:
+        input = self._utf8
+        result = StringBuilder(len(input))
+        result_length = 0
+        i = 0
+        while i < len(input):
+            codepoint = rutf8.codepoint_at_pos(input, i)
+            i = rutf8.next_codepoint_pos(input, i)
             try:
-                w_newval = space.getitem(w_table, space.newint(ord(unichar)))
+                w_newval = space.getitem(w_table, space.newint(codepoint))
             except OperationError as e:
-                if e.match(space, space.w_LookupError):
-                    result.append(unichar)
-                else:
+                if not e.match(space, space.w_LookupError):
                     raise
             else:
                 if space.is_w(w_newval, space.w_None):
                     continue
                 elif space.isinstance_w(w_newval, space.w_int):
-                    newval = space.int_w(w_newval)
-                    if newval < 0 or newval > maxunicode:
-                        raise oefmt(space.w_TypeError,
-                                    "character mapping must be in range(%s)",
-                                    hex(maxunicode + 1))
-                    result.append(unichr(newval))
-                elif space.isinstance_w(w_newval, space.w_unicode):
-                    result.append(space.utf8_w(w_newval).decode("utf8"))
+                    codepoint = space.int_w(w_newval)
+                elif isinstance(w_newval, W_UnicodeObject):
+                    result.append(w_newval._utf8)
+                    result_length += w_newval._length
+                    continue
                 else:
                     raise oefmt(space.w_TypeError,
                                 "character mapping must return integer, None "
                                 "or unicode")
-        return W_UnicodeObject(u''.join(result).encode("utf8"), -1)
+            try:
+                rutf8.unichr_as_utf8_append(result, codepoint,
+                                            allow_surrogates=True)
+                result_length += 1
+            except ValueError:
+                raise oefmt(space.w_TypeError,
+                            "character mapping must be in range(0x110000)")
+        return W_UnicodeObject(result.build(), result_length)
 
     def descr_encode(self, space, w_encoding=None, w_errors=None):
         encoding, errors = _get_encoding_and_errors(space, w_encoding,

From pypy.commits at gmail.com  Thu Aug 24 11:25:22 2017
From: pypy.commits at gmail.com (arigo)
Date: Thu, 24 Aug 2017 08:25:22 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: (fijal, arigo)
Message-ID: <599eefe2.41931c0a.d1c9b.3796@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92256:bc374de6e273
Date: 2017-08-24 17:24 +0200
http://bitbucket.org/pypy/pypy/changeset/bc374de6e273/

Log:	(fijal, arigo)

	Tweaks

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -41,7 +41,7 @@
         self._length = length
         self._ucs4 = ucs4str
         if not we_are_translated():
-            assert rutf8.check_utf8(utf8str) == length
+            assert rutf8.check_utf8(utf8str, allow_surrogates=True) == length
 
     def __repr__(self):
         """representation for debugging purposes"""
@@ -845,12 +845,11 @@
             return space.newutf8(s, len(s))
     if encoding == 'utf-8':
         s = space.charbuf_w(w_obj)
-        eh = unicodehelper.decode_error_handler(space)
         try:
-            _, lgt = rutf8.str_check_utf8(s, len(s), final=True,
-                                          allow_surrogates=True)
+            lgt = rutf8.check_utf8(s, allow_surrogates=True)
         except rutf8.CheckError:
             XXX
+            eh = unicodehelper.decode_error_handler(space)
             eh(None, 'utf8', e.msg, s, e.startpos, e.endpos)
             assert False, "has to raise"
         return space.newutf8(s, lgt)

From pypy.commits at gmail.com  Thu Aug 24 11:38:08 2017
From: pypy.commits at gmail.com (arigo)
Date: Thu, 24 Aug 2017 08:38:08 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: (fijal, arigo)
Message-ID: <599ef2e0.9aa4df0a.99c3f.437b@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92257:2cc3cf290fbf
Date: 2017-08-24 17:37 +0200
http://bitbucket.org/pypy/pypy/changeset/2cc3cf290fbf/

Log:	(fijal, arigo)

	Hack hack hack

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -29,6 +29,10 @@
                        space.newtext(msg)]))
     return raise_unicode_exception_encode
 
+def convert_arg_to_w_unicode(space, w_arg, strict=None):
+    from pypy.objspace.std.unicodeobject import W_UnicodeObject
+    return W_UnicodeObject.convert_arg_to_w_unicode(space, w_arg, strict)
+
 # ____________________________________________________________
 
 def encode(space, w_data, encoding=None, errors='strict'):
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -374,16 +374,17 @@
 
 def make_encoder_wrapper(name):
     rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
-    @unwrap_spec(utf8='utf8', errors='text_or_none')
-    def wrap_encoder(space, utf8, errors="strict"):
+    @unwrap_spec(errors='text_or_none')
+    def wrap_encoder(space, w_arg, errors="strict"):
         from pypy.interpreter import unicodehelper
-        XXX
+        w_arg = unicodehelper.convert_arg_to_w_unicode(space, w_arg, rname)
         if errors is None:
             errors = 'strict'
         state = space.fromcache(CodecState)
         func = getattr(unicodehelper, rname)
-        result = func(utf8, utf8len,
+        utf8len = w_arg._length
+        result = func(w_arg._utf8, utf8len,
                       errors, state.encode_error_handler)
         return space.newtuple([space.newbytes(result), space.newint(utf8len)])
     wrap_encoder.func_name = rname

From pypy.commits at gmail.com  Thu Aug 24 12:40:22 2017
From: pypy.commits at gmail.com (fijal)
Date: Thu, 24 Aug 2017 09:40:22 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: (arigo, fijal) implement fast
 skipping technique in RPython
Message-ID: <599f0176.f688df0a.2d174.664a@mx.google.com>

Author: fijal
Branch: unicode-utf8
Changeset: r92258:d2735187e72f
Date: 2017-08-24 18:39 +0200
http://bitbucket.org/pypy/pypy/changeset/d2735187e72f/

Log:	(arigo, fijal) implement fast skipping technique in RPython

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -19,6 +19,7 @@
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib import jit
 from rpython.rlib.rarithmetic import r_uint
+from rpython.rtyper.lltypesystem import lltype
 
 
 def unichr_as_utf8(code, allow_surrogates=False):
@@ -307,3 +308,65 @@
             assert pos == len(s)
 
     return pos - continuation_bytes
+
+
+UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct(
+    'utf8_loc',
+    ('index', lltype.Signed),
+    ('ofs', lltype.FixedSizeArray(lltype.Char, 16))
+    ))
+
+EMPTY_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True)
+
+def create_utf8_index_storage(utf8, utf8len):
+    """ Create an index storage which stores index of each 4th character
+    in utf8 encoded unicode string.
+    """
+    if utf8len == 0:
+        return EMPTY_INDEX_STORAGE
+    arraysize = (utf8len + 63) // 64
+    storage = lltype.malloc(UTF8_INDEX_STORAGE, arraysize)
+    baseindex = 0
+    current = 0
+    next = 0
+    while True:
+        storage[current].index = baseindex
+        for i in range(16):
+            next = next_codepoint_pos(utf8, next)
+            storage[current].ofs[i] = chr(next - baseindex)
+            utf8len -= 4
+            if utf8len <= 0:
+                break
+            next = next_codepoint_pos(utf8, next)
+            next = next_codepoint_pos(utf8, next)
+            next = next_codepoint_pos(utf8, next)
+        else:
+            current += 1
+            baseindex = next
+            continue
+        break
+    return storage
+
+def codepoint_position_at_index(utf8, storage, index):
+    """ Return byte index of a character inside utf8 encoded string, given
+    storage of type UTF8_INDEX_STORAGE
+    """
+    current = index >> 6
+    ofs = ord(storage[current].ofs[(index >> 2) & 15])
+    bytepos = storage[current].index + ofs
+    index &= 0x3
+    if index == 0:
+        return prev_codepoint_pos(utf8, bytepos)
+    elif index == 1:
+        return bytepos
+    elif index == 2:
+        return next_codepoint_pos(utf8, bytepos)
+    else:
+        return next_codepoint_pos(utf8, next_codepoint_pos(utf8, bytepos))
+
+def codepoint_at_index(utf8, storage, index):
+    """ Return codepoint of a character inside utf8 encoded string, given
+    storage of type UTF8_INDEX_STORAGE
+    """
+    bytepos = codepoint_position_at_index(utf8, storage, index)
+    return codepoint_at_pos(utf8, bytepos)
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -84,3 +84,9 @@
     response = rutf8.utf8_in_chars(ch.encode('utf8'), 0, txt.encode('utf8'))
     r = (ch in txt)
     assert r == response
+
+@given(strategies.text())
+def test_utf8_index_storage(u):
+    index = rutf8.create_utf8_index_storage(u.encode('utf8'), len(u))
+    for i, item in enumerate(u):
+        rutf8.codepoint_at_index(u.encode('utf8'), index, i) == item.encode('utf8')

From pypy.commits at gmail.com  Thu Aug 24 16:08:49 2017
From: pypy.commits at gmail.com (mattip)
Date: Thu, 24 Aug 2017 13:08:49 -0700 (PDT)
Subject: [pypy-commit] pypy default: test, fix for missing __ne__ on slices
Message-ID: <599f3251.f28ddf0a.56b74.7959@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92259:5a7118bbfee9
Date: 2017-08-24 23:07 +0300
http://bitbucket.org/pypy/pypy/changeset/5a7118bbfee9/

Log:	test, fix for missing __ne__ on slices

diff --git a/pypy/objspace/std/sliceobject.py b/pypy/objspace/std/sliceobject.py
--- a/pypy/objspace/std/sliceobject.py
+++ b/pypy/objspace/std/sliceobject.py
@@ -132,6 +132,18 @@
         else:
             return space.w_False
 
+    def descr_ne(self, space, w_other):
+        if space.is_w(self, w_other):
+            return space.w_False
+        if not isinstance(w_other, W_SliceObject):
+            return space.w_NotImplemented
+        if space.eq_w(self.w_start, w_other.w_start) and \
+           space.eq_w(self.w_stop, w_other.w_stop) and \
+           space.eq_w(self.w_step, w_other.w_step):
+            return space.w_False
+        else:
+            return space.w_True
+
     def descr_lt(self, space, w_other):
         if space.is_w(self, w_other):
             return space.w_False   # see comments in descr_eq()
@@ -177,6 +189,7 @@
 
         __reduce__ = gateway.interp2app(W_SliceObject.descr__reduce__),
         __eq__ = gateway.interp2app(W_SliceObject.descr_eq),
+        __ne__ = gateway.interp2app(W_SliceObject.descr_ne),
         __lt__ = gateway.interp2app(W_SliceObject.descr_lt),
 
         start = slicewprop('w_start'),
diff --git a/pypy/objspace/std/test/test_sliceobject.py b/pypy/objspace/std/test/test_sliceobject.py
--- a/pypy/objspace/std/test/test_sliceobject.py
+++ b/pypy/objspace/std/test/test_sliceobject.py
@@ -94,6 +94,7 @@
         slice1 = slice(1, 2, 3)
         slice2 = slice(1, 2, 3)
         assert slice1 == slice2
+        assert not slice1 != slice2
         slice2 = slice(1, 2)
         assert slice1 != slice2

From pypy.commits at gmail.com  Fri Aug 25 03:53:24 2017
From: pypy.commits at gmail.com (arigo)
Date: Fri, 25 Aug 2017 00:53:24 -0700 (PDT)
Subject: [pypy-commit] pypy.org extradoc: Update the section about
 sandboxing again, to make it sound even less supported
Message-ID: <599fd774.0b99df0a.cbefd.ca88@mx.google.com>

Author: Armin Rigo
Branch: extradoc
Changeset: r898:4d64c0df4967
Date: 2017-08-25 09:52 +0200
http://bitbucket.org/pypy/pypy.org/changeset/4d64c0df4967/

Log:	Update the section about sandboxing again, to make it sound even
	less supported

diff --git a/download.html b/download.html
--- a/download.html
+++ b/download.html
@@ -182,16 +182,12 @@
 Sandboxing: A special safe version. Read the docs about sandboxing.
-(It is also possible to translate a version that includes both
-sandboxing and the JIT compiler, although as the JIT is relatively
-complicated, this reduces a bit the level of confidence we can put in
-the result.) Note that the sandboxed binary needs a full pypy checkout
-to work. Consult the sandbox docs for details. (These are old,
-PyPy 1.8.)
-
+This version is not supported and not actively maintained. You
+will likely have to fix some issues yourself, or checkout an old
+version, or otherwise play around on your own. We provide this
+documentation only for historical reasons. Please do not use in
+production. For reference, there are some very old, unmaintained
+binaries for Linux (32bit, 64bit).
diff --git a/source/download.txt b/source/download.txt
--- a/source/download.txt
+++ b/source/download.txt
@@ -179,16 +179,12 @@
 .. __: https://bitbucket.org/pypy/revdb/
 
 * Sandboxing: A special safe version.  Read the docs about sandboxing_.
-  (It is also possible to translate_ a version that includes both
-  sandboxing and the JIT compiler, although as the JIT is relatively
-  complicated, this reduces a bit the level of confidence we can put in
-  the result.)  **Note that the sandboxed binary needs a full pypy checkout
-  to work**.  Consult the `sandbox docs`_ for details.  (These are old,
-  PyPy 1.8.)
-
-  * `Linux binary (64bit)`__
-
-  * `Linux binary (32bit)`__
+  This version is **not supported** and not actively maintained.  You
+  will likely have to fix some issues yourself, or checkout an old
+  version, or otherwise play around on your own.  We provide this
+  documentation only for historical reasons.  Please do not use in
+  production.  For reference, there are some very old, unmaintained
+  binaries for Linux (32bit__, 64bit__).
 
 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-1.8-sandbox-linux64.tar.bz2
 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy-1.8-sandbox-linux.tar.bz2

From pypy.commits at gmail.com  Fri Aug 25 04:36:11 2017
From: pypy.commits at gmail.com (arigo)
Date: Fri, 25 Aug 2017 01:36:11 -0700 (PDT)
Subject: [pypy-commit] cffi default: Improve error message
Message-ID: <599fe17b.e9a9df0a.ea3c.c3c3@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r3004:def116eeded6
Date: 2017-08-25 10:35 +0200
http://bitbucket.org/cffi/cffi/changeset/def116eeded6/

Log:	Improve error message

diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -44,11 +44,14 @@
 
 def no_working_compiler_found():
     sys.stderr.write("""
-    No working compiler found, or bogus compiler options
-    passed to the compiler from Python's distutils module.
-    See the error messages above.
-    (If they are about -mno-fused-madd and you are on OS/X 10.8,
-    see http://stackoverflow.com/questions/22313407/ .)\n""")
+    No working compiler found, or bogus compiler options passed to
+    the compiler from Python's standard "distutils" module.  See
+    the error messages above.  Likely, the problem is not related
+    to CFFI but generic to the setup.py of any Python package that
+    tries to compile C code.  (Hints: on OS/X 10.8, for errors about
+    -mno-fused-madd see http://stackoverflow.com/questions/22313407/
+    Otherwise, see https://wiki.python.org/moin/CompLangPython or
+    the IRC channel #python on irc.freenode.net.)\n""")
     sys.exit(1)
 
 def get_config():

From pypy.commits at gmail.com  Fri Aug 25 06:32:29 2017
From: pypy.commits at gmail.com (arigo)
Date: Fri, 25 Aug 2017 03:32:29 -0700 (PDT)
Subject: [pypy-commit] pypy unicode-utf8: Fix test, improve logic
Message-ID: <599ffcbd.11b2df0a.d2919.ab37@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r92260:7193602c9384
Date: 2017-08-25 12:30 +0200
http://bitbucket.org/pypy/pypy/changeset/7193602c9384/

Log:	Fix test, improve logic

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -93,6 +93,7 @@
     """Gives the position of the previous codepoint.
     'pos' must not be zero.
     """
+    pos = r_uint(pos)
     pos -= 1
     chr1 = ord(code[pos])
     if chr1 <= 0x7F:
@@ -142,6 +143,43 @@
             (ordch4 & 0x3F))                  # 0b00111111
     assert False, "unreachable"
 
+def codepoint_before_pos(code, pos):
+    """Give a codepoint in code at the position immediately before pos
+    - assumes valid utf8, no checking!
+    """
+    pos = r_uint(pos)
+    ordch1 = ord(code[pos-1])
+    if ordch1 <= 0x7F:
+        return ordch1
+
+    ordch2 = ordch1
+    ordch1 = ord(code[pos-2])
+    if ordch1 >= 0xC0:
+        # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
+        return (((ordch1 & 0x1F) << 6) +      # 0b00011111
+                (ordch2 & 0x3F))              # 0b00111111
+
+    ordch3 = ordch2
+    ordch2 = ordch1
+    ordch1 = ord(code[pos-3])
+    if ordch1 >= 0xC0:
+        # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
+        return (((ordch1 & 0x0F) << 12) +     # 0b00001111
+                ((ordch2 & 0x3F) << 6) +      # 0b00111111
+                (ordch3 & 0x3F))              # 0b00111111
+
+    ordch4 = ordch3
+    ordch3 = ordch2
+    ordch2 = ordch1
+    ordch1 = ord(code[pos-4])
+    if True:
+        # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
+        return (((ordch1 & 0x07) << 18) +     # 0b00000111
+                ((ordch2 & 0x3F) << 12) +     # 0b00111111
+                ((ordch3 & 0x3F) << 6) +      # 0b00111111
+                (ordch4 & 0x3F))              # 0b00111111
+    assert False, "unreachable"
+
 class CheckError(Exception):
     def __init__(self, pos):
         self.pos = pos
@@ -312,25 +350,32 @@
 
 UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct(
     'utf8_loc',
-    ('index', lltype.Signed),
+    ('baseindex', lltype.Signed),
     ('ofs', lltype.FixedSizeArray(lltype.Char, 16))
     ))
 
-EMPTY_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True)
+ASCII_INDEX_STORAGE_BLOCKS = 5
+ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE,
+                                    ASCII_INDEX_STORAGE_BLOCKS,
+                                    immortal=True)
+for _i in range(ASCII_INDEX_STORAGE_BLOCKS):
+    ASCII_INDEX_STORAGE[_i].baseindex = _i * 64
+    for _j in range(16):
+        ASCII_INDEX_STORAGE[_i].ofs[_j] = chr(_j * 4 + 1)
 
 def create_utf8_index_storage(utf8, utf8len):
     """ Create an index storage which stores index of each 4th character
     in utf8 encoded unicode string.
     """
-    if utf8len == 0:
-        return EMPTY_INDEX_STORAGE
+    if len(utf8) == utf8len <= ASCII_INDEX_STORAGE_BLOCKS * 64:
+        return ASCII_INDEX_STORAGE
     arraysize = (utf8len + 63) // 64
     storage = lltype.malloc(UTF8_INDEX_STORAGE, arraysize)
     baseindex = 0
     current = 0
-    next = 0
     while True:
-        storage[current].index = baseindex
+        storage[current].baseindex = baseindex
+        next = baseindex
         for i in range(16):
             next = next_codepoint_pos(utf8, next)
             storage[current].ofs[i] = chr(next - baseindex)
@@ -339,7 +384,7 @@
                 break
             next = next_codepoint_pos(utf8, next)
             next = next_codepoint_pos(utf8, next)
-            next = next_codepoint_pos(utf8, next) 
+            next = next_codepoint_pos(utf8, next)
         else:
             current += 1
             baseindex = next
             continue
         break
     return storage
@@ -349,11 +394,13 @@
 
 def codepoint_position_at_index(utf8, storage, index):
     """ Return byte index of a character inside utf8 encoded string, given
-    storage of type UTF8_INDEX_STORAGE
+    storage of type UTF8_INDEX_STORAGE.  The index must be smaller than
+    the utf8 length: if needed, check explicitly before calling this
+    function.
     """
     current = index >> 6
-    ofs = ord(storage[current].ofs[(index >> 2) & 15])
-    bytepos = storage[current].index + ofs
+    ofs = ord(storage[current].ofs[(index >> 2) & 0x0F])
+    bytepos = storage[current].baseindex + ofs
     index &= 0x3
     if index == 0:
         return prev_codepoint_pos(utf8, bytepos)
     elif index == 1:
         return bytepos
@@ -368,5 +415,15 @@
     """ Return codepoint of a character inside utf8 encoded string, given
     storage of type UTF8_INDEX_STORAGE
     """
-    bytepos = codepoint_position_at_index(utf8, storage, index)
+    current = index >> 6
+    ofs = ord(storage[current].ofs[(index >> 2) & 0x0F])
+    bytepos = storage[current].baseindex + ofs
+    index &= 0x3
+    if index == 0:
+        return codepoint_before_pos(utf8, bytepos)
+    if index == 3:
+        bytepos = next_codepoint_pos(utf8, bytepos)
+        index = 2     # fall-through to the next case
+    if index == 2:
+        bytepos = next_codepoint_pos(utf8, bytepos)
     return codepoint_at_pos(utf8, bytepos)
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -89,4 +89,12 @@
 def test_utf8_index_storage(u):
     index = rutf8.create_utf8_index_storage(u.encode('utf8'), len(u))
     for i, item in enumerate(u):
-        rutf8.codepoint_at_index(u.encode('utf8'), index, i) == item.encode('utf8')
+        assert (rutf8.codepoint_at_index(u.encode('utf8'), index, i) ==
+                ord(item))
+
+@given(strategies.text())
+def test_codepoint_position_at_index(u):
+    index = rutf8.create_utf8_index_storage(u.encode('utf8'), len(u))
+    for i in range(len(u)):
+        assert (rutf8.codepoint_position_at_index(u.encode('utf8'), index, i) ==
+                len(u[:i].encode('utf8')))

From pypy.commits at gmail.com  Fri Aug 25 10:19:02 2017
From: pypy.commits at gmail.com (rlamy)
Date: Fri, 25 Aug 2017 07:19:02 -0700 (PDT)
Subject: [pypy-commit] pypy default: Imperative py.test.skip() is evil
Message-ID: <59a031d6.91a5df0a.9f412.ff05@mx.google.com>

Author: Ronan Lamy
Branch: 
Changeset: r92261:2b1b6c5545d0
Date: 2017-08-25 16:18 +0200
http://bitbucket.org/pypy/pypy/changeset/2b1b6c5545d0/

Log:	Imperative py.test.skip() is evil

diff --git a/rpython/flowspace/test/test_objspace.py b/rpython/flowspace/test/test_objspace.py
--- a/rpython/flowspace/test/test_objspace.py
+++ b/rpython/flowspace/test/test_objspace.py
@@ -839,15 +839,15 @@
             return x[s]
         graph = self.codetest(myfunc)
 
+    @py.test.mark.xfail
     def test_unichr_constfold(self):
-        py.test.skip("not working")
         def myfunc():
             return unichr(1234)
         graph = self.codetest(myfunc)
         assert graph.startblock.exits[0].target is graph.returnblock
 
+    @py.test.mark.xfail
     def test_unicode_constfold(self):
-        py.test.skip("not working for now")
         def myfunc():
             return unicode("1234")
         graph = self.codetest(myfunc)

From pypy.commits at gmail.com  Sat Aug 26 16:12:22 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Sat, 26 Aug 2017 13:12:22 -0700 (PDT)
Subject: [pypy-commit] extradoc extradoc: add a link to the license so I
 don't need to continuously re-hunt the mailing
Message-ID: <59a1d626.55281c0a.66f93.067a@mx.google.com>

Author: Carl Friedrich Bolz-Tereick
Branch: extradoc
Changeset: r5831:e59c6c8223b2
Date: 2017-08-26 22:12 +0200
http://bitbucket.org/pypy/extradoc/changeset/e59c6c8223b2/

Log:	add a link to the license so I don't need to continuously re-hunt
	the mailing list post

diff --git a/logo/LICENSE b/logo/LICENSE
new file mode 100644
--- /dev/null
+++ b/logo/LICENSE
@@ -0,0 +1,4 @@
+The logo is (c) Samuel Reis under an CC-BY-SA license according to
+https://mail.python.org/pipermail/pypy-dev/2011-April/007224.html
+
+https://creativecommons.org/licenses/by-sa/3.0/

From pypy.commits at gmail.com  Sun Aug 27 02:54:05 2017
From: pypy.commits at gmail.com (smihnea)
Date: Sat, 26 Aug 2017 23:54:05 -0700 (PDT)
Subject: [pypy-commit] pypy pypy_swappedbytes: Added _swappedbytes_ support
 for ctypes.Structure
Message-ID: <59a26c8d.41931c0a.6854a.9c3f@mx.google.com>

Author: Mihnea Saracin
Branch: pypy_swappedbytes
Changeset: r92262:7ec88773f8b9
Date: 2017-07-27 17:29 +0300
http://bitbucket.org/pypy/pypy/changeset/7ec88773f8b9/ Log: Added _swappedbytes_ support for ctypes.Structure diff --git a/lib-python/2.7/ctypes/test/test_unaligned_structures.py b/lib-python/2.7/ctypes/test/test_unaligned_structures.py --- a/lib-python/2.7/ctypes/test/test_unaligned_structures.py +++ b/lib-python/2.7/ctypes/test/test_unaligned_structures.py @@ -37,10 +37,7 @@ for typ in byteswapped_structures: ## print >> sys.stderr, typ.value self.assertEqual(typ.value.offset, 1) - try: - o = typ() - except NotImplementedError as e: - self.skipTest(str(e)) # for PyPy + o = typ() o.value = 4 self.assertEqual(o.value, 4) diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -130,6 +130,7 @@ obj._buffer.__setattr__(self.name, arg) + def _set_shape(tp, rawfields, is_union=False): tp._ffistruct_ = _rawffi.Structure(rawfields, is_union, getattr(tp, '_pack_', 0)) @@ -224,7 +225,6 @@ res.__dict__['_index'] = -1 return res - class StructOrUnion(_CData): __metaclass__ = StructOrUnionMeta @@ -234,9 +234,6 @@ if ('_abstract_' in cls.__dict__ or cls is Structure or cls is union.Union): raise TypeError("abstract class") - if hasattr(cls, '_swappedbytes_'): - raise NotImplementedError("missing in PyPy: structure/union with " - "swapped (non-native) byte ordering") if hasattr(cls, '_ffistruct_'): self.__dict__['_buffer'] = self._ffistruct_(autofree=True) return self @@ -253,6 +250,17 @@ for name, arg in kwds.items(): self.__setattr__(name, arg) + def __getattribute__(self, item): + if item in (field[0] for field in object.__getattribute__(self, "_fields_"))\ + and hasattr(self.__class__, '_swappedbytes_'): + self._swap_bytes(item, 'get') + return object.__getattribute__(self, item) + + def __setattr__(self, key, value): + object.__setattr__(self, key, value) + if key in (field[0] for field in self._fields_) and hasattr(self.__class__, '_swappedbytes_'): + self._swap_bytes(key, 'set') 
+ def _subarray(self, fieldtype, name): """Return a _rawffi array of length 1 whose address is the same as the address of the field 'name' of self.""" @@ -269,6 +277,63 @@ def _to_ffi_param(self): return self._buffer + def _swap_bytes(self, field, get_or_set): + def swap_2(v): + return ((v >> 8) & 0x00FF) | ((v << 8) & 0xFF00) + + def swap_4(v): + return ((v & 0x000000FF) << 24) | \ + ((v & 0x0000FF00) << 8) | \ + ((v & 0x00FF0000) >> 8) | \ + ((v >> 24) & 0xFF) + + def swap_8(v): + return ((v & 0x00000000000000FFL) << 56) | \ + ((v & 0x000000000000FF00L) << 40) | \ + ((v & 0x0000000000FF0000L) << 24) | \ + ((v & 0x00000000FF000000L) << 8) | \ + ((v & 0x000000FF00000000L) >> 8) | \ + ((v & 0x0000FF0000000000L) >> 24) | \ + ((v & 0x00FF000000000000L) >> 40) | \ + ((v >> 56) & 0xFF) + + def swap_double_float(v, typ): + from struct import pack, unpack + st = '' + if get_or_set == 'set': + if sys.byteorder == 'little': + st = pack(''.join(['>', typ]), v) + else: + st = pack(''.join(['<', typ]), v) + return unpack(typ, st)[0] + else: + packed = pack(typ, v) + if sys.byteorder == 'little': + st = unpack(''.join(['>', typ]), packed) + else: + st = unpack(''.join(['<', typ]), packed) + return st[0] + + from ctypes import sizeof, c_double, c_float + sizeof_field = 0 + typeof_field = None + for i in self._fields_: + if i[0] == field: + sizeof_field = sizeof(i[1]) + typeof_field = i[1] + field_value = object.__getattribute__(self, field) + if typeof_field == c_float: + object.__setattr__(self, field, swap_double_float(field_value, 'f')) + elif typeof_field == c_double: + object.__setattr__(self, field, swap_double_float(field_value, 'd')) + else: + if sizeof_field == 2: + object.__setattr__(self, field, swap_2(field_value)) + elif sizeof_field == 4: + object.__setattr__(self, field, swap_4(field_value)) + elif sizeof_field == 8: + object.__setattr__(self, field, swap_8(field_value)) + class StructureMeta(StructOrUnionMeta): _is_union = False diff --git 
a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -460,6 +460,38 @@ class X(Structure): _fields_ = [(u"i", c_int)] + def test_swapped_bytes(self): + import sys + + for i in [c_short, c_int, c_long, c_longlong, + c_float, c_double, c_ushort, c_uint, + c_ulong, c_ulonglong]: + FIELDS = [ + ('n', i) + ] + + class Native(Structure): + _fields_ = FIELDS + + class Big(BigEndianStructure): + _fields_ = FIELDS + + class Little(LittleEndianStructure): + _fields_ = FIELDS + + def dostruct(c): + ba = create_string_buffer(sizeof(c)) + ms = c.from_buffer(ba) + ms.n = 0xff00 + return repr(ba[:]) + + if sys.byteorder == 'little': + assert dostruct(Native) == dostruct(Little) + assert dostruct(Native) != dostruct(Big) + else: + assert dostruct(Native) == dostruct(Big) + assert dostruct(Native) != dostruct(Little) + class TestPointerMember(BaseCTypesTestChecker): def test_1(self): From pypy.commits at gmail.com Sun Aug 27 02:54:07 2017 From: pypy.commits at gmail.com (smihnea) Date: Sat, 26 Aug 2017 23:54:07 -0700 (PDT) Subject: [pypy-commit] pypy pypy_swappedbytes: Final modifications , 1 test still unskipped in test_byteswap.py Message-ID: <59a26c8f.f886df0a.782c.bf60@mx.google.com> Author: Mihnea Saracin Branch: pypy_swappedbytes Changeset: r92263:f611791f1958 Date: 2017-08-10 15:10 +0300 http://bitbucket.org/pypy/pypy/changeset/f611791f1958/ Log: Final modifications , 1 test still unskipped in test_byteswap.py diff --git a/lib-python/2.7/ctypes/test/test_byteswap.py b/lib-python/2.7/ctypes/test/test_byteswap.py --- a/lib-python/2.7/ctypes/test/test_byteswap.py +++ b/lib-python/2.7/ctypes/test/test_byteswap.py @@ -23,7 +23,6 @@ setattr(bits, "i%s" % i, 1) dump(bits) - @xfail def test_endian_short(self): if sys.byteorder == "little": self.assertIs(c_short.__ctype_le__, c_short) @@ 
-51,7 +50,6 @@ self.assertEqual(bin(s), "3412") self.assertEqual(s.value, 0x1234) - @xfail def test_endian_int(self): if sys.byteorder == "little": self.assertIs(c_int.__ctype_le__, c_int) @@ -80,7 +78,6 @@ self.assertEqual(bin(s), "78563412") self.assertEqual(s.value, 0x12345678) - @xfail def test_endian_longlong(self): if sys.byteorder == "little": self.assertIs(c_longlong.__ctype_le__, c_longlong) @@ -109,7 +106,6 @@ self.assertEqual(bin(s), "EFCDAB9078563412") self.assertEqual(s.value, 0x1234567890ABCDEF) - @xfail def test_endian_float(self): if sys.byteorder == "little": self.assertIs(c_float.__ctype_le__, c_float) @@ -128,7 +124,6 @@ self.assertAlmostEqual(s.value, math.pi, 6) self.assertEqual(bin(struct.pack(">f", math.pi)), bin(s)) - @xfail def test_endian_double(self): if sys.byteorder == "little": self.assertIs(c_double.__ctype_le__, c_double) @@ -156,7 +151,6 @@ self.assertIs(c_char.__ctype_le__, c_char) self.assertIs(c_char.__ctype_be__, c_char) - @xfail def test_struct_fields_1(self): if sys.byteorder == "little": base = BigEndianStructure @@ -221,7 +215,6 @@ self.assertEqual(s.point.x, 1) self.assertEqual(s.point.y, 2) - @xfail def test_struct_fields_2(self): # standard packing in struct uses no alignment. # So, we have to align using pad bytes. 
@@ -245,7 +238,6 @@ s2 = struct.pack(fmt, 0x12, 0x1234, 0x12345678, 3.14) self.assertEqual(bin(s1), bin(s2)) - @xfail def test_unaligned_nonnative_struct_fields(self): if sys.byteorder == "little": base = BigEndianStructure diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -61,6 +61,54 @@ pyobj_container = GlobalPyobjContainer() +def swap_bytes(value, sizeof, typeof, get_or_set): + def swap_2(): + return ((value >> 8) & 0x00FF) | ((value << 8) & 0xFF00) + + def swap_4(): + return ((value & 0x000000FF) << 24) | \ + ((value & 0x0000FF00) << 8) | \ + ((value & 0x00FF0000) >> 8) | \ + ((value >> 24) & 0xFF) + + def swap_8(): + return ((value & 0x00000000000000FFL) << 56) | \ + ((value & 0x000000000000FF00L) << 40) | \ + ((value & 0x0000000000FF0000L) << 24) | \ + ((value & 0x00000000FF000000L) << 8) | \ + ((value & 0x000000FF00000000L) >> 8) | \ + ((value & 0x0000FF0000000000L) >> 24) | \ + ((value & 0x00FF000000000000L) >> 40) | \ + ((value >> 56) & 0xFF) + + def swap_double_float(typ): + from struct import pack, unpack + if get_or_set == 'set': + if sys.byteorder == 'little': + st = pack(''.join(['>', typ]), value) + else: + st = pack(''.join(['<', typ]), value) + return unpack(typ, st)[0] + else: + packed = pack(typ, value) + if sys.byteorder == 'little': + st = unpack(''.join(['>', typ]), packed) + else: + st = unpack(''.join(['<', typ]), packed) + return st[0] + + if typeof in ('c_float', 'c_float_le', 'c_float_be'): + return swap_double_float('f') + elif typeof in ('c_double', 'c_double_le', 'c_double_be'): + return swap_double_float('d') + else: + if sizeof == 2: + return swap_2() + elif sizeof == 4: + return swap_4() + elif sizeof == 8: + return swap_8() + def generic_xxx_p_from_param(cls, value): if value is None: return cls(None) @@ -271,6 +319,31 @@ def _as_ffi_pointer_(self, ffitype): return as_ffi_pointer(self, ffitype) result._as_ffi_pointer_ = 
_as_ffi_pointer_ + if name[-2:] != '_p' and name[-3:] not in ('_le', '_be') \ + and name not in ('c_wchar', '_SimpleCData', 'c_longdouble', 'c_bool', 'py_object'): + from sys import byteorder + if byteorder == 'big': + name += '_le' + swapped = self.__new__(self, name, bases, dct) + result.__ctype_le__ = swapped + result.__ctype_be__ = result + swapped.__ctype_be__ = result + swapped.__ctype_le__ = swapped + else: + name += '_be' + swapped = self.__new__(self, name, bases, dct) + result.__ctype_be__ = swapped + result.__ctype_le__ = result + swapped.__ctype_le__ = result + swapped.__ctype_be__ = swapped + from _ctypes import sizeof + def _getval(self): + return swap_bytes(self._buffer[0], sizeof(self), name, 'get') + def _setval(self, value): + d = result() + d.value = value + self._buffer[0] = swap_bytes(d.value, sizeof(self), name, 'set') + swapped.value = property(_getval, _setval) return result diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} @@ -230,10 +246,22 @@ def __new__(cls, *args, **kwds): from _ctypes import union - self = super(_CData, cls).__new__(cls) - if ('_abstract_' in cls.__dict__ or cls is Structure + if ('_abstract_' in cls.__dict__ or cls is Structure or cls is union.Union): raise TypeError("abstract class") + if hasattr(cls, '_swappedbytes_'): + fields = [None] * len(cls._fields_) + for i in range(len(cls._fields_)): + if 
cls._fields_[i][1] == cls._fields_[i][1].__dict__.get('__ctype_be__', None): + swapped = cls._fields_[i][1].__dict__.get('__ctype_le__', cls._fields_[i][1]) + else: + swapped = cls._fields_[i][1].__dict__.get('__ctype_be__', cls._fields_[i][1]) + if len(cls._fields_[i]) < 3: + fields[i] = (cls._fields_[i][0], swapped) + else: + fields[i] = (cls._fields_[i][0], swapped, cls._fields_[i][2]) + names_and_fields(cls, fields, _CData, cls.__dict__.get('_anonymous_', None)) + self = super(_CData, cls).__new__(cls) if hasattr(cls, '_ffistruct_'): self.__dict__['_buffer'] = self._ffistruct_(autofree=True) return self @@ -250,17 +278,6 @@ for name, arg in kwds.items(): self.__setattr__(name, arg) - def __getattribute__(self, item): - if item in (field[0] for field in object.__getattribute__(self, "_fields_"))\ - and hasattr(self.__class__, '_swappedbytes_'): - self._swap_bytes(item, 'get') - return object.__getattribute__(self, item) - - def __setattr__(self, key, value): - object.__setattr__(self, key, value) - if key in (field[0] for field in self._fields_) and hasattr(self.__class__, '_swappedbytes_'): - self._swap_bytes(key, 'set') - def _subarray(self, fieldtype, name): """Return a _rawffi array of length 1 whose address is the same as the address of the field 'name' of self.""" @@ -277,63 +294,6 @@ def _to_ffi_param(self): return self._buffer - def _swap_bytes(self, field, get_or_set): - def swap_2(v): - return ((v >> 8) & 0x00FF) | ((v << 8) & 0xFF00) - - def swap_4(v): - return ((v & 0x000000FF) << 24) | \ - ((v & 0x0000FF00) << 8) | \ - ((v & 0x00FF0000) >> 8) | \ - ((v >> 24) & 0xFF) - - def swap_8(v): - return ((v & 0x00000000000000FFL) << 56) | \ - ((v & 0x000000000000FF00L) << 40) | \ - ((v & 0x0000000000FF0000L) << 24) | \ - ((v & 0x00000000FF000000L) << 8) | \ - ((v & 0x000000FF00000000L) >> 8) | \ - ((v & 0x0000FF0000000000L) >> 24) | \ - ((v & 0x00FF000000000000L) >> 40) | \ - ((v >> 56) & 0xFF) - - def swap_double_float(v, typ): - from struct import pack, 
unpack - st = '' - if get_or_set == 'set': - if sys.byteorder == 'little': - st = pack(''.join(['>', typ]), v) - else: - st = pack(''.join(['<', typ]), v) - return unpack(typ, st)[0] - else: - packed = pack(typ, v) - if sys.byteorder == 'little': - st = unpack(''.join(['>', typ]), packed) - else: - st = unpack(''.join(['<', typ]), packed) - return st[0] - - from ctypes import sizeof, c_double, c_float - sizeof_field = 0 - typeof_field = None - for i in self._fields_: - if i[0] == field: - sizeof_field = sizeof(i[1]) - typeof_field = i[1] - field_value = object.__getattribute__(self, field) - if typeof_field == c_float: - object.__setattr__(self, field, swap_double_float(field_value, 'f')) - elif typeof_field == c_double: - object.__setattr__(self, field, swap_double_float(field_value, 'd')) - else: - if sizeof_field == 2: - object.__setattr__(self, field, swap_2(field_value)) - elif sizeof_field == 4: - object.__setattr__(self, field, swap_4(field_value)) - elif sizeof_field == 8: - object.__setattr__(self, field, swap_8(field_value)) - class StructureMeta(StructOrUnionMeta): _is_union = False diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -22,7 +22,6 @@ assert X._fields_ == [("a", c_int)] assert Y._fields_ == [("b", c_int)] assert Z._fields_ == [("a", c_int)] - assert Y._names_ == ['a', 'b'] def test_subclass_delayed(self): @@ -594,3 +593,13 @@ x = X() assert x.x == 0 + + def test_duplicate_names(self): + class S(Structure): + _fields_ = [('a', c_int), + ('b', c_int), + ('a', c_byte)] + s = S(260, -123) + assert sizeof(s) == 3 * sizeof(c_int) + assert s.a == 4 # 256 + 4 + assert s.b == -123 From pypy.commits at gmail.com Sun Aug 27 02:54:11 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 26 Aug 2017 23:54:11 -0700 (PDT) Subject: [pypy-commit] 
pypy default: hg merge pypy_swappedbytes Message-ID: <59a26c93.45b01c0a.8eb03.884c@mx.google.com> Author: Armin Rigo Branch: Changeset: r92265:c64a2e680657 Date: 2017-08-27 08:52 +0200 http://bitbucket.org/pypy/pypy/changeset/c64a2e680657/ Log: hg merge pypy_swappedbytes PR #560 Adding swappedbytes support for ctypes.Structure diff --git a/lib-python/2.7/ctypes/test/test_byteswap.py b/lib-python/2.7/ctypes/test/test_byteswap.py --- a/lib-python/2.7/ctypes/test/test_byteswap.py +++ b/lib-python/2.7/ctypes/test/test_byteswap.py @@ -23,7 +23,6 @@ setattr(bits, "i%s" % i, 1) dump(bits) - @xfail def test_endian_short(self): if sys.byteorder == "little": self.assertIs(c_short.__ctype_le__, c_short) @@ -51,7 +50,6 @@ self.assertEqual(bin(s), "3412") self.assertEqual(s.value, 0x1234) - @xfail def test_endian_int(self): if sys.byteorder == "little": self.assertIs(c_int.__ctype_le__, c_int) @@ -80,7 +78,6 @@ self.assertEqual(bin(s), "78563412") self.assertEqual(s.value, 0x12345678) - @xfail def test_endian_longlong(self): if sys.byteorder == "little": self.assertIs(c_longlong.__ctype_le__, c_longlong) @@ -109,7 +106,6 @@ self.assertEqual(bin(s), "EFCDAB9078563412") self.assertEqual(s.value, 0x1234567890ABCDEF) - @xfail def test_endian_float(self): if sys.byteorder == "little": self.assertIs(c_float.__ctype_le__, c_float) @@ -128,7 +124,6 @@ self.assertAlmostEqual(s.value, math.pi, 6) self.assertEqual(bin(struct.pack(">f", math.pi)), bin(s)) - @xfail def test_endian_double(self): if sys.byteorder == "little": self.assertIs(c_double.__ctype_le__, c_double) @@ -156,7 +151,6 @@ self.assertIs(c_char.__ctype_le__, c_char) self.assertIs(c_char.__ctype_be__, c_char) - @xfail def test_struct_fields_1(self): if sys.byteorder == "little": base = BigEndianStructure @@ -221,7 +215,6 @@ self.assertEqual(s.point.x, 1) self.assertEqual(s.point.y, 2) - @xfail def test_struct_fields_2(self): # standard packing in struct uses no alignment. # So, we have to align using pad bytes. 
@@ -245,7 +238,6 @@ s2 = struct.pack(fmt, 0x12, 0x1234, 0x12345678, 3.14) self.assertEqual(bin(s1), bin(s2)) - @xfail def test_unaligned_nonnative_struct_fields(self): if sys.byteorder == "little": base = BigEndianStructure diff --git a/lib-python/2.7/ctypes/test/test_unaligned_structures.py b/lib-python/2.7/ctypes/test/test_unaligned_structures.py --- a/lib-python/2.7/ctypes/test/test_unaligned_structures.py +++ b/lib-python/2.7/ctypes/test/test_unaligned_structures.py @@ -37,10 +37,7 @@ for typ in byteswapped_structures: ## print >> sys.stderr, typ.value self.assertEqual(typ.value.offset, 1) - try: - o = typ() - except NotImplementedError as e: - self.skipTest(str(e)) # for PyPy + o = typ() o.value = 4 self.assertEqual(o.value, 4) diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -61,6 +61,54 @@ pyobj_container = GlobalPyobjContainer() +def swap_bytes(value, sizeof, typeof, get_or_set): + def swap_2(): + return ((value >> 8) & 0x00FF) | ((value << 8) & 0xFF00) + + def swap_4(): + return ((value & 0x000000FF) << 24) | \ + ((value & 0x0000FF00) << 8) | \ + ((value & 0x00FF0000) >> 8) | \ + ((value >> 24) & 0xFF) + + def swap_8(): + return ((value & 0x00000000000000FFL) << 56) | \ + ((value & 0x000000000000FF00L) << 40) | \ + ((value & 0x0000000000FF0000L) << 24) | \ + ((value & 0x00000000FF000000L) << 8) | \ + ((value & 0x000000FF00000000L) >> 8) | \ + ((value & 0x0000FF0000000000L) >> 24) | \ + ((value & 0x00FF000000000000L) >> 40) | \ + ((value >> 56) & 0xFF) + + def swap_double_float(typ): + from struct import pack, unpack + if get_or_set == 'set': + if sys.byteorder == 'little': + st = pack(''.join(['>', typ]), value) + else: + st = pack(''.join(['<', typ]), value) + return unpack(typ, st)[0] + else: + packed = pack(typ, value) + if sys.byteorder == 'little': + st = unpack(''.join(['>', typ]), packed) + else: + st = unpack(''.join(['<', typ]), packed) + return 
st[0] + + if typeof in ('c_float', 'c_float_le', 'c_float_be'): + return swap_double_float('f') + elif typeof in ('c_double', 'c_double_le', 'c_double_be'): + return swap_double_float('d') + else: + if sizeof == 2: + return swap_2() + elif sizeof == 4: + return swap_4() + elif sizeof == 8: + return swap_8() + def generic_xxx_p_from_param(cls, value): if value is None: return cls(None) @@ -271,6 +319,31 @@ def _as_ffi_pointer_(self, ffitype): return as_ffi_pointer(self, ffitype) result._as_ffi_pointer_ = _as_ffi_pointer_ + if name[-2:] != '_p' and name[-3:] not in ('_le', '_be') \ + and name not in ('c_wchar', '_SimpleCData', 'c_longdouble', 'c_bool', 'py_object'): + from sys import byteorder + if byteorder == 'big': + name += '_le' + swapped = self.__new__(self, name, bases, dct) + result.__ctype_le__ = swapped + result.__ctype_be__ = result + swapped.__ctype_be__ = result + swapped.__ctype_le__ = swapped + else: + name += '_be' + swapped = self.__new__(self, name, bases, dct) + result.__ctype_be__ = swapped + result.__ctype_le__ = result + swapped.__ctype_le__ = result + swapped.__ctype_be__ = swapped + from _ctypes import sizeof + def _getval(self): + return swap_bytes(self._buffer[0], sizeof(self), name, 'get') + def _setval(self, value): + d = result() + d.value = value + self._buffer[0] = swap_bytes(d.value, sizeof(self), name, 'set') + swapped.value = property(_getval, _setval) return result diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -146,6 +146,7 @@ obj._buffer.__setattr__(self.name, arg) + def _set_shape(tp, rawfields, is_union=False): tp._ffistruct_ = _rawffi.Structure(rawfields, is_union, getattr(tp, '_pack_', 0)) @@ -240,19 +241,27 @@ res.__dict__['_index'] = -1 return res - class StructOrUnion(_CData): __metaclass__ = StructOrUnionMeta def __new__(cls, *args, **kwds): from _ctypes import union - self = super(_CData, cls).__new__(cls) - if 
('_abstract_' in cls.__dict__ or cls is Structure + if ('_abstract_' in cls.__dict__ or cls is Structure or cls is union.Union): raise TypeError("abstract class") if hasattr(cls, '_swappedbytes_'): - raise NotImplementedError("missing in PyPy: structure/union with " - "swapped (non-native) byte ordering") + fields = [None] * len(cls._fields_) + for i in range(len(cls._fields_)): + if cls._fields_[i][1] == cls._fields_[i][1].__dict__.get('__ctype_be__', None): + swapped = cls._fields_[i][1].__dict__.get('__ctype_le__', cls._fields_[i][1]) + else: + swapped = cls._fields_[i][1].__dict__.get('__ctype_be__', cls._fields_[i][1]) + if len(cls._fields_[i]) < 3: + fields[i] = (cls._fields_[i][0], swapped) + else: + fields[i] = (cls._fields_[i][0], swapped, cls._fields_[i][2]) + names_and_fields(cls, fields, _CData, cls.__dict__.get('_anonymous_', None)) + self = super(_CData, cls).__new__(cls) if hasattr(cls, '_ffistruct_'): self.__dict__['_buffer'] = self._ffistruct_(autofree=True) return self diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -22,7 +22,6 @@ assert X._fields_ == [("a", c_int)] assert Y._fields_ == [("b", c_int)] assert Z._fields_ == [("a", c_int)] - assert Y._names_ == ['a', 'b'] def test_subclass_delayed(self): @@ -460,6 +459,38 @@ class X(Structure): _fields_ = [(u"i", c_int)] + def test_swapped_bytes(self): + import sys + + for i in [c_short, c_int, c_long, c_longlong, + c_float, c_double, c_ushort, c_uint, + c_ulong, c_ulonglong]: + FIELDS = [ + ('n', i) + ] + + class Native(Structure): + _fields_ = FIELDS + + class Big(BigEndianStructure): + _fields_ = FIELDS + + class Little(LittleEndianStructure): + _fields_ = FIELDS + + def dostruct(c): + ba = create_string_buffer(sizeof(c)) + ms = c.from_buffer(ba) + ms.n = 0xff00 + return repr(ba[:]) + + 
         if sys.byteorder == 'little':
+            assert dostruct(Native) == dostruct(Little)
+            assert dostruct(Native) != dostruct(Big)
+        else:
+            assert dostruct(Native) == dostruct(Big)
+            assert dostruct(Native) != dostruct(Little)
+

 class TestPointerMember(BaseCTypesTestChecker):
     def test_1(self):

From pypy.commits at gmail.com  Sun Aug 27 02:54:09 2017
From: pypy.commits at gmail.com (arigo)
Date: Sat, 26 Aug 2017 23:54:09 -0700 (PDT)
Subject: [pypy-commit] pypy pypy_swappedbytes: close branch, ready to merge
Message-ID: <59a26c91.248fdf0a.7befb.352e@mx.google.com>

Author: Armin Rigo
Branch: pypy_swappedbytes
Changeset: r92264:1f217e18c43f
Date: 2017-08-27 08:47 +0200
http://bitbucket.org/pypy/pypy/changeset/1f217e18c43f/

Log: close branch, ready to merge

From pypy.commits at gmail.com  Sun Aug 27 03:11:29 2017
From: pypy.commits at gmail.com (arigo)
Date: Sun, 27 Aug 2017 00:11:29 -0700 (PDT)
Subject: [pypy-commit] pypy default: Issue #2625
Message-ID: <59a270a1.c6c7df0a.797ca.c88c@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r92266:99fff483bea8
Date: 2017-08-27 09:10 +0200
http://bitbucket.org/pypy/pypy/changeset/99fff483bea8/

Log: Issue #2625

     ctypes.cast("some-string", TP)

diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -142,6 +142,10 @@
         ptr._buffer = tp._ffiarray(1, autofree=True)
         ptr._buffer[0] = obj._buffer
         result = ptr
+    elif isinstance(obj, bytes):
+        result = tp()
+        result._buffer[0] = buffer(obj)._pypy_raw_address()
+        return result
     elif not (isinstance(obj, _CData) and type(obj)._is_pointer_like()):
         raise TypeError("cast() argument 1 must be a pointer, not %s"
                         % (type(obj),))

From pypy.commits at gmail.com  Sun Aug 27 03:12:37 2017
From: pypy.commits at gmail.com (arigo)
Date: Sun, 27 Aug 2017 00:12:37 -0700 (PDT)
Subject: [pypy-commit] pypy default: test_struct_struct now passes
Message-ID: <59a270e5.4b6b1c0a.c006f.b347@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r92267:c4f7ee9d1f93
Date: 2017-08-27 09:12 +0200
http://bitbucket.org/pypy/pypy/changeset/c4f7ee9d1f93/

Log: test_struct_struct now passes

diff --git a/lib-python/2.7/ctypes/test/test_byteswap.py b/lib-python/2.7/ctypes/test/test_byteswap.py
--- a/lib-python/2.7/ctypes/test/test_byteswap.py
+++ b/lib-python/2.7/ctypes/test/test_byteswap.py
@@ -186,7 +186,6 @@
                 pass
             self.assertRaises(TypeError, setattr, T, "_fields_", [("x", typ)])

-    @xfail
     def test_struct_struct(self):
         # nested structures with different byteorders

From pypy.commits at gmail.com  Sun Aug 27 13:22:23 2017
From: pypy.commits at gmail.com (mattip)
Date: Sun, 27 Aug 2017 10:22:23 -0700 (PDT)
Subject: [pypy-commit] pypy default: failing test ' ' + np.string_('abc') should not call np.string_.__radd__
Message-ID: <59a2ffcf.4f821c0a.b7c68.59e3@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92269:fc115074c233
Date: 2017-08-27 19:20 +0300
http://bitbucket.org/pypy/pypy/changeset/fc115074c233/

Log: failing test ' ' + np.string_('abc') should not call
     np.string_.__radd__

diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -387,6 +387,16 @@
                 0              /* tp_itemsize */
                 };

+            PyTypeObject PyGenArrType_Type = {
+                PyObject_HEAD_INIT(NULL)
+                0,               /* ob_size */
+                "bar.generic",   /* tp_name*/
+            };
+
+            static PyObject*
+            gentype_add(PyObject* self, PyObject*other) {
+                return PyString_FromString("gentype_add");
+            }

             static PyObject *
             stringtype_repr(PyObject *self)
             {
@@ -443,15 +453,25 @@
                 memcpy(destptr, data, itemsize);
                 return obj;
             }
+            static PyNumberMethods gentype_as_number;
            """, more_init = '''
+                long flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES;
+                PyGenArrType_Type.tp_flags = flags;
+                gentype_as_number.nb_add = gentype_add;
+                PyGenArrType_Type.tp_as_number = &gentype_as_number;
+                if (PyType_Ready(&PyGenArrType_Type) < 0) INITERROR;
+
                PyStringArrType_Type.tp_alloc = NULL;
                PyStringArrType_Type.tp_free = NULL;

-                PyStringArrType_Type.tp_repr = stringtype_repr;
                PyStringArrType_Type.tp_str = stringtype_str;
-                PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE;
+                PyStringArrType_Type.tp_flags = flags;
                PyStringArrType_Type.tp_itemsize = sizeof(char);
                PyStringArrType_Type.tp_base = &PyString_Type;
+                Py_INCREF(&PyString_Type);
+                Py_INCREF(&PyGenArrType_Type);
+                PyStringArrType_Type.tp_bases = PyTuple_Pack(2,
+                    &PyString_Type, &PyGenArrType_Type);
                PyStringArrType_Type.tp_hash = PyString_Type.tp_hash;
                if (PyType_Ready(&PyStringArrType_Type) < 0) INITERROR;
            ''')
@@ -461,6 +481,8 @@
         assert module.has_nb_add(a) is False
         assert type(a).__name__ == 'string_'
         assert a == 'abc'
+        ret = ' ' + a
+        assert ret == ' abc'
         assert 3 == module.get_len(a)
         b = module.newsubstr('')
         assert 0 == module.get_len(b)

From pypy.commits at gmail.com  Sun Aug 27 13:22:20 2017
From: pypy.commits at gmail.com (mattip)
Date: Sun, 27 Aug 2017 10:22:20 -0700 (PDT)
Subject: [pypy-commit] pypy default: test, fix - any str subtype should never have tp_as_a_number.* functions set
Message-ID: <59a2ffcc.521a1c0a.60bd.9d23@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92268:e45fdeb7813a
Date: 2017-08-26 20:02 +0300
http://bitbucket.org/pypy/pypy/changeset/e45fdeb7813a/

Log: test, fix - any str subtype should never have tp_as_a_number.*
     functions set

diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -367,6 +367,16 @@
             """
                 return PyLong_FromLong(PyObject_Size(args));
             """),
+            ('has_nb_add', "METH_O",
+            '''
+                if (args->ob_type->tp_as_number == NULL) {
+                    Py_RETURN_FALSE;
+                }
+                if (args->ob_type->tp_as_number->nb_add == NULL) {
+                    Py_RETURN_FALSE;
+                }
+                Py_RETURN_TRUE;
+            '''),
            ], prologue="""
                #include <Python.h>
                PyTypeObject PyStringArrType_Type = {
@@ -447,6 +457,8 @@
            ''')
         a = module.newsubstr('abc')
+        assert module.has_nb_add('a') is False
+        assert module.has_nb_add(a) is False
         assert type(a).__name__ == 'string_'
         assert a == 'abc'
         assert 3 == module.get_len(a)
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -309,13 +309,17 @@
             setattr(pto, slot_names[0], slot_func_helper)
         elif ((w_type is space.w_list or w_type is space.w_tuple) and
               slot_names[0] == 'c_tp_as_number'):
-            # XXX hack - hwo can we generalize this? The problem is method
+            # XXX hack - how can we generalize this? The problem is method
             # names like __mul__ map to more than one slot, and we have no
             # convenient way to indicate which slots CPython have filled
             #
             # We need at least this special case since Numpy checks that
             # (list, tuple) do __not__ fill tp_as_number
             pass
+        elif (space.issubtype_w(w_type, space.w_basestring) and
+              slot_names[0] == 'c_tp_as_number'):
+            # like above but for any str type
+            pass
         else:
             assert len(slot_names) == 2
             struct = getattr(pto, slot_names[0])

From pypy.commits at gmail.com  Sun Aug 27 15:18:43 2017
From: pypy.commits at gmail.com (mattip)
Date: Sun, 27 Aug 2017 12:18:43 -0700 (PDT)
Subject: [pypy-commit] pypy default: Backed out: fc115074c233, not clear if this is a PyPy bug or a CPython bug
Message-ID: <59a31b13.8ac4df0a.32ba9.2bb7@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r92270:23392d66a346
Date: 2017-08-27 22:17 +0300
http://bitbucket.org/pypy/pypy/changeset/23392d66a346/

Log: Backed out: fc115074c233, not clear if this is a PyPy bug or a
     CPython bug

diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -387,16 +387,6 @@
                 0              /* tp_itemsize */
                 };

-            PyTypeObject PyGenArrType_Type = {
-                PyObject_HEAD_INIT(NULL)
-                0,               /* ob_size */
-                "bar.generic",   /* tp_name*/
-            };
-
-            static PyObject*
-            gentype_add(PyObject* self, PyObject*other) {
-                return PyString_FromString("gentype_add");
-            }

             static PyObject *
             stringtype_repr(PyObject *self)
             {
@@ -453,25 +443,15 @@
                 memcpy(destptr, data, itemsize);
                 return obj;
             }
-            static PyNumberMethods gentype_as_number;
            """, more_init = '''
-                long flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES;
-                PyGenArrType_Type.tp_flags = flags;
-                gentype_as_number.nb_add = gentype_add;
-                PyGenArrType_Type.tp_as_number = &gentype_as_number;
-                if (PyType_Ready(&PyGenArrType_Type) < 0) INITERROR;
-
                PyStringArrType_Type.tp_alloc = NULL;
                PyStringArrType_Type.tp_free = NULL;
+                PyStringArrType_Type.tp_repr = stringtype_repr;
                PyStringArrType_Type.tp_str = stringtype_str;
-                PyStringArrType_Type.tp_flags = flags;
+                PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE;
                PyStringArrType_Type.tp_itemsize = sizeof(char);
                PyStringArrType_Type.tp_base = &PyString_Type;
-                Py_INCREF(&PyString_Type);
-                Py_INCREF(&PyGenArrType_Type);
-                PyStringArrType_Type.tp_bases = PyTuple_Pack(2,
-                    &PyString_Type, &PyGenArrType_Type);
                PyStringArrType_Type.tp_hash = PyString_Type.tp_hash;
                if (PyType_Ready(&PyStringArrType_Type) < 0) INITERROR;
            ''')
@@ -481,8 +461,6 @@
         assert module.has_nb_add(a) is False
         assert type(a).__name__ == 'string_'
         assert a == 'abc'
-        ret = ' ' + a
-        assert ret == ' abc'
         assert 3 == module.get_len(a)
         b = module.newsubstr('')
         assert 0 == module.get_len(b)

From pypy.commits at gmail.com  Sun Aug 27 16:35:54 2017
From: pypy.commits at gmail.com (arigo)
Date: Sun, 27 Aug 2017 13:35:54 -0700 (PDT)
Subject: [pypy-commit] pypy default: "Fix" the most glaring issue about sandboxing
Message-ID: <59a32d2a.05371c0a.6f0d0.5cc7@mx.google.com>

Author: Armin Rigo
Branch: 
Changeset: r92271:2032a21a8320
Date: 2017-08-27 22:35 +0200
http://bitbucket.org/pypy/pypy/changeset/2032a21a8320/

Log: "Fix" the most glaring issue about sandboxing

diff --git a/rpython/translator/sandbox/rsandbox.py b/rpython/translator/sandbox/rsandbox.py
--- a/rpython/translator/sandbox/rsandbox.py
+++ b/rpython/translator/sandbox/rsandbox.py
@@ -27,17 +27,20 @@
 ll_read_not_sandboxed = rposix.external('read',
                                         [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
                                         rffi.SIZE_T,
-                                        sandboxsafe=True)
+                                        sandboxsafe=True,
+                                        _nowrapper=True)

 ll_write_not_sandboxed = rposix.external('write',
                                          [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
                                          rffi.SIZE_T,
-                                         sandboxsafe=True)
+                                         sandboxsafe=True,
+                                         _nowrapper=True)

 @signature(types.int(), types.ptr(rffi.CCHARP.TO), types.int(),
            returns=types.none())
 def writeall_not_sandboxed(fd, buf, length):
+    fd = rffi.cast(rffi.INT, fd)
     while length > 0:
         size = rffi.cast(rffi.SIZE_T, length)
         count = rffi.cast(lltype.Signed,
                           ll_write_not_sandboxed(fd, buf, size))
@@ -58,7 +61,8 @@
         buflen = self.buflen
         with lltype.scoped_alloc(rffi.CCHARP.TO, buflen) as buf:
             buflen = rffi.cast(rffi.SIZE_T, buflen)
-            count = ll_read_not_sandboxed(self.fd, buf, buflen)
+            fd = rffi.cast(rffi.INT, self.fd)
+            count = ll_read_not_sandboxed(fd, buf, buflen)
             count = rffi.cast(lltype.Signed, count)
             if count <= 0:
                 raise IOError

From pypy.commits at gmail.com  Mon Aug 28 05:51:53 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Mon, 28 Aug 2017 02:51:53 -0700 (PDT)
Subject: [pypy-commit] pypy default: generate tuples more efficiently to stop the occasional FailedHealthCheck
Message-ID: <59a3e7b9.a481df0a.1319b.03a8@mx.google.com>

Author: Carl Friedrich Bolz-Tereick
Branch: 
Changeset: r92272:7153657512df
Date: 2017-08-28 11:45 +0200
http://bitbucket.org/pypy/pypy/changeset/7153657512df/

Log: generate tuples more efficiently to stop the occasional
     FailedHealthCheck

diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py
--- a/rpython/jit/metainterp/test/test_bridgeopt.py
+++ b/rpython/jit/metainterp/test/test_bridgeopt.py
@@ -76,8 +76,11 @@

 box_strategy = strategies.builds(InputArgInt) | strategies.builds(InputArgRef)

-tuples = strategies.tuples(box_strategy, strategies.booleans()).filter(
-    lambda (box, known_class): isinstance(box, InputArgRef) or not known_class)
+def _make_tup(box, known_class):
+    if isinstance(box, InputArgInt):
+        known_class = False
+    return box, known_class
+tuples = strategies.builds(_make_tup, box_strategy, strategies.booleans())

 boxes_known_classes = strategies.lists(tuples, min_size=1)

 @given(boxes_known_classes)

From pypy.commits at gmail.com  Mon Aug 28 05:51:57 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Mon, 28 Aug 2017 02:51:57 -0700 (PDT)
Subject: [pypy-commit] pypy default: optimize this sequence:
Message-ID: <59a3e7bd.0c181c0a.40289.8fb3@mx.google.com>

Author: Carl Friedrich Bolz-Tereick
Branch: 
Changeset: r92274:c6568dda3266
Date: 2017-08-28 11:50 +0200
http://bitbucket.org/pypy/pypy/changeset/c6568dda3266/

Log: optimize this sequence:

     i2 = int_is_zero(i0)
     guard_false(i2)
     i1 = int_is_true(i0)
     guard_true(i1)

     (happens quite often in rpython list code, it seems)

diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -10,7 +10,7 @@
 from rpython.jit.metainterp.optimizeopt.info import INFO_NONNULL, INFO_NULL
 from rpython.jit.metainterp.optimizeopt.util import _findall, make_dispatcher_method
 from rpython.jit.metainterp.resoperation import rop, ResOperation, opclasses,\
-    OpHelpers
+    OpHelpers, AbstractResOp
 from rpython.rlib.rarithmetic import highest_bit
 from rpython.rtyper.lltypesystem import llmemory
 from rpython.rtyper import rclass
@@ -490,6 +490,11 @@

     def postprocess_GUARD_TRUE(self, op):
         box = self.get_box_replacement(op.getarg(0))
+        if (isinstance(box, AbstractResOp) and
+                box.getopnum() == rop.INT_IS_TRUE):
+            # we can't use the (current) range analysis for this because
+            # "anything but 0" is not a valid range
+            self.pure_from_args(rop.INT_IS_ZERO, [box.getarg(0)], CONST_0)
         self.make_constant(box, CONST_1)

     def optimize_GUARD_FALSE(self, op):
@@ -497,6 +502,11 @@

     def postprocess_GUARD_FALSE(self, op):
         box = self.get_box_replacement(op.getarg(0))
+        if (isinstance(box, AbstractResOp) and
+                box.getopnum() == rop.INT_IS_ZERO):
+            # we can't use the (current) range analysis for this because
+            # "anything but 0" is not a valid range
+            self.pure_from_args(rop.INT_IS_TRUE, [box.getarg(0)], CONST_1)
         self.make_constant(box, CONST_0)

     def optimize_ASSERT_NOT_NONE(self, op):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -288,7 +288,6 @@
         self.optimize_loop(ops, expected)

     def test_int_is_true_is_zero(self):
-        py.test.skip("XXX implement me")
         ops = """
         [i0]
         i1 = int_is_true(i0)
@@ -305,6 +304,22 @@
         """
         self.optimize_loop(ops, expected)

+        ops = """
+        [i0]
+        i2 = int_is_zero(i0)
+        guard_false(i2) []
+        i1 = int_is_true(i0)
+        guard_true(i1) []
+        jump(i0)
+        """
+        expected = """
+        [i0]
+        i2 = int_is_zero(i0)
+        guard_false(i2) []
+        jump(i0)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_int_is_zero_int_is_true(self):
         ops = """
         [i0]

From pypy.commits at gmail.com  Mon Aug 28 05:51:55 2017
From: pypy.commits at gmail.com (cfbolz)
Date: Mon, 28 Aug 2017 02:51:55 -0700 (PDT)
Subject: [pypy-commit] pypy default: typo (the method is unused)
Message-ID: <59a3e7bb.d1c5df0a.53f6b.0167@mx.google.com>

Author: Carl Friedrich Bolz-Tereick
Branch: 
Changeset: r92273:3868025e1ee1
Date: 2017-08-28 11:49 +0200
http://bitbucket.org/pypy/pypy/changeset/3868025e1ee1/

Log: typo (the method is unused)

diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -400,7 +400,7 @@
         return rop.can_raise(self.getopnum())

     def is_foldable_guard(self):
-        return rop.is_foldable_guard(self.getopnun())
+        return rop.is_foldable_guard(self.getopnum())

     def is_primitive_array_access(self):
         """ Indicates that this operations loads/stores a

From pypy.commits at gmail.com  Mon Aug 28 13:29:18 2017
From: pypy.commits at gmail.com (rlamy)
Date: Mon, 28 Aug 2017 10:29:18 -0700 (PDT)
Subject: [pypy-commit] pypy py3.5: fix merge
Message-ID: <59a452ee.0783df0a.f01e4.4fc0@mx.google.com>

Author: Ronan Lamy
Branch: py3.5
Changeset: r92275:f3328bccb6b2
Date: 2017-08-28 18:28 +0100
http://bitbucket.org/pypy/pypy/changeset/f3328bccb6b2/

Log: fix merge

diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1660,7 +1660,6 @@
         # overwritten with a new error of the same type
         error = PyErr_Occurred(space)
         has_new_error = (error is not None) and (error is not preexist_error)
-        has_result = ret is not None
         if not expect_null and has_new_error and has_result:
             raise oefmt(space.w_SystemError,
                         "An exception was set, but function returned a "
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -739,10 +739,10 @@
         # uncaught interplevel exceptions are turned into SystemError
         expected = "ZeroDivisionError('integer division or modulo by zero',)"
         exc = raises(SystemError, module.crash1)
-        assert exc.value[0] == expected
+        assert exc.value.args[0] == expected

         exc = raises(SystemError, module.crash2)
-        assert exc.value[0] == expected
+        assert exc.value.args[0] == expected

         # caught exception, api.cpython_api return value works
         assert module.crash3() == -1
@@ -750,7 +750,7 @@
         expected = 'An exception was set, but function returned a value'
         # PyPy only incompatibility/extension
         exc = raises(SystemError, module.crash4)
-        assert exc.value[0] == expected
+        assert exc.value.args[0] == expected

         # An exception was set by the previous call, it can pass
         # cleanly through a call that doesn't check error state
@@ -759,7 +759,7 @@
         # clear the exception but return NULL, signalling an error
         expected = 'Function returned a NULL result without setting an exception'
         exc = raises(SystemError, module.clear, None)
-        assert exc.value[0] == expected
+        assert exc.value.args[0] == expected

         # Set an exception and return NULL
         raises(TypeError, module.set, None)
@@ -770,7 +770,7 @@
         # Set an exception, but return non-NULL
         expected = 'An exception was set, but function returned a value'
         exc = raises(SystemError, module.set, 1)
-        assert exc.value[0] == expected
+        assert exc.value.args[0] == expected

         # Clear the exception and return a value, all is OK

From pypy.commits at gmail.com  Mon Aug 28 17:52:53 2017
From: pypy.commits at gmail.com (rlamy)
Date: Mon, 28 Aug 2017 14:52:53 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: hg merge py3.5
Message-ID: <59a490b5.d1471c0a.b22bc.1500@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92276:fdd1b3877173
Date: 2017-08-28 18:33 +0100
http://bitbucket.org/pypy/pypy/changeset/fdd1b3877173/

Log: hg merge py3.5

diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py
--- a/lib-python/2.7/ctypes/__init__.py
+++ b/lib-python/2.7/ctypes/__init__.py
@@ -361,17 +361,20 @@

         if handle is None:
             if flags & _FUNCFLAG_CDECL:
-                self._handle = _ffi.CDLL(name, mode)
+                pypy_dll = _ffi.CDLL(name, mode)
             else:
-                self._handle = _ffi.WinDLL(name, mode)
-        else:
-            self._handle = handle
+                pypy_dll = _ffi.WinDLL(name, mode)
+            self.__pypy_dll__ = pypy_dll
+            handle = int(pypy_dll)
+            if _sys.maxint > 2 ** 32:
+                handle = int(handle)   # long -> int
+        self._handle = handle

     def __repr__(self):
-        return "<%s '%s', handle %r at 0x%x>" % (
-            self.__class__.__name__, self._name, self._handle,
-            id(self) & (_sys.maxint * 2 + 1))
-
+        return "<%s '%s', handle %x at %x>" % \
+               (self.__class__.__name__, self._name,
+                (self._handle & (_sys.maxint*2 + 1)),
+                id(self) & (_sys.maxint*2 + 1))

     def __getattr__(self, name):
         if name.startswith('__') and name.endswith('__'):
diff --git a/lib-python/3/ctypes/__init__.py b/lib-python/3/ctypes/__init__.py
--- a/lib-python/3/ctypes/__init__.py
+++ b/lib-python/3/ctypes/__init__.py
@@ -346,16 +346,18 @@

         if handle is None:
             if flags & _FUNCFLAG_CDECL:
-                self._handle = _ffi.CDLL(name, mode)
+                pypy_dll = _ffi.CDLL(name, mode)
             else:
-                self._handle = _ffi.WinDLL(name, mode)
-        else:
-            self._handle = handle
+                pypy_dll = _ffi.WinDLL(name, mode)
+            self.__pypy_dll__ = pypy_dll
+            handle = int(pypy_dll)
+        self._handle = handle

     def __repr__(self):
-        return "<%s '%s', handle %r at 0x%x>" % (
-            self.__class__.__name__, self._name, self._handle,
-            id(self) & (_sys.maxsize * 2 + 1))
+        return "<%s '%s', handle %x at 0x%x>" % \
+               (self.__class__.__name__, self._name,
+                (self._handle & (_sys.maxsize*2 + 1)),
+                id(self) & (_sys.maxsize*2 + 1))

     def __getattr__(self, name):
         if name.startswith('__') and name.endswith('__'):
diff --git a/lib-python/3/datetime.py b/lib-python/3/datetime.py
--- a/lib-python/3/datetime.py
+++ b/lib-python/3/datetime.py
@@ -810,7 +810,8 @@
             month = self._month
         if day is None:
             day = self._day
-        return date(year, month, day)
+        # PyPy fix: returns type(self)() instead of date()
+        return type(self)(year, month, day)

     # Comparisons of date objects with other.

@@ -1285,7 +1286,8 @@
             microsecond = self.microsecond
         if tzinfo is True:
             tzinfo = self.tzinfo
-        return time(hour, minute, second, microsecond, tzinfo)
+        # PyPy fix: returns type(self)() instead of time()
+        return type(self)(hour, minute, second, microsecond, tzinfo)

     # Pickle support.
@@ -1497,7 +1499,8 @@
             microsecond = self.microsecond
         if tzinfo is True:
             tzinfo = self.tzinfo
-        return datetime(year, month, day, hour, minute, second, microsecond,
+        # PyPy fix: returns type(self)() instead of datetime()
+        return type(self)(year, month, day, hour, minute, second, microsecond,
                         tzinfo)

     def astimezone(self, tz=None):
diff --git a/lib-python/3/test/test_sysconfig.py b/lib-python/3/test/test_sysconfig.py
--- a/lib-python/3/test/test_sysconfig.py
+++ b/lib-python/3/test/test_sysconfig.py
@@ -397,9 +397,16 @@
             self.assertTrue('linux' in suffix, suffix)
             if re.match('(i[3-6]86|x86_64)$', machine):
                 if ctypes.sizeof(ctypes.c_char_p()) == 4:
-                    self.assertTrue(suffix.endswith('i386-linux-gnu.so') \
-                                    or suffix.endswith('x86_64-linux-gnux32.so'),
-                                    suffix)
+                    self.assertTrue(
+                        suffix.endswith((
+                            'i386-linux-gnu.so',
+                            'i486-linux-gnu.so',
+                            'i586-linux-gnu.so',
+                            'i686-linux-gnu.so',
+                            'x86_64-linux-gnux32.so',
+                        )),
+                        suffix,
+                    )
                 else: # 8 byte pointer size
                     self.assertTrue(suffix.endswith('x86_64-linux-gnu.so'),
                                     suffix)
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -82,7 +82,7 @@
         return False

     def in_dll(self, dll, name):
-        return self.from_address(dll._handle.getaddressindll(name))
+        return self.from_address(dll.__pypy_dll__.getaddressindll(name))

     def from_buffer(self, obj, offset=0):
         size = self._sizeofinstances()
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -430,7 +430,7 @@
             ffires = restype.get_ffi_argtype()
             return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_)

-        cdll = self.dll._handle
+        cdll = self.dll.__pypy_dll__
         try:
             ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes]
             ffi_restype = restype.get_ffi_argtype()
diff --git a/lib_pypy/pyrepl/reader.py b/lib_pypy/pyrepl/reader.py
--- a/lib_pypy/pyrepl/reader.py
+++ b/lib_pypy/pyrepl/reader.py
@@ -239,6 +239,10 @@
     def __init__(self, console):
         self.buffer = []
+        # Enable the use of `insert` without a `prepare` call - necessary to
+        # facilitate the tab completion hack implemented for
+        # .
+        self.pos = 0
         self.ps1 = "->> "
         self.ps2 = "/>> "
         self.ps3 = "|.. "
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py
--- a/lib_pypy/pyrepl/readline.py
+++ b/lib_pypy/pyrepl/readline.py
@@ -314,7 +314,8 @@
         # history item: we use \r\n instead of just \n.  If the history
        # file is passed to GNU readline, the extra \r are just ignored.
        history = self.get_reader().history
-        f = open(os.path.expanduser(filename), 'r', encoding='utf-8')
+        f = open(os.path.expanduser(filename), 'r', encoding='utf-8',
+                 errors='replace')
        buffer = []
        for line in f:
            if line.endswith('\r\n'):
diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py
--- a/pypy/interpreter/module.py
+++ b/pypy/interpreter/module.py
@@ -10,9 +10,10 @@
 class Module(W_Root):
     """A module."""

-    _immutable_fields_ = ["w_dict?"]
+    _immutable_fields_ = ["w_dict?", "w_userclass?"]

     _frozen = False
+    w_userclass = None

     def __init__(self, space, w_name, w_dict=None):
         self.space = space
@@ -148,6 +149,26 @@
                         self)
         return space.call_function(space.w_list, w_dict)

+    # These three methods are needed to implement '__class__' assignment
+    # between a module and a subclass of module.  They give every module
+    # the ability to have its '__class__' set, manually.  Note that if
+    # you instantiate a subclass of ModuleType in the first place, then
+    # you get an RPython instance of a subclass of Module created in the
+    # normal way by typedef.py.  That instance has got its own
+    # getclass(), getslotvalue(), etc. but provided it has no __slots__,
+    # it is compatible with ModuleType for '__class__' assignment.
+
+    def getclass(self, space):
+        if self.w_userclass is None:
+            return W_Root.getclass(self, space)
+        return self.w_userclass
+
+    def setclass(self, space, w_cls):
+        self.w_userclass = w_cls
+
+    def user_setup(self, space, w_subtype):
+        self.w_userclass = w_subtype
+

 def init_extra_module_attrs(space, w_mod):
     w_dict = w_mod.getdict(space)
diff --git a/pypy/interpreter/test/test_module.py b/pypy/interpreter/test/test_module.py
--- a/pypy/interpreter/test/test_module.py
+++ b/pypy/interpreter/test/test_module.py
@@ -220,3 +220,45 @@
         import sys
         m = type(sys).__new__(type(sys))
         assert not m.__dict__
+
+    def test_class_assignment_for_module(self):
+        import sys
+        modtype = type(sys)
+        class X(modtype):
+            _foobar_ = 42
+
+        m = X("yytest_moduleyy")
+        assert type(m) is m.__class__ is X
+        assert m._foobar_ == 42
+        m.__class__ = modtype
+        assert type(m) is m.__class__ is modtype
+        assert not hasattr(m, '_foobar_')
+
+        m = modtype("xxtest_modulexx")
+        assert type(m) is m.__class__ is modtype
+        m.__class__ = X
+        assert m._foobar_ == 42
+        assert type(m) is m.__class__ is X
+
+        sys.__class__ = modtype
+        assert type(sys) is sys.__class__ is modtype
+        sys.__class__ = X
+        assert sys._foobar_ == 42
+        sys.__class__ = modtype
+
+        class XX(modtype):
+            __slots__ = ['a', 'b']
+
+        x = XX("zztest_modulezz")
+        assert x.__class__ is XX
+        raises(AttributeError, "x.a")
+        x.a = 42
+        assert x.a == 42
+        x.a = 43
+        assert x.a == 43
+        assert 'a' not in x.__dict__
+        del x.a
+        raises(AttributeError, "x.a")
+        raises(AttributeError, "del x.a")
+        raises(TypeError, "x.__class__ = X")
+        raises(TypeError, "sys.__class__ = XX")
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -130,7 +130,7 @@
     return subcls

 _unique_subclass_cache = {}
-def _getusercls(cls, reallywantdict=False):
+def _getusercls(cls):
     from rpython.rlib import objectmodel
     from pypy.objspace.std.objectobject import W_ObjectObject
     from pypy.objspace.std.mapdict import (BaseUserClassMapdict,
@@ -144,7 +144,7 @@
     else:
         base_mixin = MapdictStorageMixin
     copy_methods = [BaseUserClassMapdict]
-    if reallywantdict or not typedef.hasdict:
+    if not typedef.hasdict:
         # the type has no dict, mapdict to provide the dict
         copy_methods.append(MapdictDictSupport)
         name += "Dict"
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1661,7 +1661,7 @@
         assert cpyext_glob_tid_ptr[0] == 0
         cpyext_glob_tid_ptr[0] = tid

-    preexist_error = PyErr_Occurred(space) is not None
+    preexist_error = PyErr_Occurred(space)
     try:
         # Call the function
         result = call_external_function(func, *boxed_args)
@@ -1685,17 +1685,19 @@
             has_result = ret is not None

         # Check for exception consistency
-        has_error = PyErr_Occurred(space) is not None
-        if not preexist_error:
-            if has_error and has_result:
-                raise oefmt(space.w_SystemError,
-                            "An exception was set, but function returned a "
-                            "value")
-            elif not expect_null and not has_error and not has_result:
-                raise oefmt(space.w_SystemError,
-                            "Function returned a NULL result without setting "
-                            "an exception")
-        if has_error:
+        # XXX best attempt, will miss preexisting error that is
+        # overwritten with a new error of the same type
+        error = PyErr_Occurred(space)
+        has_new_error = (error is not None) and (error is not preexist_error)
+        if not expect_null and has_new_error and has_result:
+            raise oefmt(space.w_SystemError,
+                        "An exception was set, but function returned a "
+                        "value")
+        elif not expect_null and not has_new_error and not has_result:
+            raise oefmt(space.w_SystemError,
+                        "Function returned a NULL result without setting "
+                        "an exception")
+        elif has_new_error:
             state = space.fromcache(State)
             state.check_and_raise_exception()
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -273,6 +273,11 @@
 #define _PyGC_FINALIZED(o) 1
 #define PyType_IS_GC(tp) 1

+#define PyObject_GC_Track(o)      do { } while(0)
+#define PyObject_GC_UnTrack(o)    do { } while(0)
+#define _PyObject_GC_TRACK(o)     do { } while(0)
+#define _PyObject_GC_UNTRACK(o)   do { } while(0)
+
 /* Utility macro to help write tp_traverse functions.
  * To use this macro, the tp_traverse function must name its arguments
  * "visit" and "arg".  This is intended to keep tp_traverse functions
diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py
--- a/pypy/module/cpyext/object.py
+++ b/pypy/module/cpyext/object.py
@@ -80,24 +80,6 @@
 def PyObject_GC_Del(space, obj):
     PyObject_Free(space, obj)

-@cpython_api([rffi.VOIDP], lltype.Void)
-def PyObject_GC_Track(space, op):
-    """Adds the object op to the set of container objects tracked by the
-    collector.  The collector can run at unexpected times so objects must be
-    valid while being tracked.  This should be called once all the fields
-    followed by the tp_traverse handler become valid, usually near the
-    end of the constructor."""
-    pass
-
-@cpython_api([rffi.VOIDP], lltype.Void)
-def PyObject_GC_UnTrack(space, op):
-    """Remove the object op from the set of container objects tracked by the
-    collector.  Note that PyObject_GC_Track() can be called again on
-    this object to add it back to the set of tracked objects.  The deallocator
-    (tp_dealloc handler) should call this for the object before any of
-    the fields used by the tp_traverse handler become invalid."""
-    pass
-
 @cpython_api([PyObject], PyObjectP, error=CANNOT_FAIL)
 def _PyObject_GetDictPtr(space, op):
     return lltype.nullptr(PyObjectP.TO)
@@ -311,7 +293,7 @@
         PyErr_BadInternalCall(space)

 @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1)
-def PyObject_RichCompareBool(space, ref1, ref2, opid_int):
+def PyObject_RichCompareBool(space, w_o1, w_o2, opid_int):
     """Compare the values of o1 and o2 using the operation specified by opid,
     which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE,
     corresponding to <,
@@ -321,13 +303,13 @@
     opid."""
     # Quick result when objects are the same.
     # Guarantees that identity implies equality.
-    if ref1 is ref2:
+    if space.is_w(w_o1, w_o2):
         opid = rffi.cast(lltype.Signed, opid_int)
         if opid == Py_EQ:
             return 1
         if opid == Py_NE:
             return 0
-    w_res = PyObject_RichCompare(space, ref1, ref2, opid_int)
+    w_res = PyObject_RichCompare(space, w_o1, w_o2, opid_int)
     return int(space.is_true(w_res))

 @cpython_api([PyObject], PyObject, result_is_ll=True)
diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py
--- a/pypy/module/cpyext/sequence.py
+++ b/pypy/module/cpyext/sequence.py
@@ -294,6 +294,23 @@
     def getitems_fixedsize(self, w_list):
         return self.getitems_unroll(w_list)

+    def copy_into(self, w_list, w_other):
+        w_other.strategy = self
+        w_other.lstorage = self.getstorage_copy(w_list)
+
+    def clone(self, w_list):
+        storage = self.getstorage_copy(w_list)
+        w_clone = W_ListObject.from_storage_and_strategy(self.space, storage,
+                                                         self)
+        return w_clone
+
+    def getitems_copy(self, w_list):
+        return self.getitems(w_list)   # getitems copies anyway
+
+    def getstorage_copy(self, w_list):
+        lst = self.getitems(w_list)
+        return self.erase(CPyListStorage(w_list.space, lst))
+
     #------------------------------------------
     # all these methods fail or switch strategy and then call ListObjectStrategy's method
@@ -301,23 +318,9 @@
         w_list.switch_to_object_strategy()
         w_list.strategy.setslice(w_list, start, stop, step, length)

-    def get_sizehint(self):
-        return -1
-
     def init_from_list_w(self, w_list, list_w):
         raise NotImplementedError

-    def clone(self, w_list):
-        storage = w_list.lstorage  # lstorage is tuple, no need to clone
-        w_clone = W_ListObject.from_storage_and_strategy(self.space, storage,
-                                                         self)
-        w_clone.switch_to_object_strategy()
-        return w_clone
-
-    def copy_into(self, w_list, w_other):
-        w_list.switch_to_object_strategy()
-        w_list.strategy.copy_into(w_list, w_other)
-
     def _resize_hint(self, w_list, hint):
         pass

@@ -325,13 +328,6 @@
         w_list.switch_to_object_strategy()
         return w_list.strategy.find(w_list, w_item, start, stop)

-    def getitems_copy(self, w_list):
-        w_list.switch_to_object_strategy()
-        return w_list.strategy.getitems_copy(w_list)
-
-    def getstorage_copy(self, w_list):
-        raise NotImplementedError
-
     def append(self, w_list, w_item):
         w_list.switch_to_object_strategy()
         w_list.strategy.append(w_list, w_item)
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -625,18 +625,6 @@
             resized object or NULL on failure."""
     raise NotImplementedError

-@cpython_api([PyObject], lltype.Void)
-def _PyObject_GC_TRACK(space, op):
-    """A macro version of PyObject_GC_Track().  It should not be used for
-    extension modules."""
-    raise NotImplementedError
-
-@cpython_api([PyObject], lltype.Void)
-def _PyObject_GC_UNTRACK(space, op):
-    """A macro version of PyObject_GC_UnTrack().  It should not be used for
-    extension modules."""
-    raise NotImplementedError
-
 @cpython_api([PyFrameObject], PyObject)
 def PyGen_New(space, frame):
     """Create and return a new generator object based on the frame object.  A
@@ -1516,13 +1504,6 @@
     raise NotImplementedError

-@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
-def PyType_IS_GC(space, o):
-    """Return true if the type object includes support for the cycle detector; this
-    tests the type flag Py_TPFLAGS_HAVE_GC."""
-    raise NotImplementedError
-
-
 @cpython_api([], rffi.INT_real, error=-1)
 def PyUnicode_ClearFreeList(space, ):
     """Clear the free list.  Return the total number of freed items."""
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -24,6 +24,10 @@
 def PyPy_Crash2(space):
     1/0

+@api.cpython_api([api.PyObject], api.PyObject, result_is_ll=True)
+def PyPy_Noop(space, pyobj):
+    return pyobj
+
 class TestApi:
     def test_signature(self):
         common_functions = api.FUNCTIONS_BY_HEADER[api.pypy_decl]
@@ -665,6 +669,7 @@
         body = """
         PyAPI_FUNC(PyObject*) PyPy_Crash1(void);
         PyAPI_FUNC(long) PyPy_Crash2(void);
+        PyAPI_FUNC(PyObject*) PyPy_Noop(PyObject*);
         static PyObject* foo_crash1(PyObject* self, PyObject *args)
         {
             return PyPy_Crash1();
@@ -688,9 +693,27 @@
             int a = PyPy_Crash2();
             return PyFloat_FromDouble(a);
         }
+        static PyObject* foo_noop(PyObject* self, PyObject* args)
+        {
+            Py_INCREF(args);
+            return PyPy_Noop(args);
+        }
+        static PyObject* foo_set(PyObject* self, PyObject *args)
+        {
+            PyErr_SetString(PyExc_TypeError, "clear called with no error");
+            if (PyLong_Check(args)) {
+                Py_INCREF(args);
+                return args;
+            }
+            return NULL;
+        }
         static PyObject* foo_clear(PyObject* self, PyObject *args)
         {
             PyErr_Clear();
+            if (PyLong_Check(args)) {
+                Py_INCREF(args);
+                return args;
+            }
             return NULL;
         }
         static PyMethodDef methods[] = {
@@ -698,7 +721,9 @@
             { "crash2", foo_crash2, METH_NOARGS },
             { "crash3", foo_crash3, METH_NOARGS },
             { "crash4", foo_crash4, METH_NOARGS },
-            { "clear", foo_clear, METH_NOARGS },
+            { "clear", foo_clear, METH_O },
+            { "set", foo_set, METH_O },
+            { "noop", foo_noop, METH_O },
             { NULL }
         };
         static struct PyModuleDef moduledef = {
@@ -710,15 +735,46 @@
         };
         """
         module = self.import_module(name='foo', body=body)
+
         # uncaught interplevel exceptions are turned into SystemError
-        raises(SystemError, module.crash1)
-        raises(SystemError, module.crash2)
-        # caught exception
+        expected = "ZeroDivisionError('integer division or modulo by zero',)"
+        exc = raises(SystemError, module.crash1)
+        assert exc.value.args[0] == expected
+
+        exc = raises(SystemError, module.crash2)
+        assert exc.value.args[0] == expected
+
+        # caught exception, api.cpython_api return value works
         assert module.crash3() == -1
-        # An exception was set, but function returned a value
-        raises(SystemError, module.crash4)
-        # No exception set, but NULL returned
-        raises(SystemError, module.clear)
+
+        expected = 'An exception was set, but function returned a value'
+        # PyPy only incompatibility/extension
+        exc = raises(SystemError, module.crash4)
+        assert exc.value.args[0] == expected
+
+        # An exception was set by the previous call, it can pass
+        # cleanly through a call that doesn't check error state
+        assert module.noop(1) == 1
+
+        # clear the exception but return NULL, signalling an error
+        expected = 'Function returned a NULL result without setting an exception'
+        exc = raises(SystemError, module.clear, None)
+        assert exc.value.args[0] == expected
+
+        # Set an exception and return NULL
+        raises(TypeError, module.set, None)
+
+        # clear any exception and return a value
+        assert module.clear(1) == 1
+
+        # Set an exception, but return non-NULL
+        expected = 'An exception was set, but function returned a value'
+        exc = raises(SystemError, module.set, 1)
+        assert exc.value.args[0] == expected
+
+        # Clear the exception and return a value, all is OK
+        assert module.clear(1) == 1

     def test_new_exception(self):
         mod = self.import_extension('foo', [
diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py
--- a/pypy/module/cpyext/test/test_object.py
+++ b/pypy/module/cpyext/test/test_object.py
@@ -416,7 +416,7 @@
         Py_buffer passed to it.
         """
         module = self.import_extension('foo', [
-            ("fillinfo", "METH_VARARGS",
+            ("fillinfo", "METH_NOARGS",
             """
             Py_buffer buf;
             PyObject *str = PyBytes_FromString("hello, world.");
@@ -468,7 +468,7 @@
         object.
         """
         module = self.import_extension('foo', [
-            ("fillinfo", "METH_VARARGS",
+            ("fillinfo", "METH_NOARGS",
             """
             Py_buffer buf;
             PyObject *str = PyBytes_FromString("hello, world.");
@@ -514,7 +514,7 @@
         PyBuffer_FillInfo fails if WRITABLE is passed but object is readonly.
         """
         module = self.import_extension('foo', [
-            ("fillinfo", "METH_VARARGS",
+            ("fillinfo", "METH_NOARGS",
             """
             Py_buffer buf;
             PyObject *str = PyBytes_FromString("hello, world.");
@@ -541,7 +541,7 @@
         decremented by PyBuffer_Release.
         """
         module = self.import_extension('foo', [
-            ("release", "METH_VARARGS",
+            ("release", "METH_NOARGS",
             """
             Py_buffer buf;
             buf.obj = PyBytes_FromString("release me!");
@@ -560,3 +560,20 @@
             Py_RETURN_NONE;
             """)])
         assert module.release() is None
+
+
+class AppTestPyBuffer_Release(AppTestCpythonExtensionBase):
+    def test_richcomp_nan(self):
+        module = self.import_extension('foo', [
+            ("comp_eq", "METH_VARARGS",
+            """
+            PyObject *a = PyTuple_GetItem(args, 0);
+            PyObject *b = PyTuple_GetItem(args, 1);
+            int res = PyObject_RichCompareBool(a, b, Py_EQ);
+            return PyLong_FromLong(res);
+            """),])
+        a = float('nan')
+        b = float('nan')
+        assert a is b
+        res = module.comp_eq(a, b)
+        assert res == 1
diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py
--- a/pypy/module/cpyext/test/test_sequence.py
+++ b/pypy/module/cpyext/test/test_sequence.py
@@ -226,6 +226,15 @@
         w_l.inplace_mul(2)
         assert space.int_w(space.len(w_l)) == 10

+    def test_getstorage_copy(self, space, api):
+        w = space.wrap
+        w_l = w([1, 2, 3, 4])
+        api.PySequence_Fast(w_l, "foo") # converts
+
+        w_l1 = w([])
+        space.setitem(w_l1, space.newslice(w(0), w(0), w(1)), w_l)
+        assert map(space.unwrap,
space.unpackiterable(w_l1)) == [1, 2, 3, 4] + class AppTestSequenceObject(AppTestCpythonExtensionBase): def test_fast(self): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -636,6 +636,14 @@ else: WINERROR_TO_ERRNO, DEFAULT_WIN32_ERRNO = {}, 22 # EINVAL +if rwin32.WIN32: + _winerror_property = dict( + winerror = readwrite_attrproperty_w('w_winerror', W_OSError), + ) +else: + _winerror_property = dict() + + W_OSError.typedef = TypeDef( 'OSError', W_Exception.typedef, @@ -648,9 +656,9 @@ strerror = readwrite_attrproperty_w('w_strerror', W_OSError), filename = readwrite_attrproperty_w('w_filename', W_OSError), filename2= readwrite_attrproperty_w('w_filename2',W_OSError), - winerror = readwrite_attrproperty_w('w_winerror', W_OSError), characters_written = GetSetProperty(W_OSError.descr_get_written, W_OSError.descr_set_written), + **_winerror_property ) W_BlockingIOError = _new_exception( diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ b/pypy/module/imp/test/test_app.py @@ -81,15 +81,17 @@ def test_suffixes(self): import imp for suffix, mode, type in imp.get_suffixes(): - if mode == imp.PY_SOURCE: + if type == imp.PY_SOURCE: assert suffix == '.py' - assert type == 'r' - elif mode == imp.PY_COMPILED: + assert mode == 'r' + elif type == imp.PY_COMPILED: assert suffix in ('.pyc', '.pyo') - assert type == 'rb' - elif mode == imp.C_EXTENSION: + assert mode == 'rb' + elif type == imp.C_EXTENSION: assert suffix.endswith(('.pyd', '.so')) - assert type == 'rb' + assert mode == 'rb' + else: + assert False, ("Unknown type", suffix, mode, type) def test_ext_suffixes(self): import _imp diff --git a/pypy/module/readline/test/test_readline.py b/pypy/module/readline/test/test_readline.py --- a/pypy/module/readline/test/test_readline.py +++ 
b/pypy/module/readline/test/test_readline.py
@@ -29,3 +29,14 @@
         readline.add_history("dummy")
         assert readline.get_history_item(1) == "entrée 1"
         assert readline.get_history_item(2) == "entrée 22"
+
+
+    def test_insert_text_leading_tab(self):
+        """
+        A literal tab can be inserted at the beginning of a line.
+
+        See
+        """
+        import readline
+        readline.insert_text("\t")
+        assert readline.get_line_buffer() == b"\t"
diff --git a/pypy/module/test_lib_pypy/README.txt b/pypy/module/test_lib_pypy/README.txt
--- a/pypy/module/test_lib_pypy/README.txt
+++ b/pypy/module/test_lib_pypy/README.txt
@@ -1,4 +1,7 @@
 This directory contains app-level tests are supposed to be run *after*
 translation. So you run them by saying:
 
-pypy pytest.py
+../../goal/pypy-c pytest.py
+
+Note that if you run it with a PyPy from elsewhere, it will not pick
+up the changes to lib-python and lib_pypy.
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py
@@ -43,6 +43,12 @@
         cdll.LoadLibrary(lib)
         CDLL(lib)
 
+    def test__handle(self):
+        lib = find_library("c")
+        if lib:
+            cdll = CDLL(lib)
+            assert type(cdll._handle) in (int, long)
+
 if os.name in ("nt", "ce"):
     def test_load_library(self):
         if is_resource_enabled("printing"):
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -364,8 +364,8 @@
         characters, all remaining cased characters have lowercase.
""" - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): """B.translate(table[, deletechars]) -> copy of B Return a copy of the string B, where all characters occurring diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py --- a/pypy/objspace/std/objectobject.py +++ b/pypy/objspace/std/objectobject.py @@ -141,13 +141,17 @@ def descr_set___class__(space, w_obj, w_newcls): from pypy.objspace.std.typeobject import W_TypeObject + from pypy.interpreter.module import Module + # if not isinstance(w_newcls, W_TypeObject): raise oefmt(space.w_TypeError, - "__class__ must be set to new-style class, not '%T' " + "__class__ must be set to a class, not '%T' " "object", w_newcls) - if not w_newcls.is_heaptype(): + if not (w_newcls.is_heaptype() or + w_newcls is space.gettypeobject(Module.typedef)): raise oefmt(space.w_TypeError, - "__class__ assignment: only for heap types") + "__class__ assignment only supported for heap types " + "or ModuleType subclasses") w_oldcls = space.type(w_obj) assert isinstance(w_oldcls, W_TypeObject) if (w_oldcls.get_full_instance_layout() == diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -742,8 +742,8 @@ DEFAULT_NOOP_TABLE = ''.join([chr(i) for i in range(256)]) # for bytes and bytearray, overridden by unicode - @unwrap_spec(w_deletechars=WrappedDefault('')) - def descr_translate(self, space, w_table, w_deletechars): + @unwrap_spec(w_delete=WrappedDefault('')) + def descr_translate(self, space, w_table, w_delete): if space.is_w(w_table, space.w_None): table = self.DEFAULT_NOOP_TABLE else: @@ -753,7 +753,7 @@ "translation table must be 256 characters long") string = self._val(space) - deletechars = self._op_val(space, w_deletechars) + deletechars = 
self._op_val(space, w_delete) if len(deletechars) == 0: buf = self._builder(len(string)) for char in string: diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py --- a/pypy/objspace/std/test/test_typeobject.py +++ b/pypy/objspace/std/test/test_typeobject.py @@ -1284,6 +1284,65 @@ raises(ValueError, type, 'A\x00B', (), {}) raises(TypeError, type, b'A', (), {}) + def test_incomplete_extend(self): """ + # Extending an unitialized type with type.__mro__ is None must + # throw a reasonable TypeError exception, instead of failing + # with a segfault. + class M(type): + def mro(cls): + if cls.__mro__ is None and cls.__name__ != 'X': + try: + class X(cls): + pass + except TypeError: + found.append(1) + return type.mro(cls) + found = [] + class A(metaclass=M): + pass + assert found == [1] + """ + + def test_incomplete_extend_2(self): """ + # Same as test_incomplete_extend, with multiple inheritance + class M(type): + def mro(cls): + if cls.__mro__ is None and cls.__name__ == 'Second': + try: + class X(First, cls): + pass + except TypeError: + found.append(1) + return type.mro(cls) + found = [] + class Base(metaclass=M): + pass + class First(Base): + pass + class Second(Base): + pass + assert found == [1] + """ + + def test_incomplete_extend_3(self): """ + # this case "works", but gives a slightly strange error message + # on both CPython and PyPy + class M(type): + def mro(cls): + if cls.__mro__ is None and cls.__name__ == 'A': + try: + Base.__new__(cls) + except TypeError: + found.append(1) + return type.mro(cls) + found = [] + class Base(metaclass=M): + pass + class A(Base): + pass + assert found == [1] + """ + class AppTestWithMethodCacheCounter: spaceconfig = {"objspace.std.withmethodcachecounter": True} diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -546,19 +546,24 @@ space = self.space if self.is_heaptype(): 
return self.getdictvalue(space, '__module__') + elif self.is_cpytype(): + dot = self.name.rfind('.') else: dot = self.name.find('.') - if dot >= 0: - mod = self.name[:dot] - else: - mod = "builtins" - return space.newtext(mod) + if dot >= 0: + mod = self.name[:dot] + else: + mod = "builtins" + return space.newtext(mod) def getname(self, space): if self.is_heaptype(): result = self.name else: - dot = self.name.find('.') + if self.is_cpytype(): + dot = self.name.rfind('.') + else: + dot = self.name.find('.') if dot >= 0: result = self.name[dot+1:] else: @@ -1036,6 +1041,9 @@ for w_candidate in bases_w: if not isinstance(w_candidate, W_TypeObject): continue + if not w_candidate.hasmro: + raise oefmt(w_candidate.space.w_TypeError, + "Cannot extend an incomplete type '%N'", w_candidate) if w_bestbase is None: w_bestbase = w_candidate # for now continue diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -552,10 +552,11 @@ self.reg_bindings[result_v] = loc return loc if v not in self.reg_bindings: + # v not in a register. 
allocate one for result_v and move v there prev_loc = self.frame_manager.loc(v) - loc = self.force_allocate_reg(v, forbidden_vars) + loc = self.force_allocate_reg(result_v, forbidden_vars) self.assembler.regalloc_mov(prev_loc, loc) - assert v in self.reg_bindings + return loc if self.longevity[v][1] > self.position: # we need to find a new place for variable v and # store result in the same place diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -504,7 +504,7 @@ clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info) clt.frame_info.clear() # for now - if log: + if log or self._debug: number = looptoken.number operations = self._inject_debugging_code(looptoken, operations, 'e', number) @@ -589,7 +589,7 @@ faildescr.adr_jump_offset) self.mc.force_frame_size(DEFAULT_FRAME_BYTES) descr_number = compute_unique_id(faildescr) - if log: + if log or self._debug: operations = self._inject_debugging_code(faildescr, operations, 'b', descr_number) arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs) @@ -1618,18 +1618,6 @@ else: not_implemented("save_into_mem size = %d" % size) - def _genop_getfield(self, op, arglocs, resloc): - base_loc, ofs_loc, size_loc, sign_loc = arglocs - assert isinstance(size_loc, ImmedLoc) - source_addr = AddressLoc(base_loc, ofs_loc) - self.load_from_mem(resloc, source_addr, size_loc, sign_loc) - - genop_getfield_gc_i = _genop_getfield - genop_getfield_gc_r = _genop_getfield - genop_getfield_gc_f = _genop_getfield - genop_getfield_raw_i = _genop_getfield - genop_getfield_raw_f = _genop_getfield - def _genop_gc_load(self, op, arglocs, resloc): base_loc, ofs_loc, size_loc, sign_loc = arglocs assert isinstance(size_loc, ImmedLoc) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1305,7 
+1305,7 @@ self.rm.possibly_free_var(tmpbox_high) def compute_hint_frame_locations(self, operations): - # optimization only: fill in the 'hint_frame_locations' dictionary + # optimization only: fill in the 'hint_frame_pos' dictionary # of 'fm' based on the JUMP at the end of the loop, by looking # at where we would like the boxes to be after the jump. op = operations[-1] @@ -1320,7 +1320,7 @@ self._compute_hint_frame_locations_from_descr(descr) #else: # The loop ends in a JUMP going back to a LABEL in the same loop. - # We cannot fill 'hint_frame_locations' immediately, but we can + # We cannot fill 'hint_frame_pos' immediately, but we can # wait until the corresponding consider_label() to know where the # we would like the boxes to be after the jump. diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -205,6 +205,18 @@ if not is_valid_fd(fd): from errno import EBADF raise OSError(EBADF, 'Bad file descriptor') + + def _bound_for_write(fd, count): + if count > 32767 and c_isatty(fd): + # CPython Issue #11395, PyPy Issue #2636: the Windows console + # returns an error (12: not enough space error) on writing into + # stdout if stdout mode is binary and the length is greater than + # 66,000 bytes (or less, depending on heap usage). Can't easily + # test that, because we need 'fd' to be non-redirected... + count = 32767 + elif count > 0x7fffffff: + count = 0x7fffffff + return count else: def is_valid_fd(fd): return 1 @@ -213,6 +225,9 @@ def validate_fd(fd): pass + def _bound_for_write(fd, count): + return count + def closerange(fd_low, fd_high): # this behaves like os.closerange() from Python 2.6. 
        for fd in xrange(fd_low, fd_high):
@@ -449,6 +464,7 @@
 def write(fd, data):
     count = len(data)
     validate_fd(fd)
+    count = _bound_for_write(fd, count)
     with rffi.scoped_nonmovingbuffer(data) as buf:
         return handle_posix_error('write', c_write(fd, buf, count))
diff --git a/rpython/rtyper/tool/rffi_platform.py b/rpython/rtyper/tool/rffi_platform.py
--- a/rpython/rtyper/tool/rffi_platform.py
+++ b/rpython/rtyper/tool/rffi_platform.py
@@ -710,7 +710,8 @@
             size, _ = expected_size_and_sign
             return lltype.FixedSizeArray(fieldtype.OF, size/_sizeof(fieldtype.OF))
     raise TypeError("conflict between translating python and compiler field"
-                    " type %r for %r" % (fieldtype, fieldname))
+                    " type %r for symbol %r, expected size+sign %r" % (
+                        fieldtype, fieldname, expected_size_and_sign))
 
 def expose_value_as_rpython(value):
     if intmask(value) == value:

From pypy.commits at gmail.com Mon Aug 28 17:52:56 2017
From: pypy.commits at gmail.com (rlamy)
Date: Mon, 28 Aug 2017 14:52:56 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: fix translation
Message-ID: <59a490b8.83341c0a.89b94.6c75@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92277:4581a4a84e6f
Date: 2017-08-28 22:52 +0100
http://bitbucket.org/pypy/pypy/changeset/4581a4a84e6f/

Log:	fix translation

diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1522,6 +1522,7 @@
                 look_for += ' or ' + also_look_for
             else:
                 look_for = also_look_for
+        assert look_for is not None
         msg = u"function %s not found in library %s" % (
             look_for.decode('utf-8'), space.unicode_w(space.newfilename(path)))
         w_path = space.newfilename(path)

From pypy.commits at gmail.com Tue Aug 29 22:56:36 2017
From: pypy.commits at gmail.com (rlamy)
Date: Tue, 29 Aug 2017 19:56:36 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: clear exception when module exec sets one without reporting failure
Message-ID: <59a62964.50901c0a.cc0d0.3bf8@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92279:7a20909775d3
Date: 2017-08-30 03:55 +0100
http://bitbucket.org/pypy/pypy/changeset/7a20909775d3/

Log:	clear exception when module exec sets one without reporting failure

diff --git a/pypy/module/cpyext/modsupport.py b/pypy/module/cpyext/modsupport.py
--- a/pypy/module/cpyext/modsupport.py
+++ b/pypy/module/cpyext/modsupport.py
@@ -152,15 +152,16 @@
         execf = rffi.cast(execfunctype, cur_slot[0].c_value)
         res = generic_cpy_call(space, execf, w_mod)
         has_error = PyErr_Occurred(space) is not None
+        state = space.fromcache(State)
         if rffi.cast(lltype.Signed, res):
             if has_error:
-                state = space.fromcache(State)
                 state.check_and_raise_exception()
             else:
                 raise oefmt(space.w_SystemError,
                             "execution of module %S failed without "
                             "setting an exception", w_mod.w_name)
         if has_error:
+            state.clear_exception()
             raise oefmt(space.w_SystemError,
                         "execution of module %S raised unreported "
                         "exception", w_mod.w_name)

From pypy.commits at gmail.com Tue Aug 29 22:56:33 2017
From: pypy.commits at gmail.com (rlamy)
Date: Tue, 29 Aug 2017 19:56:33 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: check for stray exceptions at test teardown
Message-ID: <59a62961.8fbadf0a.19fb5.6542@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92278:48e754e6916d
Date: 2017-08-30 03:50 +0100
http://bitbucket.org/pypy/pypy/changeset/48e754e6916d/

Log:	check for stray exceptions at test teardown

diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -363,6 +363,8 @@
         for name in self.imported_module_names:
             self.unimport_module(name)
         self.cleanup()
+        state = self.space.fromcache(State)
+        assert not state.operror
 
 
 class AppTestCpythonExtension(AppTestCpythonExtensionBase):

From pypy.commits at gmail.com Wed Aug 30 07:48:34 2017
From: pypy.commits at gmail.com (rlamy)
Date: Wed, 30 Aug 2017 04:48:34 -0700 (PDT)
Subject: [pypy-commit] pypy
 py3.5-sendmsg-recvmsg: Merged py3.5 into py3.5-sendmsg-recvmsg
Message-ID: <59a6a612.0ac41c0a.f4fc1.5d71@mx.google.com>

Author: Ronan Lamy
Branch: py3.5-sendmsg-recvmsg
Changeset: r92280:05eff6c713c4
Date: 2017-08-30 12:48 +0100
http://bitbucket.org/pypy/pypy/changeset/05eff6c713c4/

Log:	Merged py3.5 into py3.5-sendmsg-recvmsg

diff too long, truncating to 2000 out of 25457 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -27,16 +27,17 @@
 ^pypy/module/cpyext/test/.+\.manifest$
 ^pypy/module/test_lib_pypy/ctypes_tests/.+\.o$
 ^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
-^pypy/module/cppyy/src/.+\.o$
-^pypy/module/cppyy/bench/.+\.so$
-^pypy/module/cppyy/bench/.+\.root$
-^pypy/module/cppyy/bench/.+\.d$
-^pypy/module/cppyy/src/.+\.errors$
-^pypy/module/cppyy/test/.+_rflx\.cpp$
-^pypy/module/cppyy/test/.+\.so$
-^pypy/module/cppyy/test/.+\.rootmap$
-^pypy/module/cppyy/test/.+\.exe$
-^pypy/module/cppyy/test/.+_cint.h$
+^pypy/module/_cppyy/src/.+\.o$
+^pypy/module/_cppyy/bench/.+\.so$
+^pypy/module/_cppyy/bench/.+\.root$
+^pypy/module/_cppyy/bench/.+\.d$
+^pypy/module/_cppyy/src/.+\.errors$
+^pypy/module/_cppyy/test/.+_rflx\.cpp$
+^pypy/module/_cppyy/test/.+\.so$
+^pypy/module/_cppyy/test/.+\.rootmap$
+^pypy/module/_cppyy/test/.+\.exe$
+^pypy/module/_cppyy/test/.+_cint.h$
+^pypy/module/_cppyy/.+/*\.pcm$
 ^pypy/module/test_lib_pypy/cffi_tests/__pycache__.+$
 ^pypy/doc/.+\.html$
 ^pypy/doc/config/.+\.rst$
@@ -93,6 +94,3 @@
 ^release/
 ^rpython/_cache$
-pypy/module/cppyy/.+/*\.pcm
-
-
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@
 RUNINTERP = $(PYPY_EXECUTABLE)
 endif
 
-.PHONY: cffi_imports
+.PHONY: pypy-c cffi_imports
 
 pypy-c:
 	@echo
@@ -32,7 +32,7 @@
 	@echo "===================================================================="
 	@echo
 	@sleep 5
-	$(RUNINTERP) rpython/bin/rpython -Ojit pypy/goal/targetpypystandalone.py
+	cd pypy/goal && $(RUNINTERP) ../../rpython/bin/rpython -Ojit targetpypystandalone.py
 
 # Note: the
-jN option, or MAKEFLAGS=-jN, are not usable. They are # replaced with an opaque --jobserver option by the time this Makefile @@ -40,4 +40,4 @@ # http://lists.gnu.org/archive/html/help-make/2010-08/msg00106.html cffi_imports: pypy-c - PYTHONPATH=. ./pypy-c pypy/tool/build_cffi_imports.py || /bin/true + PYTHONPATH=. pypy/goal/pypy-c pypy/tool/build_cffi_imports.py || /bin/true diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -361,17 +361,20 @@ if handle is None: if flags & _FUNCFLAG_CDECL: - self._handle = _ffi.CDLL(name, mode) + pypy_dll = _ffi.CDLL(name, mode) else: - self._handle = _ffi.WinDLL(name, mode) - else: - self._handle = handle + pypy_dll = _ffi.WinDLL(name, mode) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int + self._handle = handle def __repr__(self): - return "<%s '%s', handle %r at 0x%x>" % ( - self.__class__.__name__, self._name, self._handle, - id(self) & (_sys.maxint * 2 + 1)) - + return "<%s '%s', handle %x at %x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxint*2 + 1)), + id(self) & (_sys.maxint*2 + 1)) def __getattr__(self, name): if name.startswith('__') and name.endswith('__'): diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py --- a/lib-python/2.7/distutils/sysconfig_pypy.py +++ b/lib-python/2.7/distutils/sysconfig_pypy.py @@ -218,6 +218,10 @@ compiler.shared_lib_extension = so_ext +def get_config_h_filename(): + """Returns the path of pyconfig.h.""" + inc_dir = get_python_inc(plat_specific=1) + return os.path.join(inc_dir, 'pyconfig.h') from sysconfig_cpython import ( parse_makefile, _variable_rx, expand_makefile_vars) diff --git a/lib-python/2.7/distutils/unixccompiler.py b/lib-python/2.7/distutils/unixccompiler.py --- a/lib-python/2.7/distutils/unixccompiler.py +++ 
b/lib-python/2.7/distutils/unixccompiler.py @@ -226,7 +226,19 @@ return "-L" + dir def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name + # XXX PyPy workaround, look at the big comment below for more + # context. On CPython, the hack below works fine because + # `compiler_name` contains the name of the actual compiler which was + # used at compile time (e.g. 'x86_64-linux-gnu-gcc' on my machine). + # PyPy hardcodes it to 'cc', so the hack doesn't work, and the end + # result is that we pass the wrong option to the compiler. + # + # The workaround is to *always* pretend to be GCC if we are on Linux: + # this should cover the vast majority of real systems, including the + # ones which use clang (which understands the '-Wl,-rpath' syntax as + # well) + return (sys.platform == "linux2" or + "gcc" in compiler_name or "g++" in compiler_name) def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. See Python bug #445902: diff --git a/lib-python/3/ctypes/__init__.py b/lib-python/3/ctypes/__init__.py --- a/lib-python/3/ctypes/__init__.py +++ b/lib-python/3/ctypes/__init__.py @@ -346,16 +346,18 @@ if handle is None: if flags & _FUNCFLAG_CDECL: - self._handle = _ffi.CDLL(name, mode) + pypy_dll = _ffi.CDLL(name, mode) else: - self._handle = _ffi.WinDLL(name, mode) - else: - self._handle = handle + pypy_dll = _ffi.WinDLL(name, mode) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + self._handle = handle def __repr__(self): - return "<%s '%s', handle %r at 0x%x>" % ( - self.__class__.__name__, self._name, self._handle, - id(self) & (_sys.maxsize * 2 + 1)) + return "<%s '%s', handle %x at 0x%x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxsize*2 + 1)), + id(self) & (_sys.maxsize*2 + 1)) def __getattr__(self, name): if name.startswith('__') and name.endswith('__'): diff --git a/lib-python/3/datetime.py b/lib-python/3/datetime.py --- a/lib-python/3/datetime.py +++ 
b/lib-python/3/datetime.py @@ -810,7 +810,8 @@ month = self._month if day is None: day = self._day - return date(year, month, day) + # PyPy fix: returns type(self)() instead of date() + return type(self)(year, month, day) # Comparisons of date objects with other. @@ -1285,7 +1286,8 @@ microsecond = self.microsecond if tzinfo is True: tzinfo = self.tzinfo - return time(hour, minute, second, microsecond, tzinfo) + # PyPy fix: returns type(self)() instead of time() + return type(self)(hour, minute, second, microsecond, tzinfo) # Pickle support. @@ -1497,7 +1499,8 @@ microsecond = self.microsecond if tzinfo is True: tzinfo = self.tzinfo - return datetime(year, month, day, hour, minute, second, microsecond, + # PyPy fix: returns type(self)() instead of datetime() + return type(self)(year, month, day, hour, minute, second, microsecond, tzinfo) def astimezone(self, tz=None): diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -81,6 +81,19 @@ g['LIBDIR'] = os.path.join(sys.prefix, 'lib') g['VERSION'] = get_python_version() + if sys.platform[:6] == "darwin": + import platform + if platform.machine() == 'i386': + if platform.architecture()[0] == '32bit': + arch = 'i386' + else: + arch = 'x86_64' + else: + # just a guess + arch = platform.machine() + g['LDSHARED'] += ' -undefined dynamic_lookup' + g['CC'] += ' -arch %s' % (arch,) + global _config_vars _config_vars = g diff --git a/lib-python/3/stat.py b/lib-python/3/stat.py --- a/lib-python/3/stat.py +++ b/lib-python/3/stat.py @@ -139,13 +139,21 @@ def filemode(mode): """Convert a file's mode to a string of the form '-rwxrwxrwx'.""" perm = [] + + # The first group gets a question mark if none of the bits match the mode. + empty = "?" 
+ for table in _filemode_table: for bit, char in table: if mode & bit == bit: perm.append(char) break else: - perm.append("-") + perm.append(empty) + + # All the rest of the positions get a - if the bits don't match. + empty = "-" + return "".join(perm) diff --git a/lib-python/3/test/test_pyexpat.py b/lib-python/3/test/test_pyexpat.py --- a/lib-python/3/test/test_pyexpat.py +++ b/lib-python/3/test/test_pyexpat.py @@ -11,7 +11,7 @@ from xml.parsers import expat from xml.parsers.expat import errors -from test.support import sortdict +from test.support import sortdict, impl_detail class SetAttributeTest(unittest.TestCase): @@ -446,6 +446,7 @@ self.assertEqual(os.path.basename(entry[0]), filename) self.assertEqual(entry[2], funcname) + @impl_detail("PyPy does not have pyexpat.c", pypy=False) def test_exception(self): parser = expat.ParserCreate() parser.StartElementHandler = self.StartElementHandler diff --git a/lib-python/3/test/test_stat.py b/lib-python/3/test/test_stat.py --- a/lib-python/3/test/test_stat.py +++ b/lib-python/3/test/test_stat.py @@ -138,6 +138,10 @@ self.assertS_IS("REG", st_mode) self.assertEqual(modestr, '-r--r--r--') self.assertEqual(self.statmod.S_IMODE(st_mode), 0o444) + + # If there are only permission bits, no type bytes, a question + # mark is rendered in the type field. 
+ self.assertEqual(self.statmod.filemode(0o420), '?r---w----') else: os.chmod(TESTFN, 0o700) st_mode, modestr = self.get_mode() diff --git a/lib-python/3/test/test_sysconfig.py b/lib-python/3/test/test_sysconfig.py --- a/lib-python/3/test/test_sysconfig.py +++ b/lib-python/3/test/test_sysconfig.py @@ -397,9 +397,16 @@ self.assertTrue('linux' in suffix, suffix) if re.match('(i[3-6]86|x86_64)$', machine): if ctypes.sizeof(ctypes.c_char_p()) == 4: - self.assertTrue(suffix.endswith('i386-linux-gnu.so') \ - or suffix.endswith('x86_64-linux-gnux32.so'), - suffix) + self.assertTrue( + suffix.endswith(( + 'i386-linux-gnu.so', + 'i486-linux-gnu.so', + 'i586-linux-gnu.so', + 'i686-linux-gnu.so', + 'x86_64-linux-gnux32.so', + )), + suffix, + ) else: # 8 byte pointer size self.assertTrue(suffix.endswith('x86_64-linux-gnu.so'), suffix) diff --git a/lib_pypy/_cffi_ssl/README.md b/lib_pypy/_cffi_ssl/README.md --- a/lib_pypy/_cffi_ssl/README.md +++ b/lib_pypy/_cffi_ssl/README.md @@ -5,9 +5,15 @@ it renames the compiled shared object to _pypy_openssl.so (which means that cryptography can ship their own cffi backend) -NOTE: currently, we have changed ``_cffi_src/openssl/callbacks.py`` to -not rely on the CPython C API, and ``_cffi_src/utils.py`` for issue #2575 -(29c9a89359e4). (The first change is now backported.) +NOTE: currently, we have the following changes: + +* ``_cffi_src/openssl/callbacks.py`` to not rely on the CPython C API + (this change is now backported) + +* ``_cffi_src/utils.py`` for issue #2575 (29c9a89359e4) + +* ``_cffi_src/openssl/x509_vfy.py`` for issue #2605 (ca4d0c90f5a1) + # Tests? 
diff --git a/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py b/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py --- a/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py +++ b/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py @@ -221,10 +221,16 @@ static const long X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM = 0; static const long X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED = 0; static const long X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 = 0; +#ifndef X509_V_ERR_HOSTNAME_MISMATCH static const long X509_V_ERR_HOSTNAME_MISMATCH = 0; +#endif +#ifndef X509_V_ERR_EMAIL_MISMATCH static const long X509_V_ERR_EMAIL_MISMATCH = 0; +#endif +#ifndef X509_V_ERR_IP_ADDRESS_MISMATCH static const long X509_V_ERR_IP_ADDRESS_MISMATCH = 0; #endif +#endif /* OpenSSL 1.0.2beta2+ verification parameters */ #if CRYPTOGRAPHY_OPENSSL_102BETA2_OR_GREATER && \ diff --git a/lib_pypy/_cffi_ssl/_stdssl/certificate.py b/lib_pypy/_cffi_ssl/_stdssl/certificate.py --- a/lib_pypy/_cffi_ssl/_stdssl/certificate.py +++ b/lib_pypy/_cffi_ssl/_stdssl/certificate.py @@ -173,14 +173,13 @@ return tuple(dn) -STATIC_BIO_BUF = ffi.new("char[]", 2048) - def _bio_get_str(biobuf): - length = lib.BIO_gets(biobuf, STATIC_BIO_BUF, len(STATIC_BIO_BUF)-1) + bio_buf = ffi.new("char[]", 2048) + length = lib.BIO_gets(biobuf, bio_buf, len(bio_buf)-1) if length < 0: if biobuf: lib.BIO_free(biobuf) raise ssl_error(None) - return _str_with_len(STATIC_BIO_BUF, length) + return _str_with_len(bio_buf, length) def _decode_certificate(certificate): retval = {} diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -82,7 +82,7 @@ return False def in_dll(self, dll, name): - return self.from_address(dll._handle.getaddressindll(name)) + return self.from_address(dll.__pypy_dll__.getaddressindll(name)) def from_buffer(self, obj, offset=0): size = self._sizeofinstances() diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- 
a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -430,7 +430,7 @@ ffires = restype.get_ffi_argtype() return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_) - cdll = self.dll._handle + cdll = self.dll.__pypy_dll__ try: ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes] ffi_restype = restype.get_ffi_argtype() diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -40,6 +40,22 @@ else: rawfields.append((f[0], f[1]._ffishape_)) + # hack for duplicate field names + already_seen = set() + names1 = names + names = [] + for f in names1: + if f not in already_seen: + names.append(f) + already_seen.add(f) + already_seen = set() + for i in reversed(range(len(rawfields))): + if rawfields[i][0] in already_seen: + rawfields[i] = (('$DUP%d$%s' % (i, rawfields[i][0]),) + + rawfields[i][1:]) + already_seen.add(rawfields[i][0]) + # /hack + _set_shape(self, rawfields, self._is_union) fields = {} diff --git a/lib_pypy/_curses.py b/lib_pypy/_curses.py --- a/lib_pypy/_curses.py +++ b/lib_pypy/_curses.py @@ -411,7 +411,7 @@ val = lib.mvwget_wch(self._win, *args, wch) else: raise error("get_wch requires 0 or 2 arguments") - _check_ERR(val, "get_wch"): + _check_ERR(val, "get_wch") return wch[0] def getkey(self, *args): diff --git a/lib_pypy/_tkinter/tklib_build.py b/lib_pypy/_tkinter/tklib_build.py --- a/lib_pypy/_tkinter/tklib_build.py +++ b/lib_pypy/_tkinter/tklib_build.py @@ -22,12 +22,27 @@ linklibs = ['tcl', 'tk'] libdirs = [] else: - for _ver in ['', '8.6', '8.5', '']: + # On some Linux distributions, the tcl and tk libraries are + # stored in /usr/include, so we must check this case also + libdirs = [] + found = False + for _ver in ['', '8.6', '8.5']: incdirs = ['/usr/include/tcl' + _ver] linklibs = ['tcl' + _ver, 'tk' + _ver] - libdirs = [] if os.path.isdir(incdirs[0]): + found = True break + if not found: + for _ver in ['8.6', '8.5', '']: 
+ incdirs = [] + linklibs = ['tcl' + _ver, 'tk' + _ver] + if os.path.isfile(''.join(['/usr/lib/lib', linklibs[1], '.so'])): + found = True + break + if not found: + sys.stderr.write("*** TCL libraries not found! Falling back...\n") + incdirs = [] + linklibs = ['tcl', 'tk'] config_ffi = FFI() config_ffi.cdef(""" diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -95,6 +95,7 @@ #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define _cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong +#define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -1,7 +1,12 @@ /***** Support code for embedding *****/ -#if defined(_MSC_VER) +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(_WIN32) # define CFFI_DLLEXPORT __declspec(dllexport) #elif defined(__GNUC__) # define CFFI_DLLEXPORT __attribute__((visibility("default"))) @@ -525,3 +530,7 @@ #undef cffi_compare_and_swap #undef cffi_write_barrier #undef cffi_read_barrier + +#ifdef __cplusplus +} +#endif diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -394,12 +394,17 @@ replace_with = ' ' + replace_with return self._backend.getcname(cdecl, replace_with) - def gc(self, cdata, destructor): + def gc(self, cdata, destructor, size=0): """Return a new cdata object that points to the same data. Later, when this new cdata object is garbage-collected, 'destructor(old_cdata_object)' will be called. + + The optional 'size' gives an estimate of the size, used to + trigger the garbage collection more eagerly. So far only used + on PyPy. 
It tells the GC that the returned object keeps alive + roughly 'size' bytes of external memory. """ - return self._backend.gcp(cdata, destructor) + return self._backend.gcp(cdata, destructor, size) def _get_cached_btype(self, type): assert self._lock.acquire(False) is False diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -1002,7 +1002,7 @@ _weakref_cache_ref = None - def gcp(self, cdata, destructor): + def gcp(self, cdata, destructor, size=0): if self._weakref_cache_ref is None: import weakref class MyRef(weakref.ref): diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py --- a/lib_pypy/cffi/recompiler.py +++ b/lib_pypy/cffi/recompiler.py @@ -412,6 +412,9 @@ prnt(' }') prnt(' p[0] = (const void *)0x%x;' % self._version) prnt(' p[1] = &_cffi_type_context;') + prnt('#if PY_MAJOR_VERSION >= 3') + prnt(' return NULL;') + prnt('#endif') prnt('}') # on Windows, distutils insists on putting init_cffi_xyz in # 'export_symbols', so instead of fighting it, just give up and @@ -578,7 +581,7 @@ def _convert_expr_from_c(self, tp, var, context): if isinstance(tp, model.BasePrimitiveType): - if tp.is_integer_type(): + if tp.is_integer_type() and tp.name != '_Bool': return '_cffi_from_c_int(%s, %s)' % (var, tp.name) elif isinstance(tp, model.UnknownFloatType): return '_cffi_from_c_double(%s)' % (var,) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -296,7 +296,7 @@ def _convert_expr_from_c(self, tp, var, context): if isinstance(tp, model.PrimitiveType): - if tp.is_integer_type(): + if tp.is_integer_type() and tp.name != '_Bool': return '_cffi_from_c_int(%s, %s)' % (var, tp.name) elif tp.name != 'long double': return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var) @@ -872,6 +872,7 @@ #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define 
_cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong +#define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble diff --git a/lib_pypy/pyrepl/reader.py b/lib_pypy/pyrepl/reader.py --- a/lib_pypy/pyrepl/reader.py +++ b/lib_pypy/pyrepl/reader.py @@ -239,6 +239,10 @@ def __init__(self, console): self.buffer = [] + # Enable the use of `insert` without a `prepare` call - necessary to + # facilitate the tab completion hack implemented for + # . + self.pos = 0 self.ps1 = "->> " self.ps2 = "/>> " self.ps3 = "|.. " diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py --- a/lib_pypy/pyrepl/readline.py +++ b/lib_pypy/pyrepl/readline.py @@ -297,10 +297,7 @@ line = line.rstrip('\n') if isinstance(line, unicode): return line # on py3k - try: - return unicode(line, ENCODING) - except UnicodeDecodeError: # bah, silently fall back... - return unicode(line, 'utf-8', 'replace') + return unicode(line, 'utf-8', 'replace') def get_history_length(self): return self.saved_history_length @@ -317,7 +314,8 @@ # history item: we use \r\n instead of just \n. If the history # file is passed to GNU readline, the extra \r are just ignored. 
history = self.get_reader().history - f = open(os.path.expanduser(filename), 'r') + f = open(os.path.expanduser(filename), 'r', encoding='utf-8', + errors='replace') buffer = [] for line in f: if line.endswith('\r\n'): @@ -334,15 +332,12 @@ def write_history_file(self, filename='~/.history'): maxlength = self.saved_history_length history = self.get_reader().get_trimmed_history(maxlength) - f = open(os.path.expanduser(filename), 'w') + f = open(os.path.expanduser(filename), 'w', encoding='utf-8') for entry in history: # if we are on py3k, we don't need to encode strings before # writing it to a file if isinstance(entry, unicode) and sys.version_info < (3,): - try: - entry = entry.encode(ENCODING) - except UnicodeEncodeError: # bah, silently fall back... - entry = entry.encode('utf-8') + entry = entry.encode('utf-8') entry = entry.replace('\n', '\r\n') # multiline history support f.write(entry + '\n') f.close() diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -39,7 +39,7 @@ "thread", "itertools", "pyexpat", "cpyext", "array", "binascii", "_multiprocessing", '_warnings', "_collections", "_multibytecodec", "_continuation", "_cffi_backend", - "_csv", "_pypyjson", "_posixsubprocess", # "cppyy", "micronumpy" + "_csv", "_pypyjson", "_posixsubprocess", # "_cppyy", "micronumpy" "_jitlog", ]) @@ -71,8 +71,10 @@ if name in translation_modules: translation_modules.remove(name) - if "cppyy" in working_modules: - working_modules.remove("cppyy") # not tested on win32 + if "_cppyy" in working_modules: + working_modules.remove("_cppyy") # not tested on win32 + if "_vmprof" in working_modules: + working_modules.remove("_vmprof") # FIXME: missing details # The _locale module is needed by site.py on Windows default_modules.add("_locale") @@ -81,8 +83,8 @@ working_modules.remove('fcntl') # LOCK_NB not defined working_modules.remove("_minimal_curses") working_modules.remove("termios") - if "cppyy" in 
working_modules: - working_modules.remove("cppyy") # depends on ctypes + if "_cppyy" in working_modules: + working_modules.remove("_cppyy") # depends on ctypes #if sys.platform.startswith("linux"): # _mach = os.popen('uname -m', 'r').read().strip() @@ -94,7 +96,7 @@ '_multiprocessing': [('objspace.usemodules.time', True), ('objspace.usemodules.thread', True)], 'cpyext': [('objspace.usemodules.array', True)], - 'cppyy': [('objspace.usemodules.cpyext', True)], + '_cppyy': [('objspace.usemodules.cpyext', True)], 'faulthandler': [('objspace.usemodules._vmprof', True)], } module_suggests = { @@ -227,11 +229,6 @@ "use specialised tuples", default=False), - BoolOption("withcelldict", - "use dictionaries that are optimized for being used as module dicts", - default=False, - requires=[("objspace.honor__builtins__", False)]), - BoolOption("withliststrategies", "enable optimized ways to store lists of primitives ", default=True), @@ -291,7 +288,7 @@ # extra optimizations with the JIT if level == 'jit': - config.objspace.std.suggest(withcelldict=True) + pass # none at the moment def enable_allworkingmodules(config): diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -10,6 +10,18 @@ minutes on a fast machine -- and RAM-hungry. You will need **at least** 2 GB of memory on a 32-bit machine and 4GB on a 64-bit machine. +Before you start +---------------- + +Our normal development workflow avoids a full translation by using test-driven +development. You can read more about how to develop PyPy here_, and the latest +translated (hopefully functional) binary packages are available on our +buildbot's `nightly builds`_. + +.. _here: getting-started-dev.html +.. _`nightly builds`: http://buildbot.pypy.org/nightly + +You will need the build dependencies below to run the tests.
Clone the repository -------------------- @@ -140,22 +152,61 @@ Run the translation ------------------- +We usually translate in the ``pypy/goal`` directory, so all the following +commands assume your ``$pwd`` is there. + Translate with JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=jit Translate without JIT:: - cd pypy/goal pypy ../../rpython/bin/rpython --opt=2 +Note this translates pypy via the ``targetpypystandalone.py`` file, so these +are shorthand for:: + + pypy ../../rpython/bin/rpython targetpypystandalone.py + +More help is available via ``--help`` at either option position, and more info +can be found in the :doc:`config/index` section. + (You can use ``python`` instead of ``pypy`` here, which will take longer but works too.) -If everything works correctly this will create an executable ``pypy-c`` in the -current directory. The executable behaves mostly like a normal Python -interpreter (see :doc:`cpython_differences`). +If everything works correctly this will: + +1. Run the rpython `translation chain`_, producing a database of the + entire pypy interpreter. This step is currently single-threaded, and RAM + hungry. As part of this step, the chain creates a large number of C code + files and a Makefile to compile them in a + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. +2. Create an executable ``pypy-c`` by running the Makefile. This step can + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. + + +The resulting executable behaves mostly like a normal Python +interpreter (see :doc:`cpython_differences`), and is ready for testing, for +use as a base interpreter for a new virtualenv, or for packaging into a binary +suitable for installation on another machine running the same OS as the build +machine.
+ +Note that step 4 is merely done as a convenience; any of the steps may be rerun +without rerunning the previous steps. + +.. _`translation chain`: https://rpython.readthedocs.io/en/latest/translation.html + + +Making a debug build of PyPy +---------------------------- + +If the Makefile is rerun with the lldebug or lldebug0 target, appropriate +compilation flags are added to add debug info and reduce compiler optimizations +to ``-O0``, respectively. If you stop in a debugger, you will see the +very wordy machine-generated C code from the rpython translation step, which +takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ @@ -169,14 +220,6 @@ .. _`out-of-line API mode`: http://cffi.readthedocs.org/en/latest/overview.html#real-example-api-level-out-of-line -Translating with non-standard options ------------------------------------- - -It is possible to have non-standard features enabled for translation, -but they are not really tested any more. Look, for example, at the -:doc:`objspace proxies ` document. - - Packaging (preparing for installation) -------------------------------------- @@ -205,14 +248,16 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in `package.py`_. Users + commands at install time; the exact list is in + :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm"``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in `package.py`_).
Users seeing a broken + see the exact list in :source:`pypy/tool/release/package.py `). + Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file called ``_gdbm_cffi.pypy-41.so`` locally, which is a C extension diff --git a/pypy/doc/config/objspace.std.withcelldict.txt b/pypy/doc/config/objspace.std.withcelldict.txt deleted file mode 100644 --- a/pypy/doc/config/objspace.std.withcelldict.txt +++ /dev/null @@ -1,2 +0,0 @@ -Enable cell-dicts. This optimization is not helpful without the JIT. In the -presence of the JIT, it greatly helps looking up globals. diff --git a/pypy/doc/configuration.rst b/pypy/doc/configuration.rst --- a/pypy/doc/configuration.rst +++ b/pypy/doc/configuration.rst @@ -188,4 +188,6 @@ can be found on the ``config`` attribute of all ``TranslationContext`` instances and are described in :source:`rpython/config/translationoption.py`. The interpreter options are attached to the object space, also under the name ``config`` and are -described in :source:`pypy/config/pypyoption.py`. +described in :source:`pypy/config/pypyoption.py`. Both sets of options are +documented in the :doc:`config/index` section. + diff --git a/pypy/doc/cppyy.rst b/pypy/doc/cppyy.rst deleted file mode 100644 --- a/pypy/doc/cppyy.rst +++ /dev/null @@ -1,672 +0,0 @@ -cppyy: C++ bindings for PyPy -============================ - -The cppyy module delivers dynamic Python-C++ bindings. -It is designed for automation, high performance, scale, interactivity, and -handling all of modern C++ (11, 14, etc.). -It is based on `Cling`_ which, through `LLVM`_/`clang`_, provides C++ -reflection and interactivity. -Reflection information is extracted from C++ header files. -Cppyy itself is built into PyPy (an alternative exists for CPython), but -it requires a `backend`_, installable through pip, to interface with Cling. - -.. _Cling: https://root.cern.ch/cling -.. _LLVM: http://llvm.org/ -..
_clang: http://clang.llvm.org/ -.. _backend: https://pypi.python.org/pypi/PyPy-cppyy-backend - - -Installation ------------- - -This assumes PyPy2.7 v5.7 or later; earlier versions use a Reflex-based cppyy -module, which is no longer supported. -Both the tooling and user-facing Python codes are very backwards compatible, -however. -Further dependencies are cmake (for general build), Python2.7 (for LLVM), and -a modern C++ compiler (one that supports at least C++11). - -Assuming you have a recent enough version of PyPy installed, use pip to -complete the installation of cppyy:: - - $ MAKE_NPROCS=4 pypy-c -m pip install --verbose PyPy-cppyy-backend - -Set the number of parallel builds ('4' in this example, through the MAKE_NPROCS -environment variable) to a number appropriate for your machine. -The building process may take quite some time as it includes a customized -version of LLVM as part of Cling, which is why --verbose is recommended so that -you can see the build progress. - -The default installation will be under -$PYTHONHOME/site-packages/cppyy_backend/lib, -which needs to be added to your dynamic loader path (LD_LIBRARY_PATH). -If you need the dictionary and class map generation tools (used in the examples -below), you need to add $PYTHONHOME/site-packages/cppyy_backend/bin to your -executable path (PATH). - - -Basic bindings example ----------------------- - -These examples assume that cppyy_backend is pointed to by the environment -variable CPPYYHOME, and that CPPYYHOME/lib is added to LD_LIBRARY_PATH and -CPPYYHOME/bin to PATH. - -Let's first test with a trivial example whether all packages are properly -installed and functional. 
-Create a C++ header file with some class in it (all functions are made inline -for convenience; if you have out-of-line code, link with it as appropriate):: - - $ cat MyClass.h - class MyClass { - public: - MyClass(int i = -99) : m_myint(i) {} - - int GetMyInt() { return m_myint; } - void SetMyInt(int i) { m_myint = i; } - - public: - int m_myint; - }; - -Then, generate the bindings using ``genreflex`` (installed under -cppyy_backend/bin in site_packages), and compile the code:: - - $ genreflex MyClass.h - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyClass_rflx.cpp -o libMyClassDict.so -L$CPPYYHOME/lib -lCling - -Next, make sure that the library can be found through the dynamic lookup path -(the ``LD_LIBRARY_PATH`` environment variable on Linux, ``PATH`` on Windows), -for example by adding ".". -Now you're ready to use the bindings. -Since the bindings are designed to look pythonistic, it should be -straightforward:: - - $ pypy-c - >>>> import cppyy - >>>> cppyy.load_reflection_info("libMyClassDict.so") - - >>>> myinst = cppyy.gbl.MyClass(42) - >>>> print myinst.GetMyInt() - 42 - >>>> myinst.SetMyInt(33) - >>>> print myinst.m_myint - 33 - >>>> myinst.m_myint = 77 - >>>> print myinst.GetMyInt() - 77 - >>>> help(cppyy.gbl.MyClass) # shows that normal python introspection works - -That's all there is to it! - - -Automatic class loader ----------------------- - -There is one big problem in the code above, that prevents its use in a (large -scale) production setting: the explicit loading of the reflection library. -Clearly, if explicit load statements such as these show up in code downstream -from the ``MyClass`` package, then that prevents the ``MyClass`` author from -repackaging or even simply renaming the dictionary library. - -The solution is to make use of an automatic class loader, so that downstream -code never has to call ``load_reflection_info()`` directly. 
-The class loader makes use of so-called rootmap files, which ``genreflex`` -can produce. -These files contain the list of available C++ classes and specify the library -that needs to be loaded for their use (as an aside, this listing allows for a -cross-check to see whether reflection info is generated for all classes that -you expect). -By convention, the rootmap files should be located next to the reflection info -libraries, so that they can be found through the normal shared library search -path. -They can be concatenated together, or consist of a single rootmap file per -library. -For example:: - - $ genreflex MyClass.h --rootmap=libMyClassDict.rootmap --rootmap-lib=libMyClassDict.so - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyClass_rflx.cpp -o libMyClassDict.so -L$CPPYYHOME/lib -lCling - -where the first option (``--rootmap``) specifies the output file name, and the -second option (``--rootmap-lib``) the name of the reflection library where -``MyClass`` will live. -It is necessary to provide that name explicitly, since it is only in the -separate linking step where this name is fixed. -If the second option is not given, the library is assumed to be libMyClass.so, -a name that is derived from the name of the header file. - -With the rootmap file in place, the above example can be rerun without explicit -loading of the reflection info library:: - - $ pypy-c - >>>> import cppyy - >>>> myinst = cppyy.gbl.MyClass(42) - >>>> print myinst.GetMyInt() - 42 - >>>> # etc. ... - -As a caveat, note that the class loader is currently limited to classes only. 
- - -Advanced example ----------------- - -The following snippet of C++ is very contrived, to allow showing that such -pathological code can be handled and to show how certain features play out in -practice:: - - $ cat MyAdvanced.h - #include - - class Base1 { - public: - Base1(int i) : m_i(i) {} - virtual ~Base1() {} - int m_i; - }; - - class Base2 { - public: - Base2(double d) : m_d(d) {} - virtual ~Base2() {} - double m_d; - }; - - class C; - - class Derived : public virtual Base1, public virtual Base2 { - public: - Derived(const std::string& name, int i, double d) : Base1(i), Base2(d), m_name(name) {} - virtual C* gimeC() { return (C*)0; } - std::string m_name; - }; - - Base2* BaseFactory(const std::string& name, int i, double d) { - return new Derived(name, i, d); - } - -This code is still only in a header file, with all functions inline, for -convenience of the example. -If the implementations live in a separate source file or shared library, the -only change needed is to link those in when building the reflection library. - -If you were to run ``genreflex`` like above in the basic example, you will -find that not all classes of interest will be reflected, nor will be the -global factory function. -In particular, ``std::string`` will be missing, since it is not defined in -this header file, but in a header file that is included. -In practical terms, general classes such as ``std::string`` should live in a -core reflection set, but for the moment assume we want to have it in the -reflection library that we are building for this example. - -The ``genreflex`` script can be steered using a so-called `selection file`_ -(see "Generating Reflex Dictionaries") -which is a simple XML file specifying, either explicitly or by using a -pattern, which classes, variables, namespaces, etc. to select from the given -header file. 
-With the aid of a selection file, a large project can be easily managed: -simply ``#include`` all relevant headers into a single header file that is -handed to ``genreflex``. -In fact, if you hand multiple header files to ``genreflex``, then a selection -file is almost obligatory: without it, only classes from the last header will -be selected. -Then, apply a selection file to pick up all the relevant classes. -For our purposes, the following rather straightforward selection will do -(the name ``lcgdict`` for the root is historical, but required):: - - $ cat MyAdvanced.xml - - - - - - - -.. _selection file: https://root.cern.ch/how/how-use-reflex - -Now the reflection info can be generated and compiled:: - - $ genreflex MyAdvanced.h --selection=MyAdvanced.xml - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyAdvanced_rflx.cpp -o libAdvExDict.so -L$CPPYYHOME/lib -lCling - -and subsequently be used from PyPy:: - - >>>> import cppyy - >>>> cppyy.load_reflection_info("libAdvExDict.so") - - >>>> d = cppyy.gbl.BaseFactory("name", 42, 3.14) - >>>> type(d) - - >>>> isinstance(d, cppyy.gbl.Base1) - True - >>>> isinstance(d, cppyy.gbl.Base2) - True - >>>> d.m_i, d.m_d - (42, 3.14) - >>>> d.m_name == "name" - True - >>>> - -Again, that's all there is to it! - -A couple of things to note, though. -If you look back at the C++ definition of the ``BaseFactory`` function, -you will see that it declares the return type to be a ``Base2``, yet the -bindings return an object of the actual type ``Derived``? -This choice is made for a couple of reasons. -First, it makes method dispatching easier: if bound objects are always their -most derived type, then it is easy to calculate any offsets, if necessary. -Second, it makes memory management easier: the combination of the type and -the memory address uniquely identifies an object. 
-That way, it can be recycled and object identity can be maintained if it is -entered as a function argument into C++ and comes back to PyPy as a return -value. -Last, but not least, casting is decidedly unpythonistic. -By always providing the most derived type known, casting becomes unnecessary. -For example, the data member of ``Base2`` is simply directly available. -Note also that the unreflected ``gimeC`` method of ``Derived`` does not -preclude its use. -It is only the ``gimeC`` method that is unusable as long as class ``C`` is -unknown to the system. - - -Features --------- - -The following is not meant to be an exhaustive list, since cppyy is still -under active development. -Furthermore, the intention is that every feature is as natural as possible on -the python side, so if you find something missing in the list below, simply -try it out. -It is not always possible to provide exact mapping between python and C++ -(active memory management is one such case), but by and large, if the use of a -feature does not strike you as obvious, it is more likely to simply be a bug. -That is a strong statement to make, but also a worthy goal. -For the C++ side of the examples, refer to this :doc:`example code `, which was -bound using:: - - $ genreflex example.h --deep --rootmap=libexampleDict.rootmap --rootmap-lib=libexampleDict.so - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include example_rflx.cpp -o libexampleDict.so -L$CPPYYHOME/lib -lCling - -* **abstract classes**: Are represented as python classes, since they are - needed to complete the inheritance hierarchies, but will raise an exception - if an attempt is made to instantiate from them. 
- Example:: - - >>>> from cppyy.gbl import AbstractClass, ConcreteClass - >>>> a = AbstractClass() - Traceback (most recent call last): - File "", line 1, in - TypeError: cannot instantiate abstract class 'AbstractClass' - >>>> issubclass(ConcreteClass, AbstractClass) - True - >>>> c = ConcreteClass() - >>>> isinstance(c, AbstractClass) - True - >>>> - -* **arrays**: Supported for builtin data types only, as used from module - ``array``. - Out-of-bounds checking is limited to those cases where the size is known at - compile time (and hence part of the reflection info). - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> from array import array - >>>> c = ConcreteClass() - >>>> c.array_method(array('d', [1., 2., 3., 4.]), 4) - 1 2 3 4 - >>>> - -* **builtin data types**: Map onto the expected equivalent python types, with - the caveat that there may be size differences, and thus it is possible that - exceptions are raised if an overflow is detected. - -* **casting**: Is supposed to be unnecessary. - Object pointer returns from functions provide the most derived class known - in the hierarchy of the object being returned. - This is important to preserve object identity as well as to make casting, - a pure C++ feature after all, superfluous. - Example:: - - >>>> from cppyy.gbl import AbstractClass, ConcreteClass - >>>> c = ConcreteClass() - >>>> ConcreteClass.show_autocast.__doc__ - 'AbstractClass* ConcreteClass::show_autocast()' - >>>> d = c.show_autocast() - >>>> type(d) - - >>>> - - However, if need be, you can perform C++-style reinterpret_casts (i.e. - without taking offsets into account), by taking and rebinding the address - of an object:: - - >>>> from cppyy import addressof, bind_object - >>>> e = bind_object(addressof(d), AbstractClass) - >>>> type(e) - - >>>> - -* **classes and structs**: Get mapped onto python classes, where they can be - instantiated as expected. 
- If classes are inner classes or live in a namespace, their naming and - location will reflect that. - Example:: - - >>>> from cppyy.gbl import ConcreteClass, Namespace - >>>> ConcreteClass == Namespace.ConcreteClass - False - >>>> n = Namespace.ConcreteClass.NestedClass() - >>>> type(n) - - >>>> - -* **data members**: Public data members are represented as python properties - and provide read and write access on instances as expected. - Private and protected data members are not accessible. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> c = ConcreteClass() - >>>> c.m_int - 42 - >>>> - -* **default arguments**: C++ default arguments work as expected, but python - keywords are not supported. - It is technically possible to support keywords, but for the C++ interface, - the formal argument names have no meaning and are not considered part of the - API, hence it is not a good idea to use keywords. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> c = ConcreteClass() # uses default argument - >>>> c.m_int - 42 - >>>> c = ConcreteClass(13) - >>>> c.m_int - 13 - >>>> - -* **doc strings**: The doc string of a method or function contains the C++ - arguments and return types of all overloads of that name, as applicable. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> print ConcreteClass.array_method.__doc__ - void ConcreteClass::array_method(int*, int) - void ConcreteClass::array_method(double*, int) - >>>> - -* **enums**: Are translated as ints with no further checking. - -* **functions**: Work as expected and live in their appropriate namespace - (which can be the global one, ``cppyy.gbl``). - -* **inheritance**: All combinations of inheritance on the C++ (single, - multiple, virtual) are supported in the binding. - However, new python classes can only use single inheritance from a bound C++ - class. - Multiple inheritance would introduce two "this" pointers in the binding. - This is a current, not a fundamental, limitation. 
- The C++ side will not see any overridden methods on the python side, as - cross-inheritance is planned but not yet supported. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> help(ConcreteClass) - Help on class ConcreteClass in module __main__: - - class ConcreteClass(AbstractClass) - | Method resolution order: - | ConcreteClass - | AbstractClass - | cppyy.CPPObject - | __builtin__.CPPInstance - | __builtin__.object - | - | Methods defined here: - | - | ConcreteClass(self, *args) - | ConcreteClass::ConcreteClass(const ConcreteClass&) - | ConcreteClass::ConcreteClass(int) - | ConcreteClass::ConcreteClass() - | - etc. .... - -* **memory**: C++ instances created by calling their constructor from python - are owned by python. - You can check/change the ownership with the _python_owns flag that every - bound instance carries. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> c = ConcreteClass() - >>>> c._python_owns # True: object created in Python - True - >>>> - -* **methods**: Are represented as python methods and work as expected. - They are first class objects and can be bound to an instance. - Virtual C++ methods work as expected. - To select a specific virtual method, do like with normal python classes - that override methods: select it from the class that you need, rather than - calling the method on the instance. - To select a specific overload, use the __dispatch__ special function, which - takes the name of the desired method and its signature (which can be - obtained from the doc string) as arguments. - -* **namespaces**: Are represented as python classes. - Namespaces are more open-ended than classes, so sometimes initial access may - result in updates as data and functions are looked up and constructed - lazily. - Thus the result of ``dir()`` on a namespace shows the classes available, - even if they may not have been created yet. - It does not show classes that could potentially be loaded by the class - loader. 
- Once created, namespaces are registered as modules, to allow importing from - them. - Namespaces currently do not work with the class loader. - Fixing these bootstrap problems is on the TODO list. - The global namespace is ``cppyy.gbl``. - -* **NULL**: Is represented as ``cppyy.gbl.nullptr``. - In C++11, the keyword ``nullptr`` is used to represent ``NULL``. - For clarity of intent, it is recommended to use this instead of ``None`` - (or the integer ``0``, which can serve in some cases), as ``None`` is better - understood as ``void`` in C++. - -* **operator conversions**: If defined in the C++ class and a python - equivalent exists (i.e. all builtin integer and floating point types, as well - as ``bool``), it will map onto that python conversion. - Note that ``char*`` is mapped onto ``__str__``. - Example:: - - >>>> from cppyy.gbl import ConcreteClass - >>>> print ConcreteClass() - Hello operator const char*! - >>>> - -* **operator overloads**: If defined in the C++ class and if a python - equivalent is available (not always the case, think e.g. of ``operator||``), - then they work as expected. - Special care needs to be taken for global operator overloads in C++: first, - make sure that they are actually reflected, especially for the global - overloads for ``operator==`` and ``operator!=`` of STL vector iterators in - the case of gcc (note that they are not needed to iterate over a vector). - Second, make sure that reflection info is loaded in the proper order, - i.e. that these global overloads are available before use. - -* **pointers**: For builtin data types, see arrays. - For objects, a pointer to an object and an object look the same, unless - the pointer is a data member. - In that case, assigning to the data member will cause a copy of the pointer - and care should be taken about the object's lifetime. - If a pointer is a global variable, the C++ side can replace the underlying - object and the python side will immediately reflect that. 
- -* **PyObject***: Arguments and return types of ``PyObject*`` can be used, and - passed on to CPython API calls. - Since these CPython-like objects need to be created and tracked (this all - happens through ``cpyext``), this interface is not particularly fast. - -* **static data members**: Are represented as python property objects on the - class and the meta-class. - Both read and write access work as expected. - -* **static methods**: Are represented as python's ``staticmethod`` objects - and can be called both from the class as well as from instances. - -* **strings**: The std::string class is considered a builtin C++ type and - mixes quite well with python's str. - Python's str can be passed where a ``const char*`` is expected, and a str - will be returned if the return type is ``const char*``. - -* **templated classes**: Are represented in a meta-class style in python. - This may look a little bit confusing, but conceptually is rather natural. - For example, given the class ``std::vector``, the meta-class part would - be ``std.vector``. - Then, to get the instantiation on ``int``, do ``std.vector(int)`` and to - create an instance of that class, do ``std.vector(int)()``:: - - >>>> import cppyy - >>>> cppyy.load_reflection_info('libexampleDict.so') - >>>> cppyy.gbl.std.vector # template metatype - - >>>> cppyy.gbl.std.vector(int) # instantiates template -> class - '> - >>>> cppyy.gbl.std.vector(int)() # instantiates class -> object - <__main__.std::vector object at 0x00007fe480ba4bc0> - >>>> - - Note that templates can be built up by handing actual types to the class - instantiation (as done in this vector example), or by passing in the list of - template arguments as a string. - The former is a lot easier to work with if you have template instantiations - using classes that themselves are templates in the arguments (think e.g. a - vector of vectors). 
- All template classes must already exist in the loaded reflection info; they - do not work (yet) with the class loader. - - For compatibility with other bindings generators, use of square brackets - instead of parentheses to instantiate templates is supported as well. - -* **templated functions**: Automatically participate in overloading and are - used in the same way as other global functions. - -* **templated methods**: For now, require an explicit selection of the - template parameters. - This will be changed to allow them to participate in overloads as expected. - -* **typedefs**: Are simple python references to the actual classes to which - they refer. - -* **unary operators**: Are supported if a python equivalent exists, and if the - operator is defined in the C++ class. - -You can always find more detailed examples and see the full set of supported -features by looking at the tests in pypy/module/cppyy/test. - -If a feature or reflection info is missing, this is supposed to be handled -gracefully. -In fact, there are unit tests explicitly for this purpose (even as their use -becomes less interesting over time, as the number of missing features -decreases). -Only when a missing feature is used should there be an exception. -For example, if no reflection info is available for a return type, then a -class that has a method with that return type can still be used. -Only that one specific method cannot be used. - - -Templates ---------- - -Templates can be automatically instantiated, assuming the appropriate header -files have been loaded or are accessible to the class loader. -This is the case for example for all of STL. 
-For example:: - - $ cat MyTemplate.h - #include - - class MyClass { - public: - MyClass(int i = -99) : m_i(i) {} - MyClass(const MyClass& s) : m_i(s.m_i) {} - MyClass& operator=(const MyClass& s) { m_i = s.m_i; return *this; } - ~MyClass() {} - int m_i; - }; - -Run the normal ``genreflex`` and compilation steps:: - - $ genreflex MyTemplate.h --selection=MyTemplate.xml - $ g++ -std=c++11 -fPIC -rdynamic -O2 -shared -I$CPPYYHOME/include MyTemplate_rflx.cpp -o libTemplateDict.so -L$CPPYYHOME/lib -lCling - -Subsequent use should be as expected. -Note the meta-class style of "instantiating" the template:: - - >>>> import cppyy - >>>> cppyy.load_reflection_info("libTemplateDict.so") - >>>> std = cppyy.gbl.std - >>>> MyClass = cppyy.gbl.MyClass - >>>> v = std.vector(MyClass)() - >>>> v += [MyClass(1), MyClass(2), MyClass(3)] - >>>> for m in v: - .... print m.m_i, - .... - 1 2 3 - >>>> - -The arguments to the template instantiation can either be a string with the -full list of arguments, or the explicit classes. -The latter makes for easier code writing if the classes passed to the -instantiation are themselves templates. - - -The fast lane -------------- - -By default, cppyy will use direct function pointers through `CFFI`_ whenever -possible. If this causes problems for you, you can disable it by setting the -CPPYY_DISABLE_FASTPATH environment variable. - -.. _CFFI: https://cffi.readthedocs.io/en/latest/ - - -CPython -------- - -Most of the ideas in cppyy come originally from the `PyROOT`_ project, which -contains a CPython-based cppyy.py module (with similar dependencies as the -one that comes with PyPy). -A standalone pip-installable version is planned, but for now you can install -ROOT through your favorite distribution installer (available in the science -section). - -.. _PyROOT: https://root.cern.ch/pyroot - -There are a couple of minor differences between the two versions of cppyy -(the CPython version has a few more features). 
-Work is on-going to integrate the nightly tests of both to make sure their -feature sets are equalized. - - -Python3 ------- - -The CPython version of cppyy supports Python3, assuming your packager has -built the backend for it. -The cppyy module has not been tested with the `Py3k`_ version of PyPy. -Note that the generated reflection information (from ``genreflex``) is fully -independent of Python, and does not need to be rebuilt when switching versions -or interpreters. - -.. _Py3k: https://bitbucket.org/pypy/pypy/src/py3k - - -.. toctree:: - :hidden: - - cppyy_example diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst deleted file mode 100644 --- a/pypy/doc/cppyy_example.rst +++ /dev/null @@ -1,59 +0,0 @@ -File example.h -============== - -:: - - #include - #include - - class AbstractClass { - public: - virtual ~AbstractClass() {} - virtual void abstract_method() = 0; - }; - - class ConcreteClass : AbstractClass { - public: - ConcreteClass(int n=42) : m_int(n) {} - ~ConcreteClass() {} - - virtual void abstract_method() { - std::cout << "called concrete method" << std::endl; - } - - void array_method(int* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - void array_method(double* ad, int size) { - for (int i=0; i < size; ++i) - std::cout << ad[i] << ' '; - std::cout << std::endl; - } - - AbstractClass* show_autocast() { - return this; - } - - operator const char*() { - return "Hello operator const char*!"; - } - - public: - int m_int; - }; - - namespace Namespace { - - class ConcreteClass { - public: - class NestedClass { - public: - std::vector m_v; - }; - - }; - - } // namespace Namespace diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -337,6 +337,8 @@ - ``frozenset`` (empty frozenset only) + - unbound method objects (for Python 2 only) + This change requires some 
changes to ``id`` as well. ``id`` fulfills the following condition: ``x is y <=> id(x) == id(y)``. Therefore ``id`` of the above types will return a value that is computed from the argument, and can diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -12,7 +12,7 @@ * Write them in pure Python and use ctypes_. -* Write them in C++ and bind them through :doc:`cppyy ` using Cling. +* Write them in C++ and bind them through cppyy_ using Cling. * Write them as `RPython mixed modules`_. @@ -61,29 +61,22 @@ .. _libffi: http://sourceware.org/libffi/ -Cling and cppyy ---------------- +cppyy +----- -The builtin :doc:`cppyy ` module uses reflection information, provided by -`Cling`_ (which needs to be `installed separately`_), of C/C++ code to -automatically generate bindings at runtime. -In Python, classes and functions are always runtime structures, so when they -are generated matters not for performance. -However, if the backend itself is capable of dynamic behavior, it is a much -better functional match, allowing tighter integration and more natural -language mappings. +For C++, _cppyy_ is an automated bindings generator available for both +PyPy and CPython. +_cppyy_ relies on declarations from C++ header files to dynamically +construct Python equivalent classes, functions, variables, etc. +It is designed for use by large scale programs and supports modern C++. +With PyPy, it leverages the built-in ``_cppyy`` module, allowing the JIT to +remove most of the cross-language overhead. -The :doc:`cppyy ` module is written in RPython, thus PyPy's JIT is able to remove -most cross-language call overhead. +To install, run ``pip install cppyy``. +Further details are available in the `full documentation`_. -:doc:Full details are `available here `. +.. _`full documentation`: https://cppyy.readthedocs.org/ -.. _installed separately: https://pypi.python.org/pypi/PyPy-cppyy-backend -.. 
_Cling: https://root.cern.ch/cling - -.. toctree:: - - cppyy RPython Mixed Modules --------------------- diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst --- a/pypy/doc/getting-started-dev.rst +++ b/pypy/doc/getting-started-dev.rst @@ -35,8 +35,8 @@ * Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Run tests! (See - the rest of this page.) + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) * Commit regularly with ``hg commit``. A one-line commit message is fine. We love to have tons of commits; make one as soon as you have @@ -113,6 +113,10 @@ make sure you have the correct version installed which you can find out with the ``--version`` switch. +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter. + Now on to running some tests. PyPy has many different test directories and you can use shell completion to point at directories or files:: @@ -141,7 +145,7 @@ .. _py.test testing tool: http://pytest.org .. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage - +.. _`build requirements`: build.html#install-build-time-dependencies Special Introspection Features of the Untranslated Python Interpreter --------------------------------------------------------------------- diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -40,6 +40,9 @@ sure things are ported back to the trunk and to the branch as necessary. +* Maybe bump the SOABI number in module/imp/importing. This has many + implications, so make sure the PyPy community agrees to the change. 
+ * Update and write documentation * update pypy/doc/contributor.rst (and possibly LICENSE) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,6 +5,14 @@ .. this is a revision shortly after release-pypy2.7-v5.8.0 .. startrev: 558bd00b3dd8 +In previous versions of PyPy, ``instance.method`` would return always +the same bound method object, when gotten out of the same instance (as +far as ``is`` and ``id()`` can tell). CPython doesn't do that. Now +PyPy, like CPython, returns a different bound method object every time. +For ``type.method``, PyPy2 still returns always the same *unbound* +method object; CPython does it for built-in types but not for +user-defined types. + .. branch: cffi-complex .. branch: cffi-char16-char32 @@ -25,3 +33,43 @@ .. branch: cpyext-hash_notimpl If ``tp_hash`` is ``PyObject_HashNotImplemented``, set ``obj.__dict__['__hash__']`` to None + +.. branch: cppyy-packaging + +Renaming of ``cppyy`` to ``_cppyy``. +The former is now an external package installable with ``pip install cppyy``. + +.. branch: Enable_PGO_for_clang + +.. branch: nopax + +At the end of translation, run ``attr -q -s pax.flags -V m`` on +PAX-enabled systems on the produced binary. This seems necessary +because PyPy uses a JIT. + +.. branch: pypy_bytearray + +Improve ``bytearray`` performance (backported from py3.5) + +.. branch: gc-del-limit-growth + +Fix the bounds in the GC when allocating a lot of objects with finalizers, +fixes issue #2590 + +.. branch: arrays-force-less + +Small improvement to optimize list accesses with constant indexes better by +throwing away information about them less eagerly. + + +.. branch: getarrayitem-into-bridges + +More information is retained into a bridge: knowledge about the content of +arrays (at fixed indices) is stored in guards (and thus available at the +beginning of bridges). 
Also, some better feeding of information about known +fields of constant objects into bridges. + +.. branch: cpyext-leakchecking + +Add support for leakfinder in cpyext tests (disabled for now, due to too many +failures). diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -2,6 +2,7 @@ Arguments objects. """ from rpython.rlib.debug import make_sure_not_resized +from rpython.rlib.objectmodel import not_rpython from rpython.rlib import jit from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rstring import StringBuilder @@ -48,8 +49,8 @@ # behaviour but produces better error messages self.methodcall = methodcall + @not_rpython def __repr__(self): - """ NOT_RPYTHON """ name = self.__class__.__name__ if not self.keywords: return '%s(%s)' % (name, self.arguments_w,) diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1,4 +1,5 @@ import sys +import py from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES @@ -1271,8 +1272,22 @@ self.setitem(w_globals, w_key, self.builtin) return statement.exec_code(self, w_globals, w_locals) + @not_rpython + def appdef(self, source): + '''Create interp-level function object from app-level source. + + The source should be in the same format as for space.appexec(): + """(foo, bar): return 'baz'""" + ''' + source = source.lstrip() + assert source.startswith('('), "incorrect header in:\n%s" % (source,) + source = py.code.Source("def anonymous%s\n" % source) + w_glob = self.newdict(module=True) + self.exec_(str(source), w_glob, w_glob) + return self.getitem(w_glob, self.newtext('anonymous')) + @specialize.arg(2) - def appexec(self, posargs_w, source): + def appexec(self, posargs_w, source, cache=True): """ return value from executing given source at applevel. 
The source must look like '''(x, y): @@ -1280,7 +1295,11 @@ return result ''' """ - w_func = self.fromcache(AppExecCache).getorbuild(source) + if cache: + w_func = self.fromcache(AppExecCache).getorbuild(source) + else: + # NB: since appdef() is not-RPython, using cache=False also is. + w_func = self.appdef(source) args = Arguments(self, list(posargs_w)) return self.call_args(w_func, args) @@ -1817,15 +1836,7 @@ class AppExecCache(SpaceCache): @not_rpython def build(cache, source): - space = cache.space - # XXX will change once we have our own compiler - import py - source = source.lstrip() - assert source.startswith('('), "incorrect header in:\n%s" % (source,) - source = py.code.Source("def anonymous%s\n" % source) - w_glob = space.newdict(module=True) - space.exec_(str(source), w_glob, w_glob) - return space.getitem(w_glob, space.newtext('anonymous')) + return cache.space.appdef(source) # Table describing the regular part of the interface of object spaces, diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -7,7 +7,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, specialize -from rpython.rlib.objectmodel import dont_inline +from rpython.rlib.objectmodel import dont_inline, not_rpython from rpython.rlib import rstack, rstackovf from rpython.rlib import rwin32 from rpython.rlib import runicode @@ -65,8 +65,9 @@ self.match(space, space.w_KeyboardInterrupt)) # note: an extra case is added in OpErrFmtNoArgs + @not_rpython def __str__(self): - "NOT_RPYTHON: Convenience for tracebacks." + "Convenience for tracebacks." s = self._w_value space = getattr(self.w_type, 'space', None) if space is not None: @@ -119,15 +120,16 @@ if RECORD_INTERPLEVEL_TRACEBACK: self.debug_excs.append(sys.exc_info()) + @not_rpython def print_application_traceback(self, space, file=None): - "NOT_RPYTHON: Dump a standard application-level traceback." 
+ "Dump a standard application-level traceback." if file is None: file = sys.stderr self.print_app_tb_only(file) print >> file, self.errorstr(space) + @not_rpython def print_app_tb_only(self, file): - "NOT_RPYTHON" tb = self._application_traceback if tb: import linecache @@ -154,8 +156,9 @@ print >> file, l tb = tb.next + @not_rpython def print_detailed_traceback(self, space=None, file=None): - """NOT_RPYTHON: Dump a nice detailed interpreter- and + """Dump a nice detailed interpreter- and application-level traceback, useful to debug the interpreter.""" if file is None: file = sys.stderr diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -1,6 +1,7 @@ import sys from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib.objectmodel import specialize, not_rpython from rpython.rlib import jit, rgc, objectmodel TICK_COUNTER_STEP = 100 @@ -410,8 +411,9 @@ # to run at the next possible bytecode self.reset_ticker(-1) + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): - """NOT_RPYTHON: + """ Register the PeriodicAsyncAction action to be called whenever the tick counter becomes smaller than 0. If 'use_bytecode_counter' is True, make sure that we decrease the tick counter at every bytecode. 
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -516,8 +516,9 @@ def __init__(self, space, w_function, w_instance): self.space = space + assert w_instance is not None # unbound methods only exist in Python 2 self.w_function = w_function - self.w_instance = w_instance # or None + self.w_instance = w_instance def descr_method__new__(space, w_subtype, w_function, w_instance): if space.is_w(w_instance, space.w_None): @@ -577,24 +578,6 @@ return space.w_False return space.newbool(space.eq_w(self.w_function, w_other.w_function)) - def is_w(self, space, other): - if not isinstance(other, Method): - return False - return (self.w_instance is other.w_instance and - self.w_function is other.w_function) - - def immutable_unique_id(self, space): - from pypy.objspace.std.util import IDTAG_METHOD as tag - from pypy.objspace.std.util import IDTAG_SHIFT - if self.w_instance is not None: - id = space.bigint_w(space.id(self.w_instance)) - id = id.lshift(LONG_BIT) - else: - id = rbigint.fromint(0) - id = id.or_(space.bigint_w(space.id(self.w_function))) - id = id.lshift(IDTAG_SHIFT).int_or_(tag) - return space.newlong_from_rbigint(id) - def descr_method_hash(self): space = self.space w_result = space.hash(self.w_function) @@ -606,15 +589,16 @@ from pypy.interpreter.gateway import BuiltinCode w_mod = space.getbuiltinmodule('_pickle_support') mod = space.interp_w(MixedModule, w_mod) - w_instance = self.w_instance or space.w_None + w_instance = self.w_instance w_function = self.w_function if (isinstance(w_function, Function) and isinstance(w_function.code, BuiltinCode)): new_inst = mod.get('builtin_method_new') tup = [w_instance, space.newtext(w_function.name)] else: - new_inst = mod.get('method_new') - tup = [self.w_function, w_instance] + w_builtins = space.getbuiltinmodule('builtins') + new_inst = space.getattr(w_builtins, space.newtext('getattr')) + tup = [w_instance, 
space.newunicode(w_function.getname(space))] return space.newtuple([new_inst, space.newtuple(tup)]) diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -23,7 +23,7 @@ DescrMismatch) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import ClassMethod, FunctionWithFixedCode -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import r_longlong, r_int, r_ulonglong, r_uint from rpython.tool.sourcetools import func_with_new_name, compile2 @@ -75,8 +75,8 @@ def _freeze_(self): return True + @not_rpython def unwrap(self, space, w_value): - """NOT_RPYTHON""" raise NotImplementedError @@ -399,8 +399,8 @@ class BuiltinActivation(object): _immutable_ = True + @not_rpython def __init__(self, behavior): From pypy.commits at gmail.com Wed Aug 30 07:54:53 2017 From: pypy.commits at gmail.com (stevie_92) Date: Wed, 30 Aug 2017 04:54:53 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-gc-trialdeletion: Implemented non-incremental cycle detection, removed simple trial deletion Message-ID: <59a6a78d.48badf0a.f1519.e126@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-trialdeletion Changeset: r92281:2fb71fc31ef4 Date: 2017-08-06 13:48 +0200 http://bitbucket.org/pypy/pypy/changeset/2fb71fc31ef4/ Log: Implemented non-incremental cycle detection, removed simple trial deletion diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -15,6 +15,10 @@ from rpython.rlib.objectmodel import keepalive_until_here from rpython.rtyper.annlowlevel import llhelper from rpython.rlib import rawrefcount +from rpython.rlib.rawrefcount import ( + REFCNT_MASK, REFCNT_FROM_PYPY, REFCNT_OVERFLOW, REFCNT_CYCLE_BUFFERED, + REFCNT_CLR_MASK, REFCNT_CLR_GREEN, REFCNT_CLR_PURPLE, + 
W_MARKER_DEALLOCATING) from rpython.rlib.debug import fatalerror, debug_print from pypy.module.cpyext.api import slot_function from pypy.module.cpyext.typeobjectdefs import visitproc @@ -190,9 +194,6 @@ py_obj.c_ob_refcnt += rawrefcount.REFCNT_FROM_PYPY rawrefcount.create_link_pypy(w_obj, py_obj) - -w_marker_deallocating = W_Root() - def from_ref(space, ref): """ Finds the interpreter object corresponding to the given reference. If the @@ -203,7 +204,7 @@ return None w_obj = rawrefcount.to_obj(W_Root, ref) if w_obj is not None: - if w_obj is not w_marker_deallocating: + if w_obj is not W_MARKER_DEALLOCATING: return w_obj fatalerror( "*** Invalid usage of a dying CPython object ***\n" @@ -250,7 +251,7 @@ def pyobj_has_w_obj(pyobj): w_obj = rawrefcount.to_obj(W_Root, pyobj) - return w_obj is not None and w_obj is not w_marker_deallocating + return w_obj is not None and w_obj is not W_MARKER_DEALLOCATING def is_pyobj(x): @@ -270,27 +271,6 @@ hop.exception_cannot_occur() return hop.inputconst(lltype.Bool, hop.s_result.const) -def _decref(pyobj): - if pyobj.c_ob_refcnt & rawrefcount.REFCNT_OVERFLOW == 0: - pyobj.c_ob_refcnt -= 1 - else: - if pyobj.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW: - pyobj.c_ob_refcnt -= 1 - elif rawrefcount.overflow_sub(pyobj): - pyobj.c_ob_refcnt -= 1 - -def _incref(pyobj): - if pyobj.c_ob_refcnt & rawrefcount.REFCNT_OVERFLOW == 0: - pyobj.c_ob_refcnt += 1 - else: - if pyobj.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW: - pyobj.c_ob_refcnt += 1 - rawrefcount.overflow_new(pyobj) - else: - rawrefcount.overflow_add(pyobj) - @specialize.ll() def make_ref(space, obj, w_userdata=None): """Increment the reference counter of the PyObject and return it. 
@@ -301,7 +281,7 @@ else: pyobj = as_pyobj(space, obj, w_userdata) if pyobj: - _incref(pyobj) + rawrefcount.incref(pyobj) if not is_pyobj(obj): keepalive_until_here(obj) return pyobj @@ -321,7 +301,7 @@ w_obj = obj pyobj = as_pyobj(space, w_obj) if pyobj: - _decref(pyobj) + rawrefcount.decref(pyobj) keepalive_until_here(w_obj) return w_obj @@ -334,122 +314,30 @@ if is_pyobj(obj): obj = rffi.cast(PyObject, obj) if obj: - _decref(obj) - - if obj.c_ob_refcnt & rawrefcount.REFCNT_MASK == 0 and \ - rawrefcount.get_trialdeletion_phase() != 1: - if obj.c_ob_refcnt & rawrefcount.REFCNT_FROM_PYPY == 0: + rawrefcount.decref(obj) + rc = obj.c_ob_refcnt + if (rc & REFCNT_MASK == 0): + if (rc & REFCNT_FROM_PYPY == 0 and + rc & REFCNT_CLR_MASK != REFCNT_CLR_PURPLE): _Py_Dealloc(space, obj) - elif obj.c_ob_refcnt & rawrefcount.REFCNT_CLR_GREEN == 0: - if rawrefcount.get_trialdeletion_phase() == 0: - trial_delete(space, obj) + elif (rc & REFCNT_CLR_MASK != REFCNT_CLR_GREEN): + possible_root(space, obj) else: get_w_obj_and_decref(space, obj) - at specialize.ll() -def refcnt_overflow(space, obj): - if is_pyobj(obj): - pyobj = rffi.cast(PyObject, obj) - else: - pyobj = as_pyobj(space, obj, None) - if pyobj: - if (pyobj.c_ob_refcnt & rawrefcount.REFCNT_MASK == - rawrefcount.REFCNT_OVERFLOW): - return rawrefcount.REFCNT_OVERFLOW - else: - return (pyobj.c_ob_refcnt & rawrefcount.REFCNT_MASK) \ - + rawrefcount.overflow_get(pyobj) - return 0 - -def traverse(space, obj, visit): - from pypy.module.cpyext.api import generic_cpy_call - if obj.c_ob_type and obj.c_ob_type.c_tp_traverse: - generic_cpy_call(space, obj.c_ob_type.c_tp_traverse, obj, visit, - rffi.cast(rffi.VOIDP, obj)) - -def clear(space, obj): - from pypy.module.cpyext.api import generic_cpy_call - if obj.c_ob_type: - generic_cpy_call(space, obj.c_ob_type.c_tp_clear, obj) - - at slot_function([PyObject, rffi.VOIDP], rffi.INT_real, error=-1) -def visit_decref(space, obj, args): - _decref(obj) - debug_print("visited dec", obj, "new 
refcnt", obj.c_ob_refcnt) - if (obj not in rawrefcount.get_visited()): - rawrefcount.add_visited(obj) - from pypy.module.cpyext.slotdefs import llslot - traverse(space, obj, rffi.cast(visitproc, llslot(space, visit_decref))) - return 0 - - at slot_function([PyObject, rffi.VOIDP], rffi.INT_real, error=-1) -def visit_incref(space, obj, args): - _incref(obj) - debug_print("visited inc", obj, "new refcnt", obj.c_ob_refcnt) - if (obj not in rawrefcount.get_visited()): - rawrefcount.add_visited(obj) - from pypy.module.cpyext.slotdefs import llslot - traverse(space, obj, rffi.cast(visitproc, llslot(space, visit_incref))) - return 0 - - at specialize.ll() -def trial_delete(space, obj): +def possible_root(space, obj): + debug_print("possible root", obj) + rc = obj.c_ob_refcnt if not obj.c_ob_type or not obj.c_ob_type.c_tp_traverse: - obj.c_ob_refcnt = obj.c_ob_refcnt | rawrefcount.REFCNT_CLR_GREEN - return - - from pypy.module.cpyext.slotdefs import llslot - visitproc_incref = rffi.cast(visitproc, llslot(space, visit_incref)) - visitproc_decref = rffi.cast(visitproc, llslot(space, visit_decref)) - - rawrefcount.set_trialdeletion_phase(1) - - debug_print("trial_delete", obj, "refct after decref", obj.c_ob_refcnt) - - debug_print("decref phase") - rawrefcount.clear_visited() - rawrefcount.add_visited(obj) - traverse(space, obj, visitproc_decref) - - debug_print("checkref phase") - visited = [] - alive = [] - for visited_obj in rawrefcount.get_visited(): - visited.append(visited_obj) - if visited_obj.c_ob_refcnt != 0 and \ - visited_obj.c_ob_refcnt != rawrefcount.REFCNT_FROM_PYPY: - alive.append(visited_obj) - debug_print("alive", visited_obj) - - debug_print("incref phase") - rawrefcount.clear_visited() - for alive_obj in alive: - if alive_obj not in rawrefcount.get_visited(): - rawrefcount.add_visited(alive_obj) - traverse(space, alive_obj, visitproc_incref) - - alive = [] - for alive_obj in rawrefcount.get_visited(): - debug_print("alive", alive_obj, alive_obj.c_ob_refcnt) 
- alive.append(alive_obj) - - for reachable_obj in visited: - if reachable_obj not in rawrefcount.get_visited(): - rawrefcount.add_visited(reachable_obj) - traverse(space, reachable_obj, visitproc_incref) - - debug_print("clear phase") - rawrefcount.set_trialdeletion_phase(2) - - for reachable_obj in visited: - if reachable_obj not in alive: - if reachable_obj.c_ob_refcnt < rawrefcount.REFCNT_FROM_PYPY \ - and reachable_obj.c_ob_refcnt > 0: - debug_print("clear", reachable_obj) - clear(space, reachable_obj) - - rawrefcount.set_trialdeletion_phase(0) - rawrefcount.clear_visited() + debug_print("mark green", obj) + rc = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_GREEN + elif rc & REFCNT_CLR_MASK != REFCNT_CLR_PURPLE: + rc = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_PURPLE + if rc & REFCNT_CYCLE_BUFFERED == 0: + debug_print("mark purple", obj) + rawrefcount.buffer_pyobj(obj) + rc = rc | REFCNT_CYCLE_BUFFERED + obj.c_ob_refcnt = rc @cpython_api([PyObject], lltype.Void) def Py_IncRef(space, obj): @@ -463,6 +351,20 @@ def _Py_RefCnt_Overflow(space, obj): return refcnt_overflow(space, obj) + at specialize.ll() +def refcnt_overflow(space, obj): + if is_pyobj(obj): + pyobj = rffi.cast(PyObject, obj) + else: + pyobj = as_pyobj(space, obj, None) + if pyobj: + if (pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW): + return REFCNT_OVERFLOW + else: + return (pyobj.c_ob_refcnt & REFCNT_MASK) + \ + rawrefcount.overflow_get(pyobj) + return 0 + @cpython_api([PyObject], lltype.Void) def _Py_NewReference(space, obj): obj.c_ob_refcnt = 1 @@ -477,7 +379,7 @@ pto = obj.c_ob_type #print >>sys.stderr, "Calling dealloc slot", pto.c_tp_dealloc, "of", obj, \ # "'s type which is", rffi.charp2str(pto.c_tp_name) - rawrefcount.mark_deallocating(w_marker_deallocating, obj) + rawrefcount.mark_deallocating(W_MARKER_DEALLOCATING, obj) generic_cpy_call(space, pto.c_tp_dealloc, obj) @cpython_api([rffi.VOIDP], lltype.Signed, error=CANNOT_FAIL) diff --git a/rpython/memory/gc/incminimark.py 
b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -73,6 +73,8 @@ from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop from rpython.rlib.objectmodel import specialize from rpython.memory.gc.minimarkpage import out_of_memory +from pypy.module.cpyext.api import slot_function, PyObject +from rpython.rtyper.lltypesystem import rffi # # Handles the objects in 2 generations: @@ -187,6 +189,105 @@ ('forw', llmemory.Address)) FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB) NURSARRAY = lltype.Array(llmemory.Address) +VISIT_FUNCTYPE = rffi.CCallback([PyObject, rffi.VOIDP], + rffi.INT_real) + +def traverse(obj, func_ptr): + from pypy.module.cpyext.api import generic_cpy_call + from pypy.module.cpyext.typeobjectdefs import visitproc + if obj.c_ob_type and obj.c_ob_type.c_tp_traverse: + visitproc_ptr = rffi.cast(visitproc, func_ptr) + generic_cpy_call(True, obj.c_ob_type.c_tp_traverse, obj, + visitproc_ptr, rffi.cast(rffi.VOIDP, obj)) + +def visit_mark_gray(obj, args): + from rpython.rlib.rawrefcount import (REFCNT_CLR_GREEN, + REFCNT_CLR_MASK, + decref) + decref(obj) + rc = obj.c_ob_refcnt + if rc & REFCNT_CLR_MASK != REFCNT_CLR_GREEN: + mark_gray_recursive(obj) + return rffi.cast(rffi.INT_real, 0) + +def mark_gray_recursive(obj): + from rpython.rlib.rawrefcount import (REFCNT_CLR_GRAY, + REFCNT_CLR_MASK) + from rpython.rtyper.annlowlevel import llhelper + debug_print("mark_gray_recursive", obj) + rc = obj.c_ob_refcnt + if rc & REFCNT_CLR_MASK != REFCNT_CLR_GRAY: + obj.c_ob_refcnt = obj.c_ob_refcnt & ~REFCNT_CLR_MASK | REFCNT_CLR_GRAY + func_ptr = llhelper(VISIT_FUNCTYPE, visit_mark_gray) + traverse(obj, func_ptr) + +def visit_scan_black(obj, args): + from rpython.rlib.rawrefcount import (REFCNT_CLR_BLACK, + REFCNT_CLR_MASK, + REFCNT_CLR_GREEN, + incref) + incref(obj) + rc = obj.c_ob_refcnt + if (rc & REFCNT_CLR_MASK != REFCNT_CLR_BLACK and + rc & REFCNT_CLR_MASK != REFCNT_CLR_GREEN): + 
scan_black_recursive(obj) + return rffi.cast(rffi.INT_real, 0) + +def scan_black_recursive(obj): + from rpython.rlib.rawrefcount import (REFCNT_CLR_BLACK, + REFCNT_CLR_MASK) + from rpython.rtyper.annlowlevel import llhelper + debug_print("scan_black_recursive", obj) + rc = obj.c_ob_refcnt + obj.c_ob_refcnt = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_BLACK + func_ptr = llhelper(VISIT_FUNCTYPE, visit_scan_black) + traverse(obj, func_ptr) + +def visit_scan(obj, args): + scan_recursive(obj) + return rffi.cast(rffi.INT_real, 0) + +def scan_recursive(obj): + from rpython.rlib.rawrefcount import (REFCNT_CLR_WHITE, + REFCNT_CLR_GRAY, + REFCNT_CLR_GREEN, + REFCNT_CLR_MASK, + REFCNT_MASK) + from rpython.rtyper.annlowlevel import llhelper + debug_print("scan_recursive", obj) + rc = obj.c_ob_refcnt + if (rc & REFCNT_CLR_MASK == REFCNT_CLR_GRAY or + rc & REFCNT_CLR_MASK == REFCNT_CLR_GREEN): + if rc & REFCNT_MASK > 0 and rc & REFCNT_CLR_MASK != REFCNT_CLR_GREEN: + scan_black_recursive(obj) + else: + obj.c_ob_refcnt = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_WHITE + func_ptr = llhelper(VISIT_FUNCTYPE, visit_scan) + traverse(obj, func_ptr) + +def visit_collect_white(obj, args): + collect_white_recursive(obj) + return rffi.cast(rffi.INT_real, 0) + +def collect_white_recursive(obj): + from rpython.rlib.rawrefcount import (REFCNT_CLR_WHITE, + REFCNT_CLR_BLACK, + REFCNT_CLR_MASK, + REFCNT_CYCLE_BUFFERED, + REFCNT_FROM_PYPY) + from pypy.module.cpyext.api import generic_cpy_call + from rpython.rtyper.annlowlevel import llhelper + debug_print("collect_white_recursive", obj) + rc = obj.c_ob_refcnt + if (rc & REFCNT_CLR_MASK == REFCNT_CLR_WHITE and + rc & REFCNT_CYCLE_BUFFERED == 0): + obj.c_ob_refcnt = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_BLACK + func_ptr = llhelper(VISIT_FUNCTYPE, visit_collect_white) + traverse(obj, func_ptr) + if (rc & REFCNT_FROM_PYPY == 0 and + obj.c_ob_type and obj.c_ob_type.c_tp_free): + debug_print("free", obj) + generic_cpy_call(True, obj.c_ob_type.c_tp_free, obj) # 
____________________________________________________________ @@ -1685,6 +1786,7 @@ # # visit the P list from rawrefcount, if enabled. if self.rrc_enabled: + self.rrc_collect_cycles() # TODO only for testing self.rrc_minor_collection_trace() # # visit the "probably young" objects with finalizers. They @@ -2303,6 +2405,7 @@ self.visit_all_objects() # if self.rrc_enabled: + self.rrc_collect_cycles() self.rrc_major_collection_trace() # ll_assert(not (self.probably_young_objects_with_finalizers @@ -2901,13 +3004,13 @@ _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True}) PYOBJ_HDR = lltype.Struct('GCHdr_PyObject', - ('ob_refcnt', lltype.Signed), - ('ob_pypy_link', lltype.Signed)) + ('c_ob_refcnt', lltype.Signed), + ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) def _pyobj(self, pyobjaddr): - return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR) + return llmemory.cast_adr_to_ptr(pyobjaddr, lltype.Ptr(PyObject.TO)) def rawrefcount_init(self, dealloc_trigger_callback): # see pypy/doc/discussion/rawrefcount.rst @@ -2916,6 +3019,7 @@ self.rrc_p_list_old = self.AddressStack() self.rrc_o_list_young = self.AddressStack() self.rrc_o_list_old = self.AddressStack() + self.rrc_buffered = self.AddressStack() self.rrc_p_dict = self.AddressDict() # non-nursery keys only self.rrc_p_dict_nurs = self.AddressDict() # nursery keys only self.rrc_dealloc_trigger_callback = dealloc_trigger_callback @@ -2937,7 +3041,7 @@ ll_assert(self.rrc_enabled, "rawrefcount.init not called") obj = llmemory.cast_ptr_to_adr(gcobj) objint = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = objint + self._pyobj(pyobject).c_ob_pypy_link = objint # lst = self.rrc_p_list_young if self.is_in_nursery(obj): @@ -2957,14 +3061,17 @@ else: self.rrc_o_list_old.append(pyobject) objint = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = 
objint + self._pyobj(pyobject).c_ob_pypy_link = objint # there is no rrc_o_dict + def rawrefcount_buffer_pyobj(self, pyobject): + self.rrc_buffered.append(pyobject) + def rawrefcount_mark_deallocating(self, gcobj, pyobject): ll_assert(self.rrc_enabled, "rawrefcount.init not called") obj = llmemory.cast_ptr_to_adr(gcobj) # should be a prebuilt obj objint = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = objint + self._pyobj(pyobject).c_ob_pypy_link = objint def rawrefcount_from_obj(self, gcobj): obj = llmemory.cast_ptr_to_adr(gcobj) @@ -2975,7 +3082,7 @@ return dct.get(obj) def rawrefcount_to_obj(self, pyobject): - obj = llmemory.cast_int_to_adr(self._pyobj(pyobject).ob_pypy_link) + obj = llmemory.cast_int_to_adr(self._pyobj(pyobject).c_ob_pypy_link) return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF) def rawrefcount_next_dead(self): @@ -2996,15 +3103,13 @@ self.singleaddr) def _rrc_minor_trace(self, pyobject, singleaddr): - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + from rpython.rlib.rawrefcount import REFCNT_MASK # - rc = self._pyobj(pyobject).ob_refcnt - if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: + rc = self._pyobj(pyobject).c_ob_refcnt + if rc & REFCNT_MASK == 0: pass # the corresponding object may die else: - # force the corresponding object to be alive - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link singleaddr.address[0] = llmemory.cast_int_to_adr(intobj) self._trace_drag_out1(singleaddr) @@ -3021,14 +3126,14 @@ no_o_dict) def _rrc_minor_free(self, pyobject, surviving_list, surviving_dict): - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) if self.is_in_nursery(obj): if self.is_forwarded(obj): # Common case: survives and moves obj = self.get_forwarding_address(obj) intobj = llmemory.cast_adr_to_int(obj, 
"symbolic") - self._pyobj(pyobject).ob_pypy_link = intobj + self._pyobj(pyobject).c_ob_pypy_link = intobj surviving = True if surviving_dict: # Surviving nursery object: was originally in @@ -3059,23 +3164,24 @@ def _rrc_free(self, pyobject): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + from rpython.rlib.rawrefcount import REFCNT_MASK # - rc = self._pyobj(pyobject).ob_refcnt + rc = self._pyobj(pyobject).c_ob_refcnt if rc >= REFCNT_FROM_PYPY_LIGHT: rc -= REFCNT_FROM_PYPY_LIGHT - if rc == 0: + if rc & REFCNT_MASK == 0: lltype.free(self._pyobj(pyobject), flavor='raw') else: # can only occur if LIGHT is used in create_link_pyobj() - self._pyobj(pyobject).ob_refcnt = rc - self._pyobj(pyobject).ob_pypy_link = 0 + self._pyobj(pyobject).c_ob_refcnt = rc + self._pyobj(pyobject).c_ob_pypy_link = 0 else: ll_assert(rc >= REFCNT_FROM_PYPY, "refcount underflow?") ll_assert(rc < int(REFCNT_FROM_PYPY_LIGHT * 0.99), "refcount underflow from REFCNT_FROM_PYPY_LIGHT?") rc -= REFCNT_FROM_PYPY - self._pyobj(pyobject).ob_pypy_link = 0 - if rc == 0: + self._pyobj(pyobject).c_ob_pypy_link = 0 + if rc & REFCNT_MASK == 0: self.rrc_dealloc_pending.append(pyobject) # an object with refcnt == 0 cannot stay around waiting # for its deallocator to be called. Some code (lxml) @@ -3086,22 +3192,62 @@ # because after a Py_INCREF()/Py_DECREF() on it, its # tp_dealloc is also called! 
rc = 1 - self._pyobj(pyobject).ob_refcnt = rc + self._pyobj(pyobject).c_ob_refcnt = rc _rrc_free._always_inline_ = True + def rrc_collect_cycles(self): + self.rrc_buffered.foreach(self._rrc_cycle_mark_roots, None) + self.rrc_buffered.foreach(self._rrc_cycle_scan_roots, None) + self.rrc_buffered.foreach(self._rrc_cycle_collect_roots, None) + + def _rrc_cycle_mark_roots(self, pyobject, ignore): + from pypy.module.cpyext.api import generic_cpy_call + from rpython.rlib.rawrefcount import (REFCNT_CYCLE_BUFFERED, + REFCNT_CLR_MASK, + REFCNT_CLR_PURPLE, + REFCNT_MASK, + W_MARKER_DEALLOCATING, + mark_deallocating) + obj = self._pyobj(pyobject) + rc = obj.c_ob_refcnt + debug_print("_rrc_cycle_mark_roots", obj) + if rc & REFCNT_CLR_MASK == REFCNT_CLR_PURPLE and \ + rc & REFCNT_MASK > 0: + mark_gray_recursive(obj) + else: + obj.c_ob_refcnt = rc & ~REFCNT_CYCLE_BUFFERED + self.rrc_buffered.remove(pyobject) + if rc & REFCNT_MASK == 0: + mark_deallocating(W_MARKER_DEALLOCATING, obj) + generic_cpy_call(True, obj.c_ob_type.c_tp_dealloc, obj) + + def _rrc_cycle_scan_roots(self, pyobject, ignore): + obj = self._pyobj(pyobject) + debug_print("_rrc_cycle_scan_roots", obj) + scan_recursive(obj) + + def _rrc_cycle_collect_roots(self, pyobject, ignore): + from rpython.rlib.rawrefcount import REFCNT_CYCLE_BUFFERED + obj = self._pyobj(pyobject) + debug_print("_rrc_cycle_collect_roots", obj) + self.rrc_buffered.remove(pyobject) + obj.c_ob_refcnt = obj.c_ob_refcnt & ~REFCNT_CYCLE_BUFFERED + collect_white_recursive(obj) + def rrc_major_collection_trace(self): self.rrc_p_list_old.foreach(self._rrc_major_trace, None) def _rrc_major_trace(self, pyobject, ignore): - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + from rpython.rlib.rawrefcount import (REFCNT_FROM_PYPY, + REFCNT_FROM_PYPY_LIGHT, + REFCNT_MASK) # - rc = self._pyobj(pyobject).ob_refcnt - if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: + rc = 
self._pyobj(pyobject).c_ob_refcnt + if rc & REFCNT_MASK == 0: pass # the corresponding object may die else: # force the corresponding object to be alive - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) self.objects_to_trace.append(obj) self.visit_all_objects() @@ -3131,7 +3277,7 @@ # This is true if the obj has one of the following two flags: # * GCFLAG_VISITED: was seen during tracing # * GCFLAG_NO_HEAP_PTRS: immortal object never traced (so far) - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) if self.header(obj).tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS): surviving_list.append(pyobject) diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -4,6 +4,7 @@ from rpython.memory.gc.test.test_direct import BaseDirectGCTest from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT +from pypy.module.cpyext.api import PyObject, PyTypeObject PYOBJ_HDR = IncrementalMiniMarkGC.PYOBJ_HDR PYOBJ_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_HDR_PTR @@ -56,21 +57,22 @@ self._collect(major=False) p1 = self.stackroots.pop() p1ref = lltype.cast_opaque_ptr(llmemory.GCREF, p1) - r1 = lltype.malloc(PYOBJ_HDR, flavor='raw', immortal=create_immortal) - r1.ob_refcnt = rc - r1.ob_pypy_link = 0 + r1 = lltype.malloc(PyObject.TO, flavor='raw', immortal=create_immortal) + r1.c_ob_refcnt = rc + r1.c_ob_pypy_link = 0 + r1.c_ob_type = lltype.nullptr(PyTypeObject) r1addr = llmemory.cast_ptr_to_adr(r1) if is_pyobj: assert not is_light self.gc.rawrefcount_create_link_pyobj(p1ref, r1addr) else: self.gc.rawrefcount_create_link_pypy(p1ref, r1addr) - assert r1.ob_refcnt == rc - assert r1.ob_pypy_link != 0 + assert r1.c_ob_refcnt == rc + assert 
r1.c_ob_pypy_link != 0 def check_alive(extra_refcount): - assert r1.ob_refcnt == rc + extra_refcount - assert r1.ob_pypy_link != 0 + assert r1.c_ob_refcnt == rc + extra_refcount + assert r1.c_ob_pypy_link != 0 p1ref = self.gc.rawrefcount_to_obj(r1addr) p1 = lltype.cast_opaque_ptr(lltype.Ptr(S), p1ref) assert p1.x == intval @@ -87,13 +89,13 @@ p2 = self.malloc(S) p2.x = 84 p2ref = lltype.cast_opaque_ptr(llmemory.GCREF, p2) - r2 = lltype.malloc(PYOBJ_HDR, flavor='raw') - r2.ob_refcnt = 1 - r2.ob_pypy_link = 0 + r2 = lltype.malloc(PyObject.TO, flavor='raw') + r2.c_ob_refcnt = 1 + r2.c_ob_pypy_link = 0 r2addr = llmemory.cast_ptr_to_adr(r2) # p2 and r2 are not linked - assert r1.ob_pypy_link != 0 - assert r2.ob_pypy_link == 0 + assert r1.c_ob_pypy_link != 0 + assert r2.c_ob_pypy_link == 0 assert self.gc.rawrefcount_from_obj(p1ref) == r1addr assert self.gc.rawrefcount_from_obj(p2ref) == llmemory.NULL assert self.gc.rawrefcount_to_obj(r1addr) == p1ref @@ -106,16 +108,16 @@ p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=True, create_old=old)) check_alive(0) - r1.ob_refcnt += 1 + r1.c_ob_refcnt += 1 self._collect(major=False) check_alive(+1) self._collect(major=True) check_alive(+1) - r1.ob_refcnt -= 1 + r1.c_ob_refcnt -= 1 self._collect(major=False) p1 = check_alive(0) self._collect(major=True) - py.test.raises(RuntimeError, "r1.ob_refcnt") # dead + py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead py.test.raises(RuntimeError, "p1.x") # dead self.gc.check_no_more_rawrefcount_state() assert self.trigger == [] @@ -129,7 +131,7 @@ if old: check_alive(0) self._collect(major=True) - py.test.raises(RuntimeError, "r1.ob_refcnt") # dead + py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead py.test.raises(RuntimeError, "p1.x") # dead self.gc.check_no_more_rawrefcount_state() @@ -147,7 +149,7 @@ check_alive(0) assert p1.x == 42 self._collect(major=True) - py.test.raises(RuntimeError, "r1.ob_refcnt") # dead + py.test.raises(RuntimeError, 
"r1.c_ob_refcnt") # dead py.test.raises(RuntimeError, "p1.x") # dead self.gc.check_no_more_rawrefcount_state() @@ -164,18 +166,18 @@ p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=False, create_old=old)) check_alive(0) - r1.ob_refcnt += 1 + r1.c_ob_refcnt += 1 self._collect(major=False) check_alive(+1) self._collect(major=True) check_alive(+1) - r1.ob_refcnt -= 1 + r1.c_ob_refcnt -= 1 self._collect(major=False) p1 = check_alive(0) self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 # in the pending list - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 # in the pending list + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr assert self.gc.rawrefcount_next_dead() == llmemory.NULL assert self.gc.rawrefcount_next_dead() == llmemory.NULL @@ -197,8 +199,8 @@ assert p1.x == 42 self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -214,8 +216,8 @@ else: self._collect(major=False, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -232,10 +234,10 @@ p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_pyobj=True, force_external=external)) check_alive(0) - r1.ob_refcnt += 1 # the pyobject is kept alive + r1.c_ob_refcnt += 1 # the pyobject is kept alive self._collect(major=False) - assert r1.ob_refcnt == 1 # refcnt dropped to 1 - assert r1.ob_pypy_link == 0 # detached + assert r1.c_ob_refcnt == 1 # refcnt 
dropped to 1 + assert r1.c_ob_pypy_link == 0 # detached self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -252,8 +254,8 @@ self._collect(major=True, expected_trigger=1) else: self._collect(major=False, expected_trigger=1) - assert r1.ob_refcnt == 1 # refcnt 1, in the pending list - assert r1.ob_pypy_link == 0 # detached + assert r1.c_ob_refcnt == 1 # refcnt 1, in the pending list + assert r1.c_ob_pypy_link == 0 # detached assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -277,8 +279,8 @@ assert self.trigger == [] self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -289,3 +291,10 @@ check_alive(0) self._collect(major=True) check_alive(0) + + def test_cycle(self): + p1, p1ref, r1, r1addr, check_alive = ( + self._rawrefcount_pair(42, is_pyobj=True)) + self.gc.rawrefcount_buffer_pyobj(r1addr) + self.gc.rrc_collect_cycles() + lltype.free(r1, flavor='raw') diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -482,6 +482,10 @@ GCClass.rawrefcount_mark_deallocating, [s_gc, s_gcref, SomeAddress()], annmodel.s_None) + self.rawrefcount_buffer_pyobj = getfn( + GCClass.rawrefcount_buffer_pyobj, + [s_gc, SomeAddress()], + annmodel.s_None) self.rawrefcount_from_obj_ptr = getfn( GCClass.rawrefcount_from_obj, [s_gc, s_gcref], SomeAddress(), inline = True) @@ -1292,6 +1296,13 @@ [self.rawrefcount_mark_deallocating, self.c_const_gc, v_gcobj, v_pyobject]) + def gct_gc_rawrefcount_buffer_pyobj(self, hop): + [v_pyobject] = hop.spaceop.args + assert v_pyobject.concretetype == 
llmemory.Address + hop.genop("direct_call", + [self.rawrefcount_buffer_pyobj, self.c_const_gc, + v_pyobject]) + def gct_gc_rawrefcount_from_obj(self, hop): [v_gcobj] = hop.spaceop.args assert v_gcobj.concretetype == llmemory.GCREF diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -9,7 +9,8 @@ from rpython.rlib.objectmodel import we_are_translated, specialize, not_rpython from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.translator.tool.cbuild import ExternalCompilationInfo -from rpython.rlib import rgc +from rpython.rlib import rgc, objectmodel +from pypy.interpreter.baseobjspace import W_Root MAX_BIT = int(math.log(sys.maxint, 2)) @@ -33,6 +34,7 @@ REFCNT_CLR_GREEN = 4 << REFCNT_CLR_OFFS # Acyclic REFCNT_CLR_RED = 5 << REFCNT_CLR_OFFS # Cand cycle undergoing SIGMA-comp. REFCNT_CLR_ORANGE = 6 << REFCNT_CLR_OFFS # Cand cycle awaiting epoch boundary +REFCNT_CLR_MASK = 7 << REFCNT_CLR_OFFS # Cyclic reference count with overflow bit REFCNT_CRC_OVERFLOW = 1 << REFCNT_CRC_OFFS + REFCNT_BITS @@ -46,6 +48,8 @@ RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) +W_MARKER_DEALLOCATING = W_Root() + def _build_pypy_link(p): res = len(_adr2pypy) @@ -70,21 +74,41 @@ _refcount_overflow = dict() +def incref(pyobj): + if pyobj.c_ob_refcnt & REFCNT_OVERFLOW == 0: + pyobj.c_ob_refcnt += 1 + else: + if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: + pyobj.c_ob_refcnt += 1 + overflow_new(pyobj) + else: + overflow_add(pyobj) + +def decref(pyobj): + if pyobj.c_ob_refcnt & REFCNT_OVERFLOW == 0: + pyobj.c_ob_refcnt -= 1 + else: + if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: + pyobj.c_ob_refcnt -= 1 + elif overflow_sub(pyobj): + pyobj.c_ob_refcnt -= 1 + # TODO: if object moves, address changes! 
def overflow_new(obj): - _refcount_overflow[id(obj)] = 0 + _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] = 0 def overflow_add(obj): - _refcount_overflow[id(obj)] += 1 + _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] += 1 def overflow_sub(obj): - c = _refcount_overflow[id(obj)] + addr = objectmodel.current_object_addr_as_int(obj) + c = _refcount_overflow[addr] if c > 0: - _refcount_overflow[id(obj)] = c - 1 + _refcount_overflow[addr] = c - 1 return False else: - _refcount_overflow.pop(id(obj)) + _refcount_overflow.pop(addr) return True def overflow_get(obj): - return _refcount_overflow[id(obj)] + return _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] # TODO: _cyclic_refcount_overflow = dict() @@ -136,6 +160,10 @@ ob.c_ob_pypy_link = _build_pypy_link(marker) @not_rpython +def buffer_pyobj(ob): + pass # TODO: implement? + + at not_rpython def from_obj(OB_PTR_TYPE, p): ob = _pypy2ob.get(p) if ob is None: @@ -321,6 +349,19 @@ class Entry(ExtRegistryEntry): + _about_ = buffer_pyobj + + def compute_result_annotation(self, s_ob): + pass + + def specialize_call(self, hop): + name = 'gc_rawrefcount_buffer_pyobj' + hop.exception_cannot_occur() + v_ob = hop.inputarg(hop.args_r[0], arg=0) + hop.genop(name, [_unspec_ob(hop, v_ob)]) + + +class Entry(ExtRegistryEntry): _about_ = from_obj def compute_result_annotation(self, s_OB_PTR_TYPE, s_p): diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -522,8 +522,8 @@ translator = hop.rtyper.annotator.translator fq = hop.args_s[0].const graph = translator._graphof(fq.finalizer_trigger.im_func) - #InstanceRepr.check_graph_of_del_does_not_call_too_much(hop.rtyper, - # graph) + InstanceRepr.check_graph_of_del_does_not_call_too_much(hop.rtyper, + graph) hop.exception_cannot_occur() return hop.inputconst(lltype.Signed, hop.s_result.const) diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- 
a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -966,6 +966,9 @@ def op_gc_rawrefcount_mark_deallocating(self, *args): raise NotImplementedError("gc_rawrefcount_mark_deallocating") + def op_gc_rawrefcount_buffer_pyobj(self, *args): + raise NotImplementedError("gc_rawrefcount_buffer_pyobj") + def op_gc_rawrefcount_next_dead(self, *args): raise NotImplementedError("gc_rawrefcount_next_dead") diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -492,6 +492,7 @@ 'gc_rawrefcount_create_link_pypy': LLOp(), 'gc_rawrefcount_create_link_pyobj': LLOp(), 'gc_rawrefcount_mark_deallocating': LLOp(), + 'gc_rawrefcount_buffer_pyobj': LLOp(), 'gc_rawrefcount_from_obj': LLOp(sideeffects=False), 'gc_rawrefcount_to_obj': LLOp(sideeffects=False), 'gc_rawrefcount_next_dead': LLOp(), diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -564,7 +564,7 @@ def _container_example(self): def ex(*args): return self.RESULT._defl() - return _func(self, _callable=ex) + return _func(self, {'_callable': ex}) def _trueargs(self): return [arg for arg in self.ARGS if arg is not Void] @@ -2094,7 +2094,7 @@ class _func(_container): - def __init__(self, TYPE, **attrs): + def __init__(self, TYPE, attrs): attrs.setdefault('_TYPE', TYPE) attrs.setdefault('_name', '?') attrs.setdefault('_callable', None) @@ -2303,7 +2303,8 @@ hash(tuple(attrs.items())) except TypeError: raise TypeError("'%r' must be hashable"%attrs) - o = _func(TYPE, _name=name, **attrs) + attrs['_name'] = name + o = _func(TYPE, attrs) return _ptr(Ptr(TYPE), o) def _getconcretetype(v): diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -585,8 +585,8 @@ assert 
len(s_func.descriptions) == 1 funcdesc, = s_func.descriptions graph = funcdesc.getuniquegraph() - #self.check_graph_of_del_does_not_call_too_much(self.rtyper, - # graph) + self.check_graph_of_del_does_not_call_too_much(self.rtyper, + graph) FUNCTYPE = FuncType([Ptr(source_repr.object_type)], Void) destrptr = functionptr(FUNCTYPE, graph.name, graph=graph, From pypy.commits at gmail.com Wed Aug 30 08:18:36 2017 From: pypy.commits at gmail.com (Dodan) Date: Wed, 30 Aug 2017 05:18:36 -0700 (PDT) Subject: [pypy-commit] pypy py3.5-sendmsg-recvmsg: Fixed code formatting issues. Thanks rlamy! Message-ID: <59a6ad1c.02d71c0a.c8276.a93d@mx.google.com> Author: Dodan Mihai Branch: py3.5-sendmsg-recvmsg Changeset: r92282:7d4e4f3183e5 Date: 2017-08-30 15:17 +0300 http://bitbucket.org/pypy/pypy/changeset/7d4e4f3183e5/ Log: Fixed code formatting issues. Thanks rlamy! diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -285,8 +285,6 @@ except SocketError as e: raise converted_error(space, e) - - def close_w(self, space): """close() @@ -449,10 +447,9 @@ return space.newtuple([space.newbytes(data), w_addr]) @unwrap_spec(message_size=int, ancbufsize=int, flags=int) - def recvmsg_w(self,space,message_size, ancbufsize = 0, flags = 0): + def recvmsg_w(self, space, message_size, ancbufsize=0, flags=0): """ - recvfrom(message_size[, ancbufsize[, flags]]) -> (message, ancillary, flags, address) - recvmsg(message_size, [ancbufsize,[flags]]) -> (message, ancillary, flags, address) + recvmsg(message_size[, ancbufsize[, flags]]) -> (message, ancillary, flags, address) Receive normal data (up to bufsize bytes) and ancillary data from the socket. The ancbufsize argument sets the size in bytes of the internal buffer used to receive the ancillary data; it defaults to 0, meaning that no ancillary data will be received. 
From pypy.commits at gmail.com Wed Aug 30 14:26:05 2017 From: pypy.commits at gmail.com (stevie_92) Date: Wed, 30 Aug 2017 11:26:05 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-gc-trialdeletion: Added simple tests for cycle detection Message-ID: <59a7033d.5496df0a.d406b.5fa9@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-trialdeletion Changeset: r92283:f1659c93bbd0 Date: 2017-08-30 20:25 +0200 http://bitbucket.org/pypy/pypy/changeset/f1659c93bbd0/ Log: Added simple tests for cycle detection diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -4,7 +4,9 @@ from rpython.memory.gc.test.test_direct import BaseDirectGCTest from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT -from pypy.module.cpyext.api import PyObject, PyTypeObject +from pypy.module.cpyext.api import (PyObject, PyTypeObject, PyTypeObjectPtr, + PyObjectFields, cpython_struct) +from pypy.module.cpyext.complexobject import PyComplexObject PYOBJ_HDR = IncrementalMiniMarkGC.PYOBJ_HDR PYOBJ_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_HDR_PTR @@ -83,6 +85,49 @@ return p1 return p1, p1ref, r1, r1addr, check_alive + def _rawrefcount_cycle_obj(self): + from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc + from rpython.rtyper.lltypesystem import rffi + from rpython.rtyper.annlowlevel import llhelper + from rpython.rlib.rawrefcount import (REFCNT_CLR_PURPLE) + from rpython.rtyper.tool import rffi_platform + + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + + # construct test type + TEST_P = lltype.Ptr(lltype.ForwardReference()) + TEST_P.TO.become(lltype.Struct('test', + ('base', PyObject.TO), + ('next', TEST_P), + ('value', lltype.Signed))) + + def test_tp_traverse(obj, visit, args): + from pypy.module.cpyext.api import generic_cpy_call + test = rffi.cast(TEST_P, obj) + vret = 
0 + if test.next is not None: + next = rffi.cast(PyObject, test.next) + vret = visit(next, args) + if vret != 0: + return vret + return vret + + TRAVERSE_FUNCTYPE = rffi.CCallback([PyObject, visitproc, rffi.VOIDP], + rffi.INT_real) + func_ptr = llhelper(TRAVERSE_FUNCTYPE, test_tp_traverse) + rffi_func_ptr = rffi.cast(traverseproc, func_ptr) + t1 = lltype.malloc(PyTypeObject, flavor='raw', immortal=True) + t1.c_tp_traverse = rffi_func_ptr + + # initialize object + r1 = lltype.malloc(TEST_P.TO, flavor='raw', immortal=True) + r1.base.c_ob_refcnt = 1 | REFCNT_CLR_PURPLE + r1.base.c_ob_pypy_link = 0 + r1.base.c_ob_type = t1 + r1addr = llmemory.cast_ptr_to_adr(r1) + + return r1, r1addr + def test_rawrefcount_objects_basic(self, old=False): p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=True, create_old=old)) @@ -292,9 +337,17 @@ self._collect(major=True) check_alive(0) - def test_cycle(self): - p1, p1ref, r1, r1addr, check_alive = ( - self._rawrefcount_pair(42, is_pyobj=True)) + def test_cycle_self_reference_free(self): + r1, r1addr = self._rawrefcount_cycle_obj() + r1.next = r1 self.gc.rawrefcount_buffer_pyobj(r1addr) self.gc.rrc_collect_cycles() - lltype.free(r1, flavor='raw') + assert r1.base.c_ob_refcnt == 0 + + def test_cycle_self_reference_not_free(self): + r1, r1addr = self._rawrefcount_cycle_obj() + r1.base.c_ob_refcnt += 1 + r1.next = r1 + self.gc.rawrefcount_buffer_pyobj(r1addr) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt == 2 From pypy.commits at gmail.com Thu Aug 31 03:16:29 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Aug 2017 00:16:29 -0700 (PDT) Subject: [pypy-commit] cffi default: Expand the docs about wchar_t/char16_t/char32_t Message-ID: <59a7b7cd.90c5df0a.63f4f.3f1f@mx.google.com> Author: Armin Rigo Branch: Changeset: r3005:4a39df6e9df5 Date: 2017-08-31 09:16 +0200 http://bitbucket.org/cffi/cffi/changeset/4a39df6e9df5/ Log: Expand the docs about wchar_t/char16_t/char32_t diff --git 
a/doc/source/ref.rst b/doc/source/ref.rst --- a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -796,20 +796,8 @@ `[8]` ``wchar_t``, ``char16_t`` and ``char32_t`` - The ``wchar_t`` type has the same signedness as the underlying - platform's. For example, on Linux, it is a signed 32-bit integer. - However, the types ``char16_t`` and ``char32_t`` (*new in version - 1.11*) are always unsigned. **Warning:** for now, if you use - ``char16_t`` and ``char32_t`` with ``cdef()`` and ``set_source()``, - you have to make sure yourself that the types are declared by the C - source you provide to ``set_source()``. They would be declared if - you ``#include`` a library that explicitly uses them, for example, - or when using C++11. Otherwise, you need ``#include <uchar.h>`` on - Linux, or more generally something like ``typedef uint_least16_t - char16_t;``. This is not done automatically by CFFI because - ``uchar.h`` is not standard across platforms, and writing a - ``typedef`` like above would crash if the type happens to be - already defined. + See `Unicode character types`_ below. + .. _file: @@ -842,3 +830,66 @@ The special support for ``FILE *`` is anyway implemented in a similar manner on CPython 3.x and on PyPy, because these Python implementations' files are not natively based on ``FILE *``. Doing it explicitly offers more control. + + +.. _unichar: + +Unicode character types ++++++++++++++++++++++++ + +The ``wchar_t`` type has the same signedness as the underlying +platform's. For example, on Linux, it is a signed 32-bit integer. +However, the types ``char16_t`` and ``char32_t`` (*new in version 1.11*) +are always unsigned. + +Note that CFFI assumes that these types are meant to contain UTF-16 or +UTF-32 characters in the native endianness. More precisely: + +* ``char32_t`` is assumed to contain UTF-32, or UCS4, which is just the + unicode codepoint; + +* ``char16_t`` is assumed to contain UTF-16, i.e.
UCS2 plus surrogates; + +* ``wchar_t`` is assumed to contain either UTF-32 or UTF-16 based on its + actual platform-defined size of 4 or 2 bytes. + +Whether this assumption is true or not is unspecified by the C language. +In theory, the C library you are interfacing with could use one of these +types with a different meaning. You would then need to handle it +yourself---for example, by using ``uint32_t`` instead of ``char32_t`` in +the ``cdef()``, and building the expected arrays of ``uint32_t`` +manually. + +Python itself can be compiled with ``sys.maxunicode == 65535`` or +``sys.maxunicode == 1114111`` (Python >= 3.3 is always 1114111). This +changes the handling of surrogates (which are pairs of 16-bit +"characters" which actually stand for a single codepoint whose value is +greater than 65535). If your Python is ``sys.maxunicode == 1114111``, +then it can store arbitrary unicode codepoints; surrogates are +automatically inserted when converting from Python unicodes to UTF-16, +and automatically removed when converting back. On the other hand, if +your Python is ``sys.maxunicode == 65535``, then it is the other way +around: surrogates are removed when converting from Python unicodes +to UTF-32, and added when converting back. In other words, surrogate +conversion is done only when there is a size mismatch. + +Note that Python's internal representation is not specified. For +example, on CPython >= 3.3, it will use 1- or 2- or 4-byte arrays +depending on what the string actually contains. With CFFI, when you +pass a Python byte string to a C function expecting a ``char*``, then +we pass directly a pointer to the existing data without needing a +temporary buffer; however, the same cannot cleanly be done with +*unicode* string arguments and the ``wchar_t*`` / ``char16_t*`` / +``char32_t*`` types, because of the changing internal +representation. As a result, and for consistency, CFFI always allocates +a temporary buffer for unicode strings.
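The surrogate handling described in the documentation above can be checked without CFFI at all. The following stdlib-only sketch (the helper name `to_utf16_units` is invented here for illustration) splits a codepoint above 0xFFFF into the two 16-bit code units that would land in a 2-byte `wchar_t*`/`char16_t*` buffer:

```python
# Illustrative sketch, stdlib only -- no CFFI required.  The helper name
# to_utf16_units is made up; it mirrors what happens when a Python string
# is copied into a 2-byte wchar_t* / char16_t* buffer.
import struct

def to_utf16_units(codepoint):
    """Return the list of 16-bit code units encoding one codepoint."""
    if codepoint <= 0xFFFF:
        return [codepoint]
    codepoint -= 0x10000
    return [0xD800 + (codepoint >> 10),    # high (lead) surrogate
            0xDC00 + (codepoint & 0x3FF)]  # low (trail) surrogate

# U+1F600 does not fit in 16 bits, so it becomes a surrogate pair:
assert to_utf16_units(0x1F600) == [0xD83D, 0xDE00]

# Cross-check against Python's own UTF-16-LE codec:
encoded = "\U0001F600".encode("utf-16-le")
assert list(struct.unpack("<2H", encoded)) == [0xD83D, 0xDE00]
```

With a 4-byte `char32_t` buffer the same codepoint would be stored directly as 0x1F600, with no surrogates involved, which is the "size mismatch" rule the text describes.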
+ +**Warning:** for now, if you use ``char16_t`` and ``char32_t`` with +``set_source()``, you have to make sure yourself that the types are +declared by the C source you provide to ``set_source()``. They would be +declared if you ``#include`` a library that explicitly uses them, for +example, or when using C++11. Otherwise, you need ``#include +<uchar.h>`` on Linux, or more generally something like ``typedef +uint16_t char16_t;``. This is not done automatically by CFFI because +``uchar.h`` is not standard across platforms, and writing a ``typedef`` +like above would crash if the type happens to be already defined. diff --git a/doc/source/using.rst b/doc/source/using.rst --- a/doc/source/using.rst +++ b/doc/source/using.rst @@ -206,6 +206,9 @@ from a unicode string, and calling ``ffi.string()`` on the cdata object returns the current unicode string stored in the source array (adding surrogates if necessary). +See the `Unicode character types`__ section for more details. + +.. __: ref.html#unichar Note that unlike Python lists or tuples, but like C, you *cannot* index in a C array from the end using negative numbers. From pypy.commits at gmail.com Thu Aug 31 05:40:10 2017 From: pypy.commits at gmail.com (stevie_92) Date: Thu, 31 Aug 2017 02:40:10 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-gc-trialdeletion: Added tests Message-ID: <59a7d97a.052f1c0a.49af8.0de0@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-trialdeletion Changeset: r92284:a478bda34d52 Date: 2017-08-31 11:38 +0200 http://bitbucket.org/pypy/pypy/changeset/a478bda34d52/ Log: Added tests Fixed bug in generic_cpy_call if called recursively Fixed bug in cycle detection if object is buffered twice diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -200,10 +200,11 @@ # executes. In non-cpyext-related code, it will thus always be 0. # # **make_generic_cpy_call():** RPython to C, with the GIL held.
Before -# the call, must assert that the global variable is 0 and set the -# current thread identifier into the global variable. After the call, -# assert that the global variable still contains the current thread id, -# and reset it to 0. +# the call, must assert that the global variable is 0 or the current +# thread identifier (recursive call) and set the current thread identifier +# into the global variable. After the call, assert that the global variable +# still contains the current thread id, and reset it to the value it held +# before the call. # # **make_wrapper():** C to RPython; by default assume that the GIL is # held, but accepts gil="acquire", "release", "around", @@ -1598,7 +1599,8 @@ # see "Handling of the GIL" above tid = rthread.get_ident() - assert cpyext_glob_tid_ptr[0] == 0 + tid_before = cpyext_glob_tid_ptr[0] + assert tid_before == 0 or tid_before == tid cpyext_glob_tid_ptr[0] = tid try: @@ -1606,7 +1608,7 @@ result = call_external_function(func, *boxed_args) finally: assert cpyext_glob_tid_ptr[0] == tid - cpyext_glob_tid_ptr[0] = 0 + cpyext_glob_tid_ptr[0] = tid_before keepalive_until_here(*keepalives) if is_PyObject(RESULT_TYPE): diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3205,9 +3205,7 @@ from rpython.rlib.rawrefcount import (REFCNT_CYCLE_BUFFERED, REFCNT_CLR_MASK, REFCNT_CLR_PURPLE, - REFCNT_MASK, - W_MARKER_DEALLOCATING, - mark_deallocating) + REFCNT_MASK) obj = self._pyobj(pyobject) rc = obj.c_ob_refcnt debug_print("_rrc_cycle_mark_roots", obj) @@ -3218,8 +3216,8 @@ obj.c_ob_refcnt = rc & ~REFCNT_CYCLE_BUFFERED self.rrc_buffered.remove(pyobject) if rc & REFCNT_MASK == 0: - mark_deallocating(W_MARKER_DEALLOCATING, obj) - generic_cpy_call(True, obj.c_ob_type.c_tp_dealloc, obj) + if obj.c_ob_type.c_tp_dealloc: + generic_cpy_call(True, obj.c_ob_type.c_tp_dealloc, obj) def _rrc_cycle_scan_roots(self, pyobject, ignore): obj = 
self._pyobj(pyobject) diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -2,11 +2,15 @@ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from rpython.memory.gc.test.test_direct import BaseDirectGCTest -from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY -from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT +from rpython.rlib.rawrefcount import (REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT, + REFCNT_MASK) from pypy.module.cpyext.api import (PyObject, PyTypeObject, PyTypeObjectPtr, PyObjectFields, cpython_struct) from pypy.module.cpyext.complexobject import PyComplexObject +from rpython.rtyper.lltypesystem import rffi +from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc +from rpython.rtyper.annlowlevel import llhelper +from rpython.rtyper.tool import rffi_platform PYOBJ_HDR = IncrementalMiniMarkGC.PYOBJ_HDR PYOBJ_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_HDR_PTR @@ -17,6 +21,17 @@ ('prev', lltype.Ptr(S)), ('next', lltype.Ptr(S)))) +T = lltype.Ptr(lltype.ForwardReference()) +T.TO.become(lltype.Struct('test', + ('base', PyObject.TO), + ('next', T), + ('prev', T), + ('value', lltype.Signed))) + +TRAVERSE_FUNCTYPE = rffi.CCallback([PyObject, visitproc, rffi.VOIDP], + rffi.INT_real) +t1 = lltype.malloc(PyTypeObject, flavor='raw', immortal=True) + class TestRawRefCount(BaseDirectGCTest): GCClass = IncrementalMiniMarkGC @@ -86,47 +101,38 @@ return p1, p1ref, r1, r1addr, check_alive def _rawrefcount_cycle_obj(self): - from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc - from rpython.rtyper.lltypesystem import rffi - from rpython.rtyper.annlowlevel import llhelper - from rpython.rlib.rawrefcount import (REFCNT_CLR_PURPLE) - from rpython.rtyper.tool import rffi_platform - - self.gc.rawrefcount_init(lambda: 
self.trigger.append(1)) - - # construct test type - TEST_P = lltype.Ptr(lltype.ForwardReference()) - TEST_P.TO.become(lltype.Struct('test', - ('base', PyObject.TO), - ('next', TEST_P), - ('value', lltype.Signed))) def test_tp_traverse(obj, visit, args): - from pypy.module.cpyext.api import generic_cpy_call - test = rffi.cast(TEST_P, obj) + test = rffi.cast(T, obj) vret = 0 - if test.next is not None: + if llmemory.cast_ptr_to_adr(test.next).ptr is not None: next = rffi.cast(PyObject, test.next) vret = visit(next, args) if vret != 0: return vret + if llmemory.cast_ptr_to_adr(test.prev).ptr is not None: + next = rffi.cast(PyObject, test.prev) + vret = visit(next, args) + if vret != 0: + return vret return vret - TRAVERSE_FUNCTYPE = rffi.CCallback([PyObject, visitproc, rffi.VOIDP], - rffi.INT_real) func_ptr = llhelper(TRAVERSE_FUNCTYPE, test_tp_traverse) rffi_func_ptr = rffi.cast(traverseproc, func_ptr) - t1 = lltype.malloc(PyTypeObject, flavor='raw', immortal=True) t1.c_tp_traverse = rffi_func_ptr - # initialize object - r1 = lltype.malloc(TEST_P.TO, flavor='raw', immortal=True) - r1.base.c_ob_refcnt = 1 | REFCNT_CLR_PURPLE + r1 = lltype.malloc(T.TO, flavor='raw', immortal=True) r1.base.c_ob_pypy_link = 0 r1.base.c_ob_type = t1 - r1addr = llmemory.cast_ptr_to_adr(r1) + r1.base.c_ob_refcnt = 1 + return r1 - return r1, r1addr + def _rawrefcount_buffer_obj(self, obj): + from rpython.rlib.rawrefcount import REFCNT_CLR_MASK, REFCNT_CLR_PURPLE + rc = obj.base.c_ob_refcnt + obj.base.c_ob_refcnt = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_PURPLE + objaddr = llmemory.cast_ptr_to_adr(obj) + self.gc.rawrefcount_buffer_pyobj(objaddr) def test_rawrefcount_objects_basic(self, old=False): p1, p1ref, r1, r1addr, check_alive = ( @@ -338,16 +344,100 @@ check_alive(0) def test_cycle_self_reference_free(self): - r1, r1addr = self._rawrefcount_cycle_obj() + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() r1.next = r1 - 
self.gc.rawrefcount_buffer_pyobj(r1addr) + self._rawrefcount_buffer_obj(r1) self.gc.rrc_collect_cycles() - assert r1.base.c_ob_refcnt == 0 + assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 def test_cycle_self_reference_not_free(self): - r1, r1addr = self._rawrefcount_cycle_obj() + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() r1.base.c_ob_refcnt += 1 r1.next = r1 - self.gc.rawrefcount_buffer_pyobj(r1addr) + self._rawrefcount_buffer_obj(r1) self.gc.rrc_collect_cycles() - assert r1.base.c_ob_refcnt == 2 + assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 + + def test_simple_cycle_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r1.next = r2 + r2.next = r1 + self._rawrefcount_buffer_obj(r1) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + + def test_simple_cycle_not_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r1.next = r2 + r2.next = r1 + r2.base.c_ob_refcnt += 1 + self._rawrefcount_buffer_obj(r1) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 1 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 2 + + def test_complex_cycle_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r3 = self._rawrefcount_cycle_obj() + r1.next = r2 + r1.prev = r2 + r2.base.c_ob_refcnt += 1 + r2.next = r3 + r3.prev = r1 + self._rawrefcount_buffer_obj(r1) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 + + def test_complex_cycle_not_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = 
self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r3 = self._rawrefcount_cycle_obj() + r1.next = r2 + r1.prev = r2 + r2.base.c_ob_refcnt += 1 + r2.next = r3 + r3.prev = r1 + r3.base.c_ob_refcnt += 1 + self._rawrefcount_buffer_obj(r1) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 1 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 2 + assert r3.base.c_ob_refcnt & REFCNT_MASK == 2 + + def test_cycle_2_buffered_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r1.next = r2 + r2.prev = r1 + self._rawrefcount_buffer_obj(r1) + self._rawrefcount_buffer_obj(r2) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + + def test_cycle_2_buffered_not_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r1.next = r2 + r2.prev = r1 + r1.base.c_ob_refcnt += 1 + self._rawrefcount_buffer_obj(r1) + self._rawrefcount_buffer_obj(r2) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 1 + From pypy.commits at gmail.com Thu Aug 31 07:51:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Aug 2017 04:51:16 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Merged in py3.5-sendmsg-recvmsg (pull request #562) Message-ID: <59a7f834.44c11c0a.9dcf8.b445@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92286:d12c25571050 Date: 2017-08-31 11:50 +0000 http://bitbucket.org/pypy/pypy/changeset/d12c25571050/ Log: Merged in py3.5-sendmsg-recvmsg (pull request #562) Implement socket.sendmsg()/.recvmsg() diff --git a/pypy/module/_socket/__init__.py b/pypy/module/_socket/__init__.py --- a/pypy/module/_socket/__init__.py +++ b/pypy/module/_socket/__init__.py @@ -34,6 +34,7 @@ ntohs ntohl htons htonl inet_aton 
inet_ntoa inet_pton inet_ntop getaddrinfo getnameinfo getdefaulttimeout setdefaulttimeout + CMSG_SPACE CMSG_LEN """.split(): if (name in ('inet_pton', 'inet_ntop', 'socketpair') and diff --git a/pypy/module/_socket/interp_func.py b/pypy/module/_socket/interp_func.py --- a/pypy/module/_socket/interp_func.py +++ b/pypy/module/_socket/interp_func.py @@ -327,6 +327,42 @@ for (family, socktype, protocol, canonname, addr) in lst] return space.newlist(lst1) +@unwrap_spec(size=int) +def CMSG_SPACE(space, size): + """ + Socket method to determine the optimal byte size of the ancillary. + Recommended to be used when computing the ancillary size for recvmsg. + :param space: + :param size: an integer with the minimum size required. + :return: an integer with the minimum memory needed for the required size. The value is memory aligned + """ + if size < 0: + raise oefmt(space.w_OverflowError, + "CMSG_SPACE() argument out of range") + retval = rsocket.CMSG_SPACE(size) + if retval == 0: + raise oefmt(space.w_OverflowError, + "CMSG_SPACE() argument out of range") + return space.newint(retval) + +@unwrap_spec(len=int) +def CMSG_LEN(space, len): + """ + Socket method to determine the optimal byte size of the ancillary. + Recommended to be used when computing the ancillary size for recvmsg. + :param space: + :param len: an integer with the minimum size required. + :return: an integer with the minimum memory needed for the required size. The value is not memory aligned.
+ """ + if len < 0: + raise oefmt(space.w_OverflowError, + "CMSG_LEN() argument out of range") + retval = rsocket.CMSG_LEN(len) + if retval == 0: + raise oefmt(space.w_OverflowError, + "CMSG_LEN() argument out of range") + return space.newint(retval) + def getdefaulttimeout(space): """getdefaulttimeout() -> timeout diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -446,6 +446,52 @@ converted_error(space, e, eintr_retry=True) return space.newtuple([space.newbytes(data), w_addr]) + @unwrap_spec(message_size=int, ancbufsize=int, flags=int) + def recvmsg_w(self, space, message_size, ancbufsize=0, flags=0): + """ + recvmsg(message_size[, ancbufsize[, flags]]) -> (message, ancillary, flags, address) + Receive normal data (up to bufsize bytes) and ancillary data from the socket. + The ancbufsize argument sets the size in bytes of the internal buffer used to receive the ancillary data; + it defaults to 0, meaning that no ancillary data will be received. + Appropriate buffer sizes for ancillary data can be calculated using CMSG_SPACE() or CMSG_LEN(), + and items which do not fit into the buffer might be truncated or discarded. + The flags argument defaults to 0 and has the same meaning as for recv(). + The ancdata item is a list of zero or more tuples (cmsg_level, cmsg_type, cmsg_data): + cmsg_level and cmsg_type are integers specifying the protocol level and protocol-specific type respectively, + and cmsg_data is a bytes object holding the associated data. + + :param space: Non useable parameter. It represents the object space. + :param message_size: Maximum size of the message to be received + :param ancbufsize: Maximum size of the ancillary data to be received + :param flags: Receive flag. For more details, please check the Unix manual + :return: a tuple consisting of the message, the ancillary data, return flag and the address. 
+ """ + if message_size < 0: + raise oefmt(space.w_ValueError, "negative buffer size in recvmsg()") + if ancbufsize < 0: + raise oefmt(space.w_ValueError, "invalid ancillary data buffer length") + while True: + try: + recvtup = self.sock.recvmsg(message_size, ancbufsize, flags) + w_message = space.newbytes(recvtup[0]) + anclist = [] + for l in recvtup[1]: + tup = space.newtuple([space.newint(l[0]), space.newint(l[1]), space.newbytes(l[2])]) + anclist.append(tup) + + w_anc = space.newlist(anclist) + + w_flag = space.newint(recvtup[2]) + if (recvtup[3] is not None): + w_address = addr_as_object(recvtup[3], self.sock.fd, space) + else: + w_address = space.w_None + rettup = space.newtuple([w_message, w_anc, w_flag, w_address]) + break + except SocketError as e: + converted_error(space, e, eintr_retry=True) + return rettup + @unwrap_spec(data='bufferstr', flags=int) def send_w(self, space, data, flags=0): """send(data[, flags]) -> count @@ -501,6 +547,67 @@ converted_error(space, e, eintr_retry=True) return space.newint(count) + @unwrap_spec(flags=int) + def sendmsg_w(self, space, w_data, w_ancillary=None, flags=0 ,w_address=None): + """ + sendmsg(data[,ancillary[,flags[,address]]]) -> bytes_sent + Send normal and ancillary data to the socket, gathering the non-ancillary data + from a series of buffers and concatenating it into a single message. + The ancdata argument specifies the ancillary data (control messages) as an iterable of zero or more tuples + (cmsg_level, cmsg_type, cmsg_data), where cmsg_level and cmsg_type are integers specifying the protocol level + and protocol-specific type respectively, and cmsg_data is a bytes-like object holding the associated data. + :param space: Represents the object space. + :param w_data: The message(s). needs to be a bytes like object + :param w_ancillary: needs to be a sequence object Can remain unspecified. + :param w_flags: needs to be an integer. Can remain unspecified. 
+ :param w_address: needs to be a bytes-like object Can remain unspecified. + :return: Bytes sent from the message + """ + # Get the flag and address from the object space + while True: + try: + address = None + if not space.is_none(w_address): + address = self.addr_from_object(space, w_address) + + # find data's type in the ObjectSpace and get a list of string out of it. + data = [] + data_iter = space.unpackiterable(w_data) + for i in data_iter: + data.append(space.readbuf_w(i).as_str()) + + # find the ancillary's type in the ObjectSpace and get a list of tuples out of it. + ancillary = [] + if w_ancillary is not None: + anc_iter = space.unpackiterable(w_ancillary) + for w_i in anc_iter: + if not space.isinstance_w(w_i, space.w_tuple): + raise oefmt(space.w_TypeError, "[sendmsg() ancillary data items]() argument must be sequence") + if space.len_w(w_i) == 3: + intemtup = space.unpackiterable(w_i) + level = space.int_w(intemtup[0]) + type = space.int_w(intemtup[1]) + cont = space.readbuf_w(intemtup[2]).as_str() + tup = (level, type, cont) + ancillary.append(tup) + else: + raise oefmt(space.w_TypeError, + "[sendmsg() ancillary data items]() argument must be sequence of length 3") + + count = self.sock.sendmsg(data, ancillary, flags, address) + if count < 0: + if (count == -1000): + raise oefmt(space.w_OSError, "sending multiple control messages not supported") + if (count == -1001): + raise oefmt(space.w_OSError, "ancillary data item too large") + if (count == -1002): + raise oefmt(space.w_OSError, "too much ancillary data") + break + except SocketError as e: + converted_error(space, e, eintr_retry=True) + + return space.newint(count) + @unwrap_spec(flag=int) def setblocking_w(self, flag): """setblocking(flag) @@ -772,7 +879,7 @@ socketmethodnames = """ _accept bind close connect connect_ex fileno detach getpeername getsockname getsockopt gettimeout listen -recv recvfrom send sendall sendto setblocking +recv recvfrom recvmsg send sendall sendto sendmsg setblocking 
setsockopt settimeout shutdown _reuse _drop recv_into recvfrom_into """.split() if hasattr(rsocket._c, 'WSAIoctl'): @@ -813,6 +920,8 @@ sendall(data[, flags]) -- send all data send(data[, flags]) -- send data, may not send all of it sendto(data[, flags], addr) -- send data to a given address +sendmsg(messages[, ancillary[, flags[, address]]]) -- send data and ancillary payload in a packet. May specify flags or the address +recvmsg(message_size,[ ancillary_size,[ flags]]) -- receive data and ancillary payload. Return a tuple of message, ancdata, flags and address setblocking(0 | 1) -- set or clear the blocking I/O flag setsockopt(level, optname, value) -- set socket options settimeout(None | float) -- set or clear the timeout diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -2,6 +2,7 @@ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.tool import rffi_platform as platform from rpython.rtyper.lltypesystem.rffi import CCHARP +from rpython.rlib import jit from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.translator.platform import platform as target_platform @@ -190,6 +191,8 @@ IPX_TYPE +SCM_RIGHTS + POLLIN POLLPRI POLLOUT POLLERR POLLHUP POLLNVAL POLLRDNORM POLLRDBAND POLLWRNORM POLLWEBAND POLLMSG @@ -260,6 +263,7 @@ sockaddr_ptr = lltype.Ptr(lltype.ForwardReference()) addrinfo_ptr = lltype.Ptr(lltype.ForwardReference()) + # struct types CConfig.sockaddr = platform.Struct('struct sockaddr', [('sa_family', rffi.INT), @@ -343,6 +347,650 @@ [('ifr_ifindex', rffi.INT), ('ifr_name', rffi.CFixedArray(rffi.CHAR, 8))]) +# insert handler for sendmsg / recvmsg here +if _POSIX: + includes = ['stddef.h', + 'sys/socket.h', + 'unistd.h', + 'string.h', + 'stdlib.h', + 'errno.h', + 'limits.h', + 'stdio.h', + 'sys/types.h', + 'netinet/in.h', + 'arpa/inet.h'] + separate_module_sources = [''' + + // special defines for returning from recvmsg
+ #define BAD_MSG_SIZE_GIVEN -10000 + #define BAD_ANC_SIZE_GIVEN -10001 + #define MAL_ANC -10002 + + // special defines for returning from sendmsg + #define MUL_MSGS_NOT_SUP -1000 + #define ANC_DATA_TOO_LARGE -1001 + #define ANC_DATA_TOO_LARGEX -1002 + + /* + Even though you could, theoretically, receive more than one message, IF you set the socket option, + CPython has hardcoded the message number to 1, and implemented the option to receive more then 1 in a + different socket method: recvmsg_into + */ + #define MSG_IOVLEN 1 // CPython has hardcoded this as well. + #if INT_MAX > 0x7fffffff + #define SOCKLEN_T_LIMIT 0x7fffffff + #else + #define SOCKLEN_T_LIMIT INT_MAX + #endif + + // ################################################################################################ + // Recvmsg implementation and associated functions + + // Taken from CPython. Determines the minimum memory space required for the ancillary data. + #ifdef CMSG_SPACE + static int + cmsg_min_space(struct msghdr *msg, struct cmsghdr *cmsgh, size_t space) + { + size_t cmsg_offset; + static const size_t cmsg_len_end = (offsetof(struct cmsghdr, cmsg_len) + + sizeof(cmsgh->cmsg_len)); + + /* Note that POSIX allows msg_controllen to be of signed type. */ + if (cmsgh == NULL || msg->msg_control == NULL) + return 0; + /* Note that POSIX allows msg_controllen to be of a signed type. This is + annoying under OS X as it's unsigned there and so it triggers a + tautological comparison warning under Clang when compared against 0. + Since the check is valid on other platforms, silence the warning under + Clang. 
*/ + #ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wtautological-compare" + #endif + #if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5))) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wtype-limits" + #endif + if (msg->msg_controllen < 0) + return 0; + #if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5))) + #pragma GCC diagnostic pop + #endif + #ifdef __clang__ + #pragma clang diagnostic pop + #endif + if (space < cmsg_len_end) + space = cmsg_len_end; + cmsg_offset = (char *)cmsgh - (char *)msg->msg_control; + return (cmsg_offset <= (size_t)-1 - space && + cmsg_offset + space <= msg->msg_controllen); + } + #endif + + // Taken from CPython. + #ifdef CMSG_LEN + /* If pointer CMSG_DATA(cmsgh) is in buffer msg->msg_control, set + *space to number of bytes following it in the buffer and return + true; otherwise, return false. Assumes cmsgh, msg->msg_control and + msg->msg_controllen are valid. */ + static int + get_cmsg_data_space(struct msghdr *msg, struct cmsghdr *cmsgh, size_t *space) + { + size_t data_offset; + char *data_ptr; + + if ((data_ptr = (char *)CMSG_DATA(cmsgh)) == NULL) + return 0; + data_offset = data_ptr - (char *)msg->msg_control; + if (data_offset > msg->msg_controllen) + return 0; + *space = msg->msg_controllen - data_offset; + return 1; + } + + // Taken from CPython. + /* If cmsgh is invalid or not contained in the buffer pointed to by + msg->msg_control, return -1. If cmsgh is valid and its associated + data is entirely contained in the buffer, set *data_len to the + length of the associated data and return 0. If only part of the + associated data is contained in the buffer but cmsgh is otherwise + valid, set *data_len to the length contained in the buffer and + return 1. 
*/ + static int + get_cmsg_data_len(struct msghdr *msg, struct cmsghdr *cmsgh, size_t *data_len) + { + size_t space, cmsg_data_len; + + if (!cmsg_min_space(msg, cmsgh, CMSG_LEN(0)) || + cmsgh->cmsg_len < CMSG_LEN(0)) + return -1; + cmsg_data_len = cmsgh->cmsg_len - CMSG_LEN(0); + if (!get_cmsg_data_space(msg, cmsgh, &space)) + return -1; + if (space >= cmsg_data_len) { + *data_len = cmsg_data_len; + return 0; + } + *data_len = space; + return 1; + } + #endif /* CMSG_LEN */ + + /* + Structure meant to hold the information received after a recvmsg is performed. + Essentially it holds: the address, the message, the ancillary data and the return flags. + I use this structure for 2 main reasons: + - keep things ordered + - some of the ancillary parameters need to be int not long (rffi SignedP is actually long*), + therefore I cannot use the parameters directly + */ + struct recvmsg_info + { + struct sockaddr* address; // address fields + socklen_t addrlen; + int* length_of_messages; // message fields + char** messages; + int no_of_messages; + int size_of_ancillary; // ancillary fields + int* levels; + int* types; + char** file_descr; + int* descr_per_ancillary; + int retflag; // return flag field + }; + + /* + Wrapper function over recvmsg. Since it returns a lot of data, + in a structure that is hard to parse in rffi, it was implemented in C. + All the parameters, save the socket fd, message_size, ancillary_size + will be malloc'd and/or modified. 
+ */
+RPY_EXTERN
+int recvmsg_implementation(
+        int socket_fd,
+        int message_size,
+        int ancillary_size,
+        int flags,
+        struct sockaddr* address,
+        socklen_t* addrlen,
+        long** length_of_messages,
+        char** messages,
+        long* no_of_messages,
+        long* size_of_ancillary,
+        long** levels,
+        long** types,
+        char** file_descr,
+        long** descr_per_ancillary,
+        long* retflag)
+
+{
+
+    struct sockaddr* recvd_address;
+    socklen_t recvd_addrlen;
+    struct msghdr msg = {0};
+    void *controlbuf = NULL;
+    struct cmsghdr *cmsgh;
+    int cmsg_status;
+    struct iovec iov;
+    struct recvmsg_info* retinfo;
+    int error_flag;   // variable to be set in case of special errors.
+    int cmsgdatalen = 0;
+
+    // variables that are set to 1, if the message charp has been allocated
+    // and if the ancillary variables have been allocated. To be used in case of failure.
+    int iov_alloc = 0;
+    int anc_alloc = 0;
+
+    retinfo = (struct recvmsg_info*) malloc(sizeof(struct recvmsg_info));
+
+    if (ancillary_size > SOCKLEN_T_LIMIT){
+        error_flag = BAD_ANC_SIZE_GIVEN;
+        goto fail;
+    }
+
+    // Setup the messages iov struct memory
+    iov.iov_base = (char*) malloc(message_size);
+    memset(iov.iov_base, 0, message_size);
+    iov.iov_len = message_size;
+
+    // Setup the ancillary buffer memory
+    controlbuf = malloc(ancillary_size);
+
+    // Setup the recv address memory
+    recvd_addrlen = sizeof(struct sockaddr_storage);
+    recvd_address = (struct sockaddr*) malloc(recvd_addrlen);
+
+    memset(recvd_address, 0, recvd_addrlen);
+
+    // Setup the msghdr struct
+    msg.msg_name = recvd_address;
+    msg.msg_namelen = recvd_addrlen;
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = MSG_IOVLEN;
+    msg.msg_control = controlbuf;
+    msg.msg_controllen = ancillary_size;
+
+    // Link my structure to the msghdr fields
+    retinfo->address = msg.msg_name;
+    retinfo->length_of_messages = (int*) malloc(MSG_IOVLEN * sizeof(int));
+    retinfo->no_of_messages = MSG_IOVLEN;
+    retinfo->messages = (char**) malloc(MSG_IOVLEN * sizeof(char*));
+    retinfo->messages[0] = msg.msg_iov->iov_base;
+
+    iov_alloc = 1;
+    ssize_t bytes_recvd = 0;
+
+    bytes_recvd = recvmsg(socket_fd, &msg, flags);
+
+    if (bytes_recvd < 0){
+        goto fail;
+    }
+
+    retinfo->addrlen = (socklen_t) msg.msg_namelen;
+    retinfo->length_of_messages[0] = msg.msg_iov->iov_len;
+
+    // Count the ancillary items & allocate the memory
+    int anc_counter = 0;
+    for (cmsgh = ((msg.msg_controllen > 0) ? CMSG_FIRSTHDR(&msg) : NULL);
+         cmsgh != NULL; cmsgh = CMSG_NXTHDR(&msg, cmsgh)) {
+
+        anc_counter++;
+    }
+    retinfo->size_of_ancillary = anc_counter;
+    retinfo->file_descr = (char**) malloc(anc_counter * sizeof(char*));
+    retinfo->levels = (int*) malloc(anc_counter * sizeof(int));
+    retinfo->types = (int*) malloc(anc_counter * sizeof(int));
+    retinfo->descr_per_ancillary = (int*) malloc(anc_counter * sizeof(int));
+    anc_alloc = 1;
+
+    // Extract the ancillary items
+    int i = 0;
+    for (cmsgh = ((msg.msg_controllen > 0) ? CMSG_FIRSTHDR(&msg) : NULL);
+         cmsgh != NULL; cmsgh = CMSG_NXTHDR(&msg, cmsgh)) {
+        size_t local_size = 0;
+        cmsg_status = get_cmsg_data_len(&msg, cmsgh, &local_size);
+        if (cmsg_status != 0){
+            error_flag = MAL_ANC;
+            goto err_closefds;
+        }
+        retinfo->file_descr[i] = (char*) malloc(local_size);
+        memcpy(retinfo->file_descr[i], CMSG_DATA(cmsgh), local_size);
+        retinfo->levels[i] = cmsgh->cmsg_level;
+        retinfo->types[i] = cmsgh->cmsg_type;
+        retinfo->descr_per_ancillary[i] = local_size;
+        i++;
+
+    }
+    retinfo->retflag = msg.msg_flags;
+
+    // Set the parameters of address
+    memcpy(address, retinfo->address, retinfo->addrlen);
+    *addrlen = retinfo->addrlen;
+
+    // Set the parameters of message
+    no_of_messages[0] = retinfo->no_of_messages;
+    size_of_ancillary[0] = retinfo->size_of_ancillary;
+    *length_of_messages = (long*) malloc(sizeof(long) * retinfo->no_of_messages);
+    //memcpy(*length_of_messages, retinfo->length_of_messages, sizeof(int) * retinfo->no_of_messages);
+    int counter = 0;
+    for (i = 0; i < retinfo->no_of_messages; i++){
+        counter += retinfo->length_of_messages[i];
+        length_of_messages[0][i] = retinfo->length_of_messages[i];
+    }
+    memset(*messages, 0, sizeof(char) * counter);
+    counter = 0;
+    for (i = 0; i < retinfo->no_of_messages; i++){
+        memcpy(*messages + counter, retinfo->messages[i], retinfo->length_of_messages[i]);
+        counter += retinfo->length_of_messages[i];
+    }
+
+    // Set the parameters of ancillary
+    *levels = (long*) malloc(sizeof(long) * retinfo->size_of_ancillary);
+    *types = (long*) malloc(sizeof(long) * retinfo->size_of_ancillary);
+    *descr_per_ancillary = (long*) malloc(sizeof(long) * retinfo->size_of_ancillary);
+    counter = 0;
+    for (i = 0; i < retinfo->size_of_ancillary; i++){
+        counter += retinfo->descr_per_ancillary[i];
+        // Convert the int* to long*
+        levels[0][i] = (long) retinfo->levels[i];
+        types[0][i] = (long) retinfo->types[i];
+        descr_per_ancillary[0][i] = (long) retinfo->descr_per_ancillary[i];
+    }
+    *file_descr = (char*) malloc(sizeof(char) * counter);
+    memset(*file_descr, 0, sizeof(char) * counter);
+    counter = 0;
+    for (i = 0; i < retinfo->size_of_ancillary; i++){
+        memcpy(*file_descr + counter, retinfo->file_descr[i], retinfo->descr_per_ancillary[i]);
+        counter += retinfo->descr_per_ancillary[i];
+    }
+
+    // Set the retflag
+    retflag[0] = retinfo->retflag;
+
+    // Free the memory
+    free(retinfo->address);
+    free(retinfo->length_of_messages);
+    free(retinfo->levels);
+    free(retinfo->types);
+    free(retinfo->descr_per_ancillary);
+    for (i = 0; i < retinfo->no_of_messages; i++)
+        free(retinfo->messages[i]);
+    for (i = 0; i < retinfo->size_of_ancillary; i++)
+        free(retinfo->file_descr[i]);
+    free(retinfo->file_descr);
+    free(retinfo->messages);
+    free(retinfo);
+    free(controlbuf);
+
+    return bytes_recvd;
+
+ fail:
+    if (anc_alloc){
+        free(retinfo->file_descr);
+        free(retinfo->levels);
+        free(retinfo->types);
+        free(retinfo->descr_per_ancillary);
+        free(retinfo->length_of_messages);
+        free(retinfo->messages[0]);
+        free(retinfo->messages);
+        free(retinfo->address);
+        free(retinfo);
+        free(controlbuf);
+
+    }else{
+        if (iov_alloc){
+            free(retinfo->length_of_messages);
+            free(retinfo->messages[0]);
+            free(retinfo->messages);
+            free(retinfo->address);
+            free(controlbuf);
+            free(retinfo);
+        }
+    }
+    return error_flag;
+
+ err_closefds:
+    // Special case for UNIX sockets. In case file descriptors are received, they need to be closed.
+    // Taken from CPython
+#ifdef SCM_RIGHTS
+    /* Close all descriptors coming from SCM_RIGHTS, so they don't leak. */
+    for (cmsgh = ((msg.msg_controllen > 0) ? CMSG_FIRSTHDR(&msg) : NULL);
+         cmsgh != NULL; cmsgh = CMSG_NXTHDR(&msg, cmsgh)) {
+        size_t dataleng;
+        cmsg_status = get_cmsg_data_len(&msg, cmsgh, &dataleng);
+        cmsgdatalen = (int) dataleng;
+        if (cmsg_status < 0)
+            break;
+        if (cmsgh->cmsg_level == SOL_SOCKET &&
+            cmsgh->cmsg_type == SCM_RIGHTS) {
+            size_t numfds;
+            int *fdp;
+
+            numfds = cmsgdatalen / sizeof(int);
+            fdp = (int *)CMSG_DATA(cmsgh);
+            while (numfds-- > 0)
+                close(*fdp++);
+        }
+        if (cmsg_status != 0)
+            break;
+    }
+#endif /* SCM_RIGHTS */
+    goto fail;
+}
+
+
+// ################################################################################################
+// Sendmsg implementation and associated functions
+
+#ifdef CMSG_LEN
+static int
+get_CMSG_LEN(size_t length, size_t *result)
+{
+    size_t tmp;
+
+    if (length > (SOCKLEN_T_LIMIT - CMSG_LEN(0)))
+        return 0;
+    tmp = CMSG_LEN(length);
+    if ((tmp > SOCKLEN_T_LIMIT) || (tmp < length))
+        return 0;
+    *result = tmp;
+    return 1;
+}
+#endif
+
+#ifdef CMSG_SPACE
+/* If length is in range, set *result to CMSG_SPACE(length) and return
+   true; otherwise, return false. */
+static int
+get_CMSG_SPACE(size_t length, size_t *result)
+{
+    size_t tmp;
+
+    /* Use CMSG_SPACE(1) here in order to take account of the padding
+       necessary before *and* after the data. */
+    if (length > (SOCKLEN_T_LIMIT - CMSG_SPACE(1)))
+        return 0;
+    tmp = CMSG_SPACE(length);
+    if ((tmp > SOCKLEN_T_LIMIT) || (tmp < length))
+        return 0;
+    *result = tmp;
+    return 1;
+}
+#endif
+
+/*
+    sendmsg_implementation is a wrapper over the API's sendmsg.
+    It was inspired by the way CPython implemented this.
+    The main reason that it was written in C is the struct msghdr,
+    which contains the ancillary data in a linked list of cmsghdr structures.
+    It was simpler to use it in C, and then push the simpler types of data via rffi.
+*/
+RPY_EXTERN
+int sendmsg_implementation
+    (int socket,
+     struct sockaddr* address,
+     socklen_t addrlen,
+     long* length_of_messages,
+     char** messages,
+     int no_of_messages,
+     long* levels,
+     long* types,
+     char** file_descriptors,
+     long* no_of_fds,
+     int control_length,
+     int flag
+     )
+{
+
+    struct msghdr msg = {0};
+    struct cmsghdr *cmsg;
+    void* controlbuf = NULL;
+    int retval;
+    size_t i;
+
+    // Prepare the msghdr structure for the send:
+
+    // Add the address
+    if (address != NULL) {
+        msg.msg_name = address;
+        msg.msg_namelen = addrlen;
+    }
+
+    // Add the message
+    struct iovec *iovs = NULL;
+    if (no_of_messages > 0){
+
+        iovs = (struct iovec*) malloc(no_of_messages * sizeof(struct iovec));
+        memset(iovs, 0, no_of_messages * sizeof(struct iovec));
+        msg.msg_iov = iovs;
+        msg.msg_iovlen = no_of_messages;
+
+        for (i = 0; i < no_of_messages; i++){
+            iovs[i].iov_base = messages[i];
+            iovs[i].iov_len = length_of_messages[i];
+        }
+    }
+
+    // Add the ancillary
+#ifndef CMSG_SPACE
+    if (control_length > 1){
+        free(iovs);
+        return MUL_MSGS_NOT_SUP;
+    }
+#endif
+    if (control_length > 0){
+
+        // compute the total size of the ancillary;
+        // getting the exact amount of space can be tricky and OS dependent.
+        size_t total_size_of_ancillary = 0;
+        size_t space;
+        size_t controllen = 0, controllen_last = 0;
+        for (i = 0; i < control_length; i++){
+            total_size_of_ancillary = no_of_fds[i];
+#ifdef CMSG_SPACE
+            if (!get_CMSG_SPACE(total_size_of_ancillary, &space)) {
+#else
+            if (!get_CMSG_LEN(total_size_of_ancillary, &space)) {
+#endif
+                if (iovs != NULL)
+                    free(iovs);
+                return ANC_DATA_TOO_LARGE;
+            }
+            controllen += space;
+            if ((controllen > SOCKLEN_T_LIMIT) || (controllen < controllen_last)) {
+                if (iovs != NULL)
+                    free(iovs);
+                return ANC_DATA_TOO_LARGEX;
+            }
+            controllen_last = controllen;
+        }
+
+        controlbuf = malloc(controllen);
+        msg.msg_control = controlbuf;
+        msg.msg_controllen = controllen;
+
+        // memset controlbuf to 0 to avoid trash in the ancillary
+        memset(controlbuf, 0, controllen);
+        cmsg = NULL;
+        for (i = 0; i < control_length; i++){
+            cmsg = (i == 0) ? CMSG_FIRSTHDR(&msg) : CMSG_NXTHDR(&msg, cmsg);
+
+            cmsg->cmsg_level = (int) levels[i];
+            cmsg->cmsg_type = (int) types[i];
+            cmsg->cmsg_len = CMSG_LEN(sizeof(char) * no_of_fds[i]);
+            memcpy(CMSG_DATA(cmsg), file_descriptors[i], sizeof(char) * no_of_fds[i]);
+        }
+
+
+    }
+    // Add the flags
+    msg.msg_flags = flag;
+
+    // Send the data
+    retval = sendmsg(socket, &msg, flag);
+
+    // free everything that was allocated here and is not needed in rsocket
+    if (iovs != NULL)
+        free(iovs);
+    if (controlbuf != NULL)
+        free(controlbuf);
+
+    return retval;
+}
+
+// ################################################################################################
+// Wrappers for CMSG_SPACE and CMSG_LEN
+
+/*
+    These 2 functions are wrappers over sys/socket.h's CMSG_SPACE and CMSG_LEN.
+    They are identical to CPython's.
+*/
+#ifdef CMSG_SPACE
+RPY_EXTERN
+size_t CMSG_SPACE_wrapper(size_t desired_space){
+    size_t result;
+    if (!get_CMSG_SPACE(desired_space, &result)){
+        return 0;
+    }
+    return result;
+}
+#endif
+
+#ifdef CMSG_LEN
+RPY_EXTERN
+size_t CMSG_LEN_wrapper(size_t desired_len){
+    size_t result;
+    if (!get_CMSG_LEN(desired_len, &result)){
+        return 0;
+    }
+    return result;
+}
+#endif
+
+// ################################################################################################
+// Extra functions that I needed
+
+/*
+    This function is used to memcpy from a char* at an offset.
+    Could not get rffi.c_memcpy to do it at an offset, so I made my own.
+*/
+RPY_EXTERN
+int memcpy_from_CCHARP_at_offset_and_size(char* stringfrom, char** stringto, int offset, int size){
+    *stringto = memcpy(*stringto, stringfrom + offset, size);
+    return 0;
+}
+
+/*
+    These functions free memory that was allocated in C (by sendmsg or recvmsg),
+    was used in rsocket, and now needs cleanup.
+*/
+RPY_EXTERN
+int free_pointer_to_signedp(int** ptrtofree){
+    free(*ptrtofree);
+    return 0;
+}
+
+RPY_EXTERN
+int free_ptr_to_charp(char** ptrtofree){
+    free(*ptrtofree);
+    return 0;
+}
+
+''',]
+
+post_include_bits =[ "RPY_EXTERN "
+    "int sendmsg_implementation(int socket, struct sockaddr* address, socklen_t addrlen, long* length_of_messages, char** messages, int no_of_messages, long* levels, long* types, char** file_descriptors, long* no_of_fds, int control_length, int flag );\n"
+    "RPY_EXTERN "
+    "int recvmsg_implementation(int socket_fd, int message_size, int ancillary_size, int flags, struct sockaddr* address, socklen_t* addrlen, long** length_of_messages, char** messages, long* no_of_messages, long* size_of_ancillary, long** levels, long** types, char** file_descr, long** descr_per_ancillary, long* flag);\n"
+    "static "
+    "int cmsg_min_space(struct msghdr *msg, struct cmsghdr *cmsgh, size_t space);\n"
+    "static "
+    "int get_cmsg_data_space(struct msghdr *msg, struct cmsghdr *cmsgh, size_t *space);\n"
+    "static "
+    "int get_cmsg_data_len(struct msghdr *msg, struct cmsghdr *cmsgh, size_t *data_len);\n"
+    "static "
+    "int get_CMSG_LEN(size_t length, size_t *result);\n"
+    "static "
+    "int get_CMSG_SPACE(size_t length, size_t *result);\n"
+    "RPY_EXTERN "
+    "size_t CMSG_LEN_wrapper(size_t desired_len);\n"
+    "RPY_EXTERN "
+    "size_t CMSG_SPACE_wrapper(size_t desired_space);\n"
+    "RPY_EXTERN "
+    "int memcpy_from_CCHARP_at_offset_and_size(char* stringfrom, char** stringto, int offset, int size);\n"
+    "RPY_EXTERN "
+    "int free_pointer_to_signedp(int** ptrtofree);\n"
+    "RPY_EXTERN "
+    "int free_ptr_to_charp(char** ptrtofree);\n"
+    ]
+
+
+compilation_info = ExternalCompilationInfo(
+    includes=includes,
+    separate_module_sources=separate_module_sources,
+    post_include_bits=post_include_bits,
+)
+
 if _WIN32:
     CConfig.WSAEVENT = platform.SimpleType('WSAEVENT', rffi.VOIDP)
     CConfig.WSANETWORKEVENTS = platform.Struct(
@@ -387,6 +1035,7 @@
     sockaddr_ptr.TO.become(cConfig.sockaddr)
     addrinfo_ptr.TO.become(cConfig.addrinfo)
 
+# fill in missing constants with reasonable defaults
 cConfig.NI_MAXHOST = cConfig.NI_MAXHOST or 1025
 cConfig.NI_MAXSERV = cConfig.NI_MAXSERV or 32
@@ -571,11 +1220,32 @@
 recvfrom = external('recvfrom', [socketfd_type, rffi.VOIDP, size_t,
                                  rffi.INT, sockaddr_ptr, socklen_t_ptr],
                     rffi.INT, save_err=SAVE_ERR)
+recvmsg = jit.dont_look_inside(rffi.llexternal("recvmsg_implementation",
+    [rffi.INT, rffi.INT, rffi.INT, rffi.INT, sockaddr_ptr, socklen_t_ptr, rffi.SIGNEDPP, rffi.CCHARPP,
+     rffi.SIGNEDP, rffi.SIGNEDP, rffi.SIGNEDPP, rffi.SIGNEDPP, rffi.CCHARPP, rffi.SIGNEDPP, rffi.SIGNEDP],
+    rffi.INT, save_err=SAVE_ERR,
+    compilation_info=compilation_info))
+
+memcpy_from_CCHARP_at_offset = jit.dont_look_inside(rffi.llexternal("memcpy_from_CCHARP_at_offset_and_size",
+    [rffi.CCHARP, rffi.CCHARPP, rffi.INT, rffi.INT], rffi.INT, save_err=SAVE_ERR, compilation_info=compilation_info))
+freeccharp = jit.dont_look_inside(rffi.llexternal("free_ptr_to_charp",
+    [rffi.CCHARPP], rffi.INT, save_err=SAVE_ERR, compilation_info=compilation_info))
+freesignedp = jit.dont_look_inside(rffi.llexternal("free_pointer_to_signedp",
+    [rffi.SIGNEDPP], rffi.INT, save_err=SAVE_ERR, compilation_info=compilation_info))
+
 send = external('send', [socketfd_type, rffi.CCHARP, size_t, rffi.INT],
                 ssize_t, save_err=SAVE_ERR)
 sendto = external('sendto', [socketfd_type, rffi.VOIDP, size_t, rffi.INT,
                              sockaddr_ptr, socklen_t], ssize_t,
                   save_err=SAVE_ERR)
+sendmsg = jit.dont_look_inside(rffi.llexternal("sendmsg_implementation",
+    [rffi.INT, sockaddr_ptr, socklen_t, rffi.SIGNEDP, rffi.CCHARPP, rffi.INT,
+     rffi.SIGNEDP, rffi.SIGNEDP, rffi.CCHARPP, rffi.SIGNEDP, rffi.INT, rffi.INT],
+    rffi.INT, save_err=SAVE_ERR,
+    compilation_info=compilation_info))
+CMSG_SPACE = jit.dont_look_inside(rffi.llexternal("CMSG_SPACE_wrapper", [size_t], size_t, save_err=SAVE_ERR, compilation_info=compilation_info))
+CMSG_LEN = jit.dont_look_inside(rffi.llexternal("CMSG_LEN_wrapper", [size_t], size_t, save_err=SAVE_ERR, compilation_info=compilation_info))
+
 socketshutdown = external('shutdown', [socketfd_type, rffi.INT], rffi.INT,
                           save_err=SAVE_ERR)
 gethostname = external('gethostname', [rffi.CCHARP, rffi.INT], rffi.INT,
diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py
--- a/rpython/rlib/rsocket.py
+++ b/rpython/rlib/rsocket.py
@@ -963,6 +963,126 @@
             return (read_bytes, address)
         raise self.error_handler()
 
+    @jit.dont_look_inside
+    def recvmsg(self, message_size, ancbufsize = 0, flags = 0):
+        """
+        Receive up to message_size bytes from a message. Also receives ancillary data.
+        Returns the message, ancillary, flag and address of the sender.
+        :param message_size: Maximum size of the message to be received
+        :param ancbufsize: Maximum size of the ancillary data to be received
+        :param flags: Receive flag. For more details, please check the Unix manual
+        :return: a tuple consisting of the message, the ancillary data, return flag and the address.
+ """ + if message_size < 0: + raise RSocketError("Invalid message size") + if ancbufsize < 0: + raise RSocketError("invalid ancillary data buffer length") + + self.wait_for_data(False) + address, addr_p, addrlen_p = self._addrbuf() + len_of_msgs = lltype.malloc(rffi.SIGNEDPP.TO,1,flavor='raw',track_allocation=True,nonmovable=False) + messages = lltype.malloc(rffi.CCHARPP.TO,1,flavor='raw',track_allocation=True,nonmovable=False ) + messages[0] = lltype.malloc(rffi.CCHARP.TO, message_size,flavor='raw',track_allocation=True,nonmovable=False) + rffi.c_memset(messages[0], 0, message_size) + no_of_messages = lltype.malloc(rffi.SIGNEDP.TO,1,flavor='raw',track_allocation=True,nonmovable=False ) + no_of_messages[0] = rffi.cast(rffi.SIGNED, 0) + size_of_anc = lltype.malloc(rffi.SIGNEDP.TO,1,flavor='raw',track_allocation=True,nonmovable=False ) + size_of_anc[0] = rffi.cast(rffi.SIGNED,0) + levels = lltype.malloc(rffi.SIGNEDPP.TO,1,flavor='raw',track_allocation=True,nonmovable=False) + types = lltype.malloc(rffi.SIGNEDPP.TO,1,flavor='raw',track_allocation=True,nonmovable=False) + file_descr = lltype.malloc(rffi.CCHARPP.TO,1,flavor='raw',track_allocation=True,nonmovable=False ) + descr_per_anc = lltype.malloc(rffi.SIGNEDPP.TO,1,flavor='raw',track_allocation=True,nonmovable=False) + retflag = lltype.malloc(rffi.SIGNEDP.TO,1,flavor='raw',track_allocation=True,nonmovable=False ) + retflag[0] = rffi.cast(rffi.SIGNED,0) + + # a mask for the SIGNEDP's that need to be cast to int. 
(long default) + reply = _c.recvmsg(self.fd, rffi.cast(lltype.Signed,message_size), + rffi.cast(lltype.Signed,ancbufsize),rffi.cast(lltype.Signed,flags), + addr_p, addrlen_p, len_of_msgs, messages, no_of_messages,size_of_anc, + levels, types,file_descr,descr_per_anc,retflag) + if reply >= 0: + anc_size = rffi.cast(rffi.SIGNED,size_of_anc[0]) + returnflag = rffi.cast(rffi.SIGNED,retflag[0]) + addrlen = rffi.cast(rffi.SIGNED,addrlen_p[0]) + + retmsg = rffi.charpsize2str(messages[0],reply) + + offset = 0 + list_of_tuples = [] + + pre_anc = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw', track_allocation=True, nonmovable=False) + for i in range(anc_size): + level = rffi.cast(rffi.SIGNED, levels[0][i]) + type = rffi.cast(rffi.SIGNED, types[0][i]) + bytes_in_anc = rffi.cast(rffi.SIGNED, descr_per_anc[0][i]) + pre_anc[0] = lltype.malloc(rffi.CCHARP.TO, bytes_in_anc,flavor='raw',track_allocation=True,nonmovable=False) + _c.memcpy_from_CCHARP_at_offset(file_descr[0], pre_anc,rffi.cast(rffi.SIGNED,offset), bytes_in_anc) + anc = rffi.charpsize2str(pre_anc[0],bytes_in_anc) + tup = (level,type, anc) + list_of_tuples.append(tup) + offset += bytes_in_anc + lltype.free(pre_anc[0], flavor='raw') + + if addrlen: + address.addrlen = addrlen + else: + address.unlock() + address = None + + rettup = (retmsg,list_of_tuples,returnflag,address) + + if address is not None: + address.unlock() + # free underlying complexity first + _c.freeccharp(file_descr) + _c.freesignedp(len_of_msgs) + _c.freesignedp(levels) + _c.freesignedp(types) + _c.freesignedp(descr_per_anc) + + lltype.free(messages[0], flavor='raw') + lltype.free(pre_anc,flavor='raw') + lltype.free(messages,flavor='raw') + lltype.free(file_descr,flavor='raw') + lltype.free(len_of_msgs,flavor='raw') + lltype.free(no_of_messages, flavor='raw') + lltype.free(size_of_anc, flavor='raw') + lltype.free(levels, flavor='raw') + lltype.free(types, flavor='raw') + lltype.free(descr_per_anc, flavor='raw') + lltype.free(retflag, flavor='raw') + 
lltype.free(addrlen_p,flavor='raw') + + return rettup + else: + + #in case of failure the underlying complexity has already been freed + lltype.free(messages[0], flavor='raw') + lltype.free(messages, flavor='raw') + lltype.free(file_descr, flavor='raw') + lltype.free(len_of_msgs, flavor='raw') + lltype.free(no_of_messages, flavor='raw') + lltype.free(size_of_anc, flavor='raw') + lltype.free(levels, flavor='raw') + lltype.free(types, flavor='raw') + lltype.free(descr_per_anc, flavor='raw') + lltype.free(retflag, flavor='raw') + lltype.free(addrlen_p, flavor='raw') + + if address is not None: + address.unlock() + if _c.geterrno() == _c.EINTR: + raise last_error() + if (reply == -10000): + raise RSocketError("Invalid message size") + if (reply == -10001): + raise RSocketError("Invalid ancillary data buffer length") + if (reply == -10002): + raise RSocketError("received malformed or improperly truncated ancillary data") + raise last_error() + + + def send_raw(self, dataptr, length, flags=0): """Send data from a CCHARP buffer.""" self.wait_for_data(True) @@ -1009,6 +1129,86 @@ raise self.error_handler() return res + @jit.dont_look_inside + def sendmsg(self, messages, ancillary=None, flags=0, address=None): + """ + Send data and ancillary on a socket. For use of ancillary data, please check the Unix manual. + Work on connectionless sockets via the address parameter. + :param messages: a message that is a list of strings + :param ancillary: data to be sent separate from the message body. Needs to be a list of tuples. + E.g. [(level,type, bytes),...]. Default None. + :param flags: the flag to be set for sendmsg. Please check the Unix manual regarding values. Default 0 + :param address: address of the recepient. Useful for when sending on connectionless sockets. 
Default None + :return: Bytes sent from the message + """ + need_to_free_address = True + if address is None: + need_to_free_address = False + addr = lltype.nullptr(_c.sockaddr) + addrlen = 0 + else: + addr = address.lock() + addrlen = address.addrlen + + no_of_messages = len(messages) + messages_ptr = lltype.malloc(rffi.CCHARPP.TO,no_of_messages+1,flavor='raw',track_allocation=True,nonmovable=False) + messages_length_ptr = lltype.malloc(rffi.SIGNEDP.TO,no_of_messages,flavor='raw',zero=True, track_allocation=True,nonmovable=False) + counter = 0 + for message in messages: + messages_ptr[counter] = rffi.str2charp(message) + messages_length_ptr[counter] = rffi.cast(rffi.SIGNED, len(message)) + counter += 1 + messages_ptr[counter] = lltype.nullptr(rffi.CCHARP.TO) + if ancillary is not None: + size_of_ancillary = len(ancillary) + else: + size_of_ancillary = 0 + levels = lltype.malloc(rffi.SIGNEDP.TO, size_of_ancillary,flavor='raw',zero=True, track_allocation=True,nonmovable=False) + types = lltype.malloc(rffi.SIGNEDP.TO, size_of_ancillary,flavor='raw',zero=True, track_allocation=True,nonmovable=False) + desc_per_ancillary = lltype.malloc(rffi.SIGNEDP.TO, size_of_ancillary,flavor='raw',zero=True, track_allocation=True,nonmovable=False) + file_descr = lltype.malloc(rffi.CCHARPP.TO, size_of_ancillary,flavor='raw', track_allocation=True,nonmovable=False) + if ancillary is not None: + counter = 0 + for level, type, content in ancillary: + assert isinstance(type,int) + assert isinstance(level, int) + levels[counter] = rffi.cast(rffi.SIGNED,level) + types[counter] = rffi.cast(rffi.SIGNED,type) + desc_per_ancillary[counter] = rffi.cast(rffi.SIGNED, (len(content))) + file_descr[counter] = rffi.str2charp(content, track_allocation=True) + counter +=1 + else: + size_of_ancillary = 0 + snd_no_msgs = rffi.cast(rffi.SIGNED, no_of_messages) + snd_anc_size =rffi.cast(rffi.SIGNED, size_of_ancillary) + + + bytes_sent = _c.sendmsg(self.fd, addr, addrlen, messages_length_ptr, messages_ptr, 
snd_no_msgs,levels,types,file_descr,desc_per_ancillary,snd_anc_size,flags) + + + if need_to_free_address: + address.unlock() + for i in range(len(messages)): + lltype.free(messages_ptr[i], flavor='raw', track_allocation=True) + lltype.free(messages_ptr, flavor='raw', track_allocation=True) + lltype.free(messages_length_ptr, flavor='raw', track_allocation=True) + + if size_of_ancillary > 0: + for i in range(len(ancillary)): + lltype.free(file_descr[i], flavor='raw', track_allocation=True) + lltype.free(desc_per_ancillary, flavor='raw', track_allocation=True) + lltype.free(types, flavor='raw', track_allocation=True) + lltype.free(levels, flavor='raw', track_allocation=True) + lltype.free(file_descr, flavor='raw', track_allocation=True) + + self.wait_for_data(True) + if (bytes_sent < 0) and (bytes_sent!=-1000) and (bytes_sent!=-1001) and (bytes_sent!=-1002): + raise last_error() + + return bytes_sent + + + def setblocking(self, block): if block: timeout = -1.0 @@ -1190,6 +1390,31 @@ return (make_socket(fd0, family, type, proto, SocketClass), make_socket(fd1, family, type, proto, SocketClass)) +if _c._POSIX: + def CMSG_LEN( demanded_len): + """ + Socket method to determine the optimal byte size of the ancillary. + Recommended to be used when computing the ancillary size for recvmsg. + :param demanded_len: an integer with the minimum size required. + :return: an integer with the minimum memory needed for the required size. The value is not memory alligned + """ + if demanded_len < 0: + return 0 + result = _c.CMSG_LEN(demanded_len) + return result + + def CMSG_SPACE( demanded_size): + """ + Socket method to determine the optimal byte size of the ancillary. + Recommended to be used when computing the ancillary size for recvmsg. + :param demanded_size: an integer with the minimum size required. + :return: an integer with the minimum memory needed for the required size. 
The value is memory alligned + """ + if demanded_size < 0: + return 0 + result = _c.CMSG_SPACE(demanded_size) + return result + if _c.WIN32: def dup(fd, inheritable=True): with lltype.scoped_alloc(_c.WSAPROTOCOL_INFO, zero=True) as info: diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -752,7 +752,8 @@ # Signed, Signed * SIGNED = lltype.Signed -SIGNEDP = lltype.Ptr(lltype.Array(SIGNED, hints={'nolength': True})) +SIGNEDP = lltype.Ptr(lltype.Array(lltype.Signed, hints={'nolength': True})) +SIGNEDPP = lltype.Ptr(lltype.Array(SIGNEDP, hints={'nolength': True})) # various type mapping From pypy.commits at gmail.com Thu Aug 31 07:50:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Aug 2017 04:50:55 -0700 (PDT) Subject: [pypy-commit] pypy py3.5-sendmsg-recvmsg: Close branch py3.5-sendmsg-recvmsg Message-ID: <59a7f81f.358edf0a.db521.1dcb@mx.google.com> Author: Ronan Lamy Branch: py3.5-sendmsg-recvmsg Changeset: r92285:ffb1e878bd5f Date: 2017-08-31 11:50 +0000 http://bitbucket.org/pypy/pypy/changeset/ffb1e878bd5f/ Log: Close branch py3.5-sendmsg-recvmsg From pypy.commits at gmail.com Thu Aug 31 08:46:36 2017 From: pypy.commits at gmail.com (stevie_92) Date: Thu, 31 Aug 2017 05:46:36 -0700 (PDT) Subject: [pypy-commit] pypy cpyext-gc-trialdeletion: Added more tests Message-ID: <59a8052c.02da1c0a.194b9.0570@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-trialdeletion Changeset: r92287:74fa1f758dc8 Date: 2017-08-31 14:45 +0200 http://bitbucket.org/pypy/pypy/changeset/74fa1f758dc8/ Log: Added more tests diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -441,3 +441,47 @@ assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 assert r2.base.c_ob_refcnt & REFCNT_MASK == 1 + def 
test_multiple_cycles_partial_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r3 = self._rawrefcount_cycle_obj() + r4 = self._rawrefcount_cycle_obj() + r5 = self._rawrefcount_cycle_obj() + r1.next = r2 + r2.next = r3 + r3.next = r1 + r2.prev = r5 + r5.next = r4 + r4.next = r5 + r5.base.c_ob_refcnt += 1 + r4.base.c_ob_refcnt += 1 + self._rawrefcount_buffer_obj(r1) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r4.base.c_ob_refcnt & REFCNT_MASK == 2 + assert r5.base.c_ob_refcnt & REFCNT_MASK == 1 + + def test_multiple_cycles_all_free(self): + self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + r1 = self._rawrefcount_cycle_obj() + r2 = self._rawrefcount_cycle_obj() + r3 = self._rawrefcount_cycle_obj() + r4 = self._rawrefcount_cycle_obj() + r5 = self._rawrefcount_cycle_obj() + r1.next = r2 + r2.next = r3 + r3.next = r1 + r2.prev = r5 + r5.next = r4 + r4.next = r5 + r5.base.c_ob_refcnt += 1 + self._rawrefcount_buffer_obj(r1) + self.gc.rrc_collect_cycles() + assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r4.base.c_ob_refcnt & REFCNT_MASK == 0 + assert r5.base.c_ob_refcnt & REFCNT_MASK == 0 From pypy.commits at gmail.com Thu Aug 31 11:44:20 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Aug 2017 08:44:20 -0700 (PDT) Subject: [pypy-commit] cffi default: Write down an explicit example of what not to do Message-ID: <59a82ed4.10301c0a.1cc6a.342e@mx.google.com> Author: Armin Rigo Branch: Changeset: r3006:3d609382a4b8 Date: 2017-08-31 17:44 +0200 http://bitbucket.org/cffi/cffi/changeset/3d609382a4b8/ Log: Write down an explicit example of what not to do diff --git a/doc/source/ref.rst b/doc/source/ref.rst --- 
a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -51,6 +51,9 @@ data can be used as long as this object is kept alive, but must not be used for a longer time. Be careful about that when copying the pointer to the memory somewhere else, e.g. into another structure. +Also, this means that a line like ``x = ffi.new(...)[0]`` is *always +wrong:* the newly allocated object goes out of scope instantly, and so +is freed immediately, and ``x`` is garbage. The returned memory is initially cleared (filled with zeroes), before the optional initializer is applied. For performance, see From pypy.commits at gmail.com Thu Aug 31 12:29:31 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Aug 2017 09:29:31 -0700 (PDT) Subject: [pypy-commit] pypy multiphase: Add _testmultiphase module, for test_importlib Message-ID: <59a8396b.010b1c0a.4403c.35ac@mx.google.com> Author: Ronan Lamy Branch: multiphase Changeset: r92288:236f54a091d0 Date: 2017-08-31 17:02 +0100 http://bitbucket.org/pypy/pypy/changeset/236f54a091d0/ Log: Add _testmultiphase module, for test_importlib diff --git a/lib-python/3/test/test_importlib/extension/test_loader.py b/lib-python/3/test/test_importlib/extension/test_loader.py --- a/lib-python/3/test/test_importlib/extension/test_loader.py +++ b/lib-python/3/test/test_importlib/extension/test_loader.py @@ -88,6 +88,7 @@ def setUp(self): self.name = '_testmultiphase' + __import__(self.name) # PyPy hack finder = self.machinery.FileFinder(None) self.spec = importlib.util.find_spec(self.name) assert self.spec diff --git a/lib_pypy/_testmultiphase.c b/lib_pypy/_testmultiphase.c new file mode 100644 --- /dev/null +++ b/lib_pypy/_testmultiphase.c @@ -0,0 +1,627 @@ +/* Copied from CPython's Modules/_testmultiphase.c */ +/***************************************************/ + +/* Testing module for multi-phase initialization of extension modules (PEP 489) + */ + +#include "Python.h" + +/* Example objects */ +typedef struct { + PyObject_HEAD + PyObject *x_attr; /* 
Attributes dictionary */ +} ExampleObject; + +/* Example methods */ + +static int +Example_traverse(ExampleObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->x_attr); + return 0; +} + +static int +Example_finalize(ExampleObject *self) +{ + Py_CLEAR(self->x_attr); + return 0; +} + +static PyObject * +Example_demo(ExampleObject *self, PyObject *args) +{ + PyObject *o = NULL; + if (!PyArg_ParseTuple(args, "|O:demo", &o)) + return NULL; + if (o != NULL && PyUnicode_Check(o)) { + Py_INCREF(o); + return o; + } + Py_INCREF(Py_None); + return Py_None; +} + + +static PyMethodDef Example_methods[] = { + {"demo", (PyCFunction)Example_demo, METH_VARARGS, + PyDoc_STR("demo() -> None")}, + {NULL, NULL} /* sentinel */ +}; + +static PyObject * +Example_getattro(ExampleObject *self, PyObject *name) +{ + if (self->x_attr != NULL) { + PyObject *v = PyDict_GetItem(self->x_attr, name); + if (v != NULL) { + Py_INCREF(v); + return v; + } + } + return PyObject_GenericGetAttr((PyObject *)self, name); +} + +static int +Example_setattr(ExampleObject *self, char *name, PyObject *v) +{ + if (self->x_attr == NULL) { + self->x_attr = PyDict_New(); + if (self->x_attr == NULL) + return -1; + } + if (v == NULL) { + int rv = PyDict_DelItemString(self->x_attr, name); + if (rv < 0) + PyErr_SetString(PyExc_AttributeError, + "delete non-existing Example attribute"); + return rv; + } + else + return PyDict_SetItemString(self->x_attr, name, v); +} + +static PyType_Slot Example_Type_slots[] = { + {Py_tp_doc, "The Example type"}, + {Py_tp_finalize, Example_finalize}, + {Py_tp_traverse, Example_traverse}, + {Py_tp_getattro, Example_getattro}, + {Py_tp_setattr, Example_setattr}, + {Py_tp_methods, Example_methods}, + {0, 0}, +}; + +static PyType_Spec Example_Type_spec = { + "_testimportexec.Example", + sizeof(ExampleObject), + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, + Example_Type_slots +}; + +/* Function of two integers returning integer */ + 
+PyDoc_STRVAR(testexport_foo_doc, +"foo(i,j)\n\ +\n\ +Return the sum of i and j."); + +static PyObject * +testexport_foo(PyObject *self, PyObject *args) +{ + long i, j; + long res; + if (!PyArg_ParseTuple(args, "ll:foo", &i, &j)) + return NULL; + res = i + j; + return PyLong_FromLong(res); +} + +/* Test that PyState registration fails */ + +PyDoc_STRVAR(call_state_registration_func_doc, +"register_state(0): call PyState_FindModule()\n\ +register_state(1): call PyState_AddModule()\n\ +register_state(2): call PyState_RemoveModule()"); + +static PyObject * +call_state_registration_func(PyObject *mod, PyObject *args) +{ + int i, ret; + PyModuleDef *def = PyModule_GetDef(mod); + if (def == NULL) { + return NULL; + } + if (!PyArg_ParseTuple(args, "i:call_state_registration_func", &i)) + return NULL; + switch (i) { + case 0: + mod = PyState_FindModule(def); + if (mod == NULL) { + Py_RETURN_NONE; + } + return mod; + case 1: + ret = PyState_AddModule(mod, def); + if (ret != 0) { + return NULL; + } + break; + case 2: + ret = PyState_RemoveModule(def); + if (ret != 0) { + return NULL; + } + break; + } + Py_RETURN_NONE; +} + + +static PyType_Slot Str_Type_slots[] = { + {Py_tp_base, NULL}, /* filled out in module exec function */ + {0, 0}, +}; + +static PyType_Spec Str_Type_spec = { + "_testimportexec.Str", + 0, + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + Str_Type_slots +}; + +static PyMethodDef testexport_methods[] = { + {"foo", testexport_foo, METH_VARARGS, + testexport_foo_doc}, + {"call_state_registration_func", call_state_registration_func, + METH_VARARGS, call_state_registration_func_doc}, + {NULL, NULL} /* sentinel */ +}; + +static int execfunc(PyObject *m) +{ + PyObject *temp = NULL; + + /* Due to cross platform compiler issues the slots must be filled + * here. It's required for portability to Windows without requiring + * C++. 
+     */
+    Str_Type_slots[0].pfunc = &PyUnicode_Type;
+
+    /* Add a custom type */
+    temp = PyType_FromSpec(&Example_Type_spec);
+    if (temp == NULL)
+        goto fail;
+    if (PyModule_AddObject(m, "Example", temp) != 0)
+        goto fail;
+
+    /* Add an exception type */
+    temp = PyErr_NewException("_testimportexec.error", NULL, NULL);
+    if (temp == NULL)
+        goto fail;
+    if (PyModule_AddObject(m, "error", temp) != 0)
+        goto fail;
+
+    /* Add Str */
+    temp = PyType_FromSpec(&Str_Type_spec);
+    if (temp == NULL)
+        goto fail;
+    if (PyModule_AddObject(m, "Str", temp) != 0)
+        goto fail;
+
+    if (PyModule_AddIntConstant(m, "int_const", 1969) != 0)
+        goto fail;
+
+    if (PyModule_AddStringConstant(m, "str_const", "something different") != 0)
+        goto fail;
+
+    return 0;
+ fail:
+    return -1;
+}
+
+/* Helper for module definitions; there'll be a lot of them */
+#define TEST_MODULE_DEF(name, slots, methods) { \
+    PyModuleDef_HEAD_INIT,                      /* m_base */ \
+    name,                                       /* m_name */ \
+    PyDoc_STR("Test module " name),             /* m_doc */ \
+    0,                                          /* m_size */ \
+    methods,                                    /* m_methods */ \
+    slots,                                      /* m_slots */ \
+    NULL,                                       /* m_traverse */ \
+    NULL,                                       /* m_clear */ \
+    NULL,                                       /* m_free */ \
+}
+
+PyModuleDef_Slot main_slots[] = {
+    {Py_mod_exec, execfunc},
+    {0, NULL},
+};
+
+static PyModuleDef main_def = TEST_MODULE_DEF("main", main_slots, testexport_methods);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase(PyObject *spec)
+{
+    return PyModuleDef_Init(&main_def);
+}
+
+
+/**** Importing a non-module object ****/
+
+static PyModuleDef def_nonmodule;
+static PyModuleDef def_nonmodule_with_methods;
+
+/* Create a SimpleNamespace(three=3) */
+static PyObject*
+createfunc_nonmodule(PyObject *spec, PyModuleDef *def)
+{
+    PyObject *dct, *ns, *three;
+
+    if (def != &def_nonmodule && def != &def_nonmodule_with_methods) {
+        PyErr_SetString(PyExc_SystemError, "def does not match");
+        return NULL;
+    }
+
+    dct = PyDict_New();
+    if (dct == NULL)
+        return NULL;
+
+    three = PyLong_FromLong(3);
+    if (three == NULL) {
+        Py_DECREF(dct);
+        return NULL;
+    }
+    PyDict_SetItemString(dct, "three", three);
+    Py_DECREF(three);
+
+    ns = _PyNamespace_New(dct);
+    Py_DECREF(dct);
+    return ns;
+}
+
+static PyModuleDef_Slot slots_create_nonmodule[] = {
+    {Py_mod_create, createfunc_nonmodule},
+    {0, NULL},
+};
+
+static PyModuleDef def_nonmodule = TEST_MODULE_DEF(
+    "_testmultiphase_nonmodule", slots_create_nonmodule, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_nonmodule(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_nonmodule);
+}
+
+PyDoc_STRVAR(nonmodule_bar_doc,
+"bar(i,j)\n\
+\n\
+Return the difference of i - j.");
+
+static PyObject *
+nonmodule_bar(PyObject *self, PyObject *args)
+{
+    long i, j;
+    long res;
+    if (!PyArg_ParseTuple(args, "ll:bar", &i, &j))
+        return NULL;
+    res = i - j;
+    return PyLong_FromLong(res);
+}
+
+static PyMethodDef nonmodule_methods[] = {
+    {"bar", nonmodule_bar, METH_VARARGS, nonmodule_bar_doc},
+    {NULL, NULL}  /* sentinel */
+};
+
+static PyModuleDef def_nonmodule_with_methods = TEST_MODULE_DEF(
+    "_testmultiphase_nonmodule_with_methods", slots_create_nonmodule, nonmodule_methods);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_nonmodule_with_methods(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_nonmodule_with_methods);
+}
+
+/**** Non-ASCII-named modules ****/
+
+static PyModuleDef def_nonascii_latin = { \
+    PyModuleDef_HEAD_INIT,                      /* m_base */
+    "_testmultiphase_nonascii_latin",           /* m_name */
+    PyDoc_STR("Module named in Czech"),         /* m_doc */
+    0,                                          /* m_size */
+    NULL,                                       /* m_methods */
+    NULL,                                       /* m_slots */
+    NULL,                                       /* m_traverse */
+    NULL,                                       /* m_clear */
+    NULL,                                       /* m_free */
+};
+
+PyMODINIT_FUNC
+PyInitU__testmultiphase_zkouka_naten_evc07gi8e(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_nonascii_latin);
+}
+
+static PyModuleDef def_nonascii_kana = { \
+    PyModuleDef_HEAD_INIT,                      /* m_base */
+    "_testmultiphase_nonascii_kana",            /* m_name */
+    PyDoc_STR("Module named in Japanese"),      /* m_doc */
+    0,                                          /* m_size */
+    NULL,                                       /* m_methods */
+    NULL,                                       /* m_slots */
+    NULL,                                       /* m_traverse */
+    NULL,                                       /* m_clear */
+    NULL,                                       /* m_free */
+};
+
+PyMODINIT_FUNC
+PyInitU_eckzbwbhc6jpgzcx415x(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_nonascii_kana);
+}
+
+/*** Module with a single-character name ***/
+
+PyMODINIT_FUNC
+PyInit_x(PyObject *spec)
+{
+    return PyModuleDef_Init(&main_def);
+}
+
+/**** Testing NULL slots ****/
+
+static PyModuleDef null_slots_def = TEST_MODULE_DEF(
+    "_testmultiphase_null_slots", NULL, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_null_slots(PyObject *spec)
+{
+    return PyModuleDef_Init(&null_slots_def);
+}
+
+/**** Problematic modules ****/
+
+static PyModuleDef_Slot slots_bad_large[] = {
+    {_Py_mod_LAST_SLOT + 1, NULL},
+    {0, NULL},
+};
+
+static PyModuleDef def_bad_large = TEST_MODULE_DEF(
+    "_testmultiphase_bad_slot_large", slots_bad_large, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_bad_slot_large(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_bad_large);
+}
+
+static PyModuleDef_Slot slots_bad_negative[] = {
+    {-1, NULL},
+    {0, NULL},
+};
+
+static PyModuleDef def_bad_negative = TEST_MODULE_DEF(
+    "_testmultiphase_bad_slot_negative", slots_bad_negative, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_bad_slot_negative(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_bad_negative);
+}
+
+static PyModuleDef def_create_int_with_state = { \
+    PyModuleDef_HEAD_INIT,                      /* m_base */
+    "create_with_state",                        /* m_name */
+    PyDoc_STR("Not a PyModuleObject object, but requests per-module state"),
+    10,                                         /* m_size */
+    NULL,                                       /* m_methods */
+    slots_create_nonmodule,                     /* m_slots */
+    NULL,                                       /* m_traverse */
+    NULL,                                       /* m_clear */
+    NULL,                                       /* m_free */
+};
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_create_int_with_state(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_create_int_with_state);
+}
+
+
+static PyModuleDef def_negative_size = { \
+    PyModuleDef_HEAD_INIT,                      /* m_base */
+    "negative_size",                            /* m_name */
+    PyDoc_STR("PyModuleDef with negative m_size"),
+    -1,                                         /* m_size */
+    NULL,                                       /* m_methods */
+    slots_create_nonmodule,                     /* m_slots */
+    NULL,                                       /* m_traverse */
+    NULL,                                       /* m_clear */
+    NULL,                                       /* m_free */
+};
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_negative_size(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_negative_size);
+}
+
+
+static PyModuleDef uninitialized_def = TEST_MODULE_DEF("main", main_slots, testexport_methods);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_export_uninitialized(PyObject *spec)
+{
+    return (PyObject*) &uninitialized_def;
+}
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_export_null(PyObject *spec)
+{
+    return NULL;
+}
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_export_raise(PyObject *spec)
+{
+    PyErr_SetString(PyExc_SystemError, "bad export function");
+    return NULL;
+}
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_export_unreported_exception(PyObject *spec)
+{
+    PyErr_SetString(PyExc_SystemError, "bad export function");
+    return PyModuleDef_Init(&main_def);
+}
+
+static PyObject*
+createfunc_null(PyObject *spec, PyModuleDef *def)
+{
+    return NULL;
+}
+
+PyModuleDef_Slot slots_create_null[] = {
+    {Py_mod_create, createfunc_null},
+    {0, NULL},
+};
+
+static PyModuleDef def_create_null = TEST_MODULE_DEF(
+    "_testmultiphase_create_null", slots_create_null, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_create_null(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_create_null);
+}
+
+static PyObject*
+createfunc_raise(PyObject *spec, PyModuleDef *def)
+{
+    PyErr_SetString(PyExc_SystemError, "bad create function");
+    return NULL;
+}
+
+static PyModuleDef_Slot slots_create_raise[] = {
+    {Py_mod_create, createfunc_raise},
+    {0, NULL},
+};
+
+static PyModuleDef def_create_raise = TEST_MODULE_DEF(
+    "_testmultiphase_create_null", slots_create_raise, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_create_raise(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_create_raise);
+}
+
+static PyObject*
+createfunc_unreported_exception(PyObject *spec, PyModuleDef *def)
+{
+    PyErr_SetString(PyExc_SystemError, "bad create function");
+    return PyModule_New("foo");
+}
+
+static PyModuleDef_Slot slots_create_unreported_exception[] = {
+    {Py_mod_create, createfunc_unreported_exception},
+    {0, NULL},
+};
+
+static PyModuleDef def_create_unreported_exception = TEST_MODULE_DEF(
+    "_testmultiphase_create_unreported_exception", slots_create_unreported_exception, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_create_unreported_exception(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_create_unreported_exception);
+}
+
+static PyModuleDef_Slot slots_nonmodule_with_exec_slots[] = {
+    {Py_mod_create, createfunc_nonmodule},
+    {Py_mod_exec, execfunc},
+    {0, NULL},
+};
+
+static PyModuleDef def_nonmodule_with_exec_slots = TEST_MODULE_DEF(
+    "_testmultiphase_nonmodule_with_exec_slots", slots_nonmodule_with_exec_slots, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_nonmodule_with_exec_slots(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_nonmodule_with_exec_slots);
+}
+
+static int
+execfunc_err(PyObject *mod)
+{
+    return -1;
+}
+
+static PyModuleDef_Slot slots_exec_err[] = {
+    {Py_mod_exec, execfunc_err},
+    {0, NULL},
+};
+
+static PyModuleDef def_exec_err = TEST_MODULE_DEF(
+    "_testmultiphase_exec_err", slots_exec_err, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_exec_err(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_exec_err);
+}
+
+static int
+execfunc_raise(PyObject *spec)
+{
+    PyErr_SetString(PyExc_SystemError, "bad exec function");
+    return -1;
+}
+
+static PyModuleDef_Slot slots_exec_raise[] = {
+    {Py_mod_exec, execfunc_raise},
+    {0, NULL},
+};
+
+static PyModuleDef def_exec_raise = TEST_MODULE_DEF(
+    "_testmultiphase_exec_raise", slots_exec_raise, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_exec_raise(PyObject *mod)
+{
+    return PyModuleDef_Init(&def_exec_raise);
+}
+
+static int
+execfunc_unreported_exception(PyObject *mod)
+{
+    PyErr_SetString(PyExc_SystemError, "bad exec function");
+    return 0;
+}
+
+static PyModuleDef_Slot slots_exec_unreported_exception[] = {
+    {Py_mod_exec, execfunc_unreported_exception},
+    {0, NULL},
+};
+
+static PyModuleDef def_exec_unreported_exception = TEST_MODULE_DEF(
+    "_testmultiphase_exec_unreported_exception", slots_exec_unreported_exception, NULL);
+
+PyMODINIT_FUNC
+PyInit__testmultiphase_exec_unreported_exception(PyObject *spec)
+{
+    return PyModuleDef_Init(&def_exec_unreported_exception);
+}
+
+/*** Helper for imp test ***/
+
+static PyModuleDef imp_dummy_def = TEST_MODULE_DEF("imp_dummy", main_slots, testexport_methods);
+
+PyMODINIT_FUNC
+PyInit_imp_dummy(PyObject *spec)
+{
+    return PyModuleDef_Init(&imp_dummy_def);
+}
diff --git a/lib_pypy/_testmultiphase.py b/lib_pypy/_testmultiphase.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_testmultiphase.py
@@ -0,0 +1,18 @@
+import imp
+import os
+
+try:
+    import cpyext
+except ImportError:
+    raise ImportError("No module named '_testmultiphase'")
+import _pypy_testcapi
+cfile = '_testmultiphase.c'
+thisdir = os.path.dirname(__file__)
+output_dir = _pypy_testcapi.get_hashed_dir(os.path.join(thisdir, cfile))
+try:
+    fp, filename, description = imp.find_module('_testmultiphase', path=[output_dir])
+    with fp:
+        imp.load_module('_testmultiphase', fp, filename, description)
+except ImportError:
+    print('could not find _testmultiphase in %s' % output_dir)
+    _pypy_testcapi.compile_shared('_testmultiphase.c', '_testmultiphase', output_dir)

From pypy.commits at gmail.com  Thu Aug 31 12:29:33 2017
From: pypy.commits at gmail.com (rlamy)
Date: Thu, 31 Aug 2017 09:29:33 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: disable
 _testmultiphase.call_state_registration_func()
Message-ID: <59a8396d.d8aadf0a.71600.06c6@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92289:8de4e0b09ce1
Date: 2017-08-31 17:23 +0100
http://bitbucket.org/pypy/pypy/changeset/8de4e0b09ce1/

Log:	disable _testmultiphase.call_state_registration_func()

diff --git a/lib-python/3/test/test_importlib/extension/test_loader.py b/lib-python/3/test/test_importlib/extension/test_loader.py
--- a/lib-python/3/test/test_importlib/extension/test_loader.py
+++ b/lib-python/3/test/test_importlib/extension/test_loader.py
@@ -146,7 +146,8 @@
         importlib.reload(module)
         self.assertIs(ex_class, module.Example)

-    def test_try_registration(self):
+    # XXX: PyPy doesn't support the PyState_* functions yet
+    def XXXtest_try_registration(self):
         '''Assert that the PyState_{Find,Add,Remove}Module C API doesn't work'''
         module = self.load_module()
         with self.subTest('PyState_FindModule'):
diff --git a/lib_pypy/_testmultiphase.c b/lib_pypy/_testmultiphase.c
--- a/lib_pypy/_testmultiphase.c
+++ b/lib_pypy/_testmultiphase.c
@@ -119,43 +119,43 @@

 /* Test that PyState registration fails */

-PyDoc_STRVAR(call_state_registration_func_doc,
-"register_state(0): call PyState_FindModule()\n\
-register_state(1): call PyState_AddModule()\n\
-register_state(2): call PyState_RemoveModule()");
-
-static PyObject *
-call_state_registration_func(PyObject *mod, PyObject *args)
-{
-    int i, ret;
-    PyModuleDef *def = PyModule_GetDef(mod);
-    if (def == NULL) {
-        return NULL;
-    }
-    if (!PyArg_ParseTuple(args, "i:call_state_registration_func", &i))
-        return NULL;
-    switch (i) {
-        case 0:
-            mod = PyState_FindModule(def);
-            if (mod == NULL) {
-                Py_RETURN_NONE;
-            }
-            return mod;
-        case 1:
-            ret = PyState_AddModule(mod, def);
-            if (ret != 0) {
-                return NULL;
-            }
-            break;
-        case 2:
-            ret = PyState_RemoveModule(def);
-            if (ret != 0) {
-                return NULL;
-            }
-            break;
-    }
-    Py_RETURN_NONE;
-}
+//PyDoc_STRVAR(call_state_registration_func_doc,
+//"register_state(0): call PyState_FindModule()\n\
+//register_state(1): call PyState_AddModule()\n\
+//register_state(2): call PyState_RemoveModule()");
+//
+//static PyObject *
+//call_state_registration_func(PyObject *mod, PyObject *args)
+//{
+//    int i, ret;
+//    PyModuleDef *def = PyModule_GetDef(mod);
+//    if (def == NULL) {
+//        return NULL;
+//    }
+//    if (!PyArg_ParseTuple(args, "i:call_state_registration_func", &i))
+//        return NULL;
+//    switch (i) {
+//        case 0:
+//            mod = PyState_FindModule(def);
+//            if (mod == NULL) {
+//                Py_RETURN_NONE;
+//            }
+//            return mod;
+//        case 1:
+//            ret = PyState_AddModule(mod, def);
+//            if (ret != 0) {
+//                return NULL;
+//            }
+//            break;
+//        case 2:
+//            ret = PyState_RemoveModule(def);
+//            if (ret != 0) {
+//                return NULL;
+//            }
+//            break;
+//    }
+//    Py_RETURN_NONE;
+//}

 static PyType_Slot Str_Type_slots[] = {
@@ -174,8 +174,8 @@
 static PyMethodDef testexport_methods[] = {
     {"foo", testexport_foo, METH_VARARGS,
      testexport_foo_doc},
-    {"call_state_registration_func", call_state_registration_func,
-     METH_VARARGS, call_state_registration_func_doc},
+//    {"call_state_registration_func", call_state_registration_func,
+//     METH_VARARGS, call_state_registration_func_doc},
     {NULL, NULL}  /* sentinel */
 };

From pypy.commits at gmail.com  Thu Aug 31 12:29:35 2017
From: pypy.commits at gmail.com (rlamy)
Date: Thu, 31 Aug 2017 09:29:35 -0700 (PDT)
Subject: [pypy-commit] pypy multiphase: Disable not-yet-supported tp_finalize
 slot in _testmultiphase
Message-ID: <59a8396f.89c4df0a.72ff5.0a3d@mx.google.com>

Author: Ronan Lamy
Branch: multiphase
Changeset: r92290:9edde9447569
Date: 2017-08-31 17:28 +0100
http://bitbucket.org/pypy/pypy/changeset/9edde9447569/

Log:	Disable not-yet-supported tp_finalize slot in _testmultiphase

diff --git a/lib_pypy/_testmultiphase.c b/lib_pypy/_testmultiphase.c
--- a/lib_pypy/_testmultiphase.c
+++ b/lib_pypy/_testmultiphase.c
@@ -83,7 +83,7 @@
 static PyType_Slot Example_Type_slots[] = {
     {Py_tp_doc, "The Example type"},
-    {Py_tp_finalize, Example_finalize},
+//    {Py_tp_finalize, Example_finalize},
     {Py_tp_traverse, Example_traverse},
    {Py_tp_getattro, Example_getattro},
    {Py_tp_setattr, Example_setattr},
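The `Py_mod_create` and `Py_mod_exec` slots exercised throughout `_testmultiphase.c` implement PEP 489 two-phase module initialization: creation of the module object is separated from filling it in. The same create/exec split can be sketched in pure Python with importlib; the `TwoPhaseLoader` class and module name below are hypothetical, used only to illustrate the protocol, and the constants mirror the `int_const`/`str_const` values that `execfunc` installs above.

```python
# Minimal sketch of the PEP 489 create/exec split, assuming a hypothetical
# loader; importlib drives the same two phases for C extension modules.
import importlib.util
import types
from importlib.machinery import ModuleSpec


class TwoPhaseLoader:
    def create_module(self, spec):
        # Phase 1: build the (still empty) module object,
        # corresponding to the Py_mod_create slot.
        return types.ModuleType(spec.name)

    def exec_module(self, module):
        # Phase 2: populate the module, corresponding to the
        # Py_mod_exec slot (compare execfunc() in the C source).
        module.int_const = 1969
        module.str_const = 'something different'


spec = ModuleSpec('demo_twophase', TwoPhaseLoader())
mod = importlib.util.module_from_spec(spec)   # calls create_module()
spec.loader.exec_module(mod)                  # calls exec_module()
print(mod.int_const)                          # -> 1969
```

Keeping the two phases distinct is what lets the test modules above fail in each phase independently (`create_null`, `create_raise`, `exec_err`, `exec_raise`, and the unreported-exception variants).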