[issue1087418] long int bitwise ops speedup (patch included)

Mark Dickinson report at bugs.python.org
Sat Oct 24 11:23:15 CEST 2009


Mark Dickinson <dickinsm at gmail.com> added the comment:

Here's the inline assembly version, for comparison:

/*
 * SUM2_BIN64 -- 128-bit addition built from two 64-bit halves, using GCC
 * extended inline assembly for x86-64:
 *
 *     (sumhigh:sumlow) = (ahigh:alow) + (bhigh:blow)
 *
 * sumhigh and sumlow must be lvalues; the four inputs are cast to uint64_t.
 * "addq" adds the low words and sets the carry flag, then "adcq" adds the
 * high words plus that carry.  Nothing captures the carry out of the high
 * word, so the result is the sum modulo 2**128.
 *
 * Operand map and constraints:
 *   %0  sumhigh  "=r"   output register; ahigh (%2, constraint "0") is
 *                       loaded into the same register beforehand.
 *   %1  sumlow   "=&r"  output register, early-clobber: it is written by
 *                       the addq while bhigh (%3) has not been read yet,
 *                       so no unrelated input may share its register.
 *                       alow (%4, constraint "%1") is loaded into it.
 *   %3  bhigh    "rm"   register or memory operand.
 *   %5  blow     "rm"   register or memory operand.
 * The '%' in "%1" declares alow commutative with the operand that follows
 * it (blow), so the compiler may swap the two.
 * "cc" tells the compiler that the condition flags are modified.
 */
#define SUM2_BIN64(sumhigh, sumlow, ahigh, alow, bhigh, blow)    \
  __asm__ ("addq\t%5, %1\n\t"                                    \
           "adcq\t%3, %0"                                        \
           : "=r" (sumhigh), "=&r" (sumlow)                      \
           : "0" ((uint64_t)(ahigh)), "rm" ((uint64_t)(bhigh)),  \
             "%1" ((uint64_t)(alow)), "rm" ((uint64_t)(blow))    \
           : "cc")

/*
 * Add the two 128-bit quantities (ahigh:alow) and (bhigh:blow) with the
 * SUM2_BIN64 inline-assembly macro and write the two 64-bit halves of the
 * sum through sumlow and sumhigh.
 */
void
add_128_asm(uint64_t *sumhigh, uint64_t *sumlow,
           uint64_t ahigh, uint64_t alow,
           uint64_t bhigh, uint64_t blow)
{
  uint64_t hi;
  uint64_t lo;

  SUM2_BIN64(hi, lo, ahigh, alow, bhigh, blow);

  /* The low word is stored before the high word. */
  *sumlow = lo;
  *sumhigh = hi;
}

And the generated output (again gcc-4.4 with -O3):

# NOTE(review): the "#" remarks in this listing are reviewer annotations,
# not compiler output.  The two numbered "# ..." marker lines are original
# and presumably delimit the inline-asm text inserted by the compiler.
_add_128_asm:
LFB1:
        pushq   %rbp                    # save the caller's frame pointer
LCFI2:
# 29 "test.c" 1
        addq    %r9, %rcx               # macro's "addq %5, %1": add the low words, sets carry
        adcq    %r8, %rdx               # macro's "adcq %3, %0": add the high words plus carry
# 0 "" 2
        movq    %rsp, %rbp              # remainder of the frame setup
LCFI3:
        movq    %rcx, (%rsi)            # store the low word; matches "*sumlow = alow" (presumably %rsi = sumlow)
        movq    %rdx, (%rdi)            # store the high word; matches "*sumhigh = ahigh" (presumably %rdi = sumhigh)
        leave                           # undo the frame setup
        ret

----------

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue1087418>
_______________________________________


More information about the Python-bugs-list mailing list