[issue1087418] long int bitwise ops speedup (patch included)
Mark Dickinson
report at bugs.python.org
Sat Oct 24 11:23:15 CEST 2009
Mark Dickinson <dickinsm at gmail.com> added the comment:
Here's the inline assembly version, for comparison:
/*
 * SUM2_BIN64(sumhigh, sumlow, ahigh, alow, bhigh, blow)
 *
 * Add two 128-bit values, each given as a (high, low) pair of 64-bit
 * words, and store the 128-bit result in the lvalues sumhigh:sumlow.
 * Any carry out of the high word is not captured (there are only two
 * outputs), so the result is the sum modulo 2**128.
 *
 * Written with GCC extended asm using x86-64 instructions (addq/adcq),
 * so it is only usable with GCC-compatible compilers on that target.
 *
 * Operand map:
 *   %0 = sumhigh (output, preloaded with ahigh through the "0" tie)
 *   %1 = sumlow  (output, preloaded with alow through the "%1" tie)
 *   %3 = bhigh   (register or memory)
 *   %5 = blow    (register or memory)
 *
 * "addq %5, %1" adds the low words; "adcq %3, %0" then adds the high
 * words plus the carry left by the addq.
 *
 * Constraint notes:
 *   - "=&r" (early clobber) on sumlow: it is written by the first
 *     instruction while bhigh (%3) is still to be read by the second,
 *     so it must not be given a register holding another input.
 *   - The '%' in "%1" declares alow and blow commutative, letting the
 *     compiler swap them if that yields a better register assignment.
 *   - "cc" tells the compiler that the condition flags are modified.
 *
 * All four inputs are converted to uint64_t before use.
 */
#define SUM2_BIN64(sumhigh, sumlow, ahigh, alow, bhigh, blow) \
__asm__ ("addq\t%5, %1\n\t" \
"adcq\t%3, %0" \
: "=r" (sumhigh), "=&r" (sumlow) \
: "0" ((uint64_t)(ahigh)), "rm" ((uint64_t)(bhigh)), \
"%1" ((uint64_t)(alow)), "rm" ((uint64_t)(blow)) \
: "cc")
/*
 * Add the 128-bit values ahigh:alow and bhigh:blow using SUM2_BIN64.
 *
 * The low 64 bits of the sum are stored through sumlow and the high
 * 64 bits through sumhigh, in that order.
 */
void
add_128_asm(uint64_t *sumhigh, uint64_t *sumlow,
            uint64_t ahigh, uint64_t alow,
            uint64_t bhigh, uint64_t blow)
{
    uint64_t result_high;
    uint64_t result_low;

    SUM2_BIN64(result_high, result_low, ahigh, alow, bhigh, blow);

    /* Low word first, then high word, matching the original store order. */
    *sumlow = result_low;
    *sumhigh = result_high;
}
And the generated output (again gcc-4.4 with -O3):
_add_128_asm:
LFB1:
pushq %rbp
LCFI2:
# 29 "test.c" 1
addq %r9, %rcx
adcq %r8, %rdx
# 0 "" 2
movq %rsp, %rbp
LCFI3:
movq %rcx, (%rsi)
movq %rdx, (%rdi)
leave
ret
----------
_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue1087418>
_______________________________________
More information about the Python-bugs-list
mailing list