[pypy-issue] [issue901] *** glibc detected *** pypy: corrupted double-linked list: 0x000000000bd76de0 ***
Alexander Milenko
tracker at bugs.pypy.org
Mon Oct 10 20:47:16 CEST 2011
Alexander Milenko <alvein.flea at gmail.com> added the comment:
I do not know how to provide the data: there are more than 5 million records.
Before the error occurred, the script had been running for 8 hours on a multi-core machine with 8 GB of RAM.
________________________________________
PyPy bug tracker <tracker at bugs.pypy.org>
<https://bugs.pypy.org/issue901>
________________________________________
-------------- next part --------------
# -*- coding: utf8 -*-
import sys
import os
import time
# Append the parent of the current working directory to the module search path
# (presumably so the project package `idea` imported below resolves -- confirm).
path = os.path.normpath(os.path.join(os.getcwd(), '..'))
sys.path.append(path)
from django.db.models.query_utils import Q
from idea.vasya.models import Idei74Matrix, ProviderMatrix, ProviderMatrixAnalogs, Providers
# Maps a single character to a one-bit mask; every value is a power of two.
# start_check() tests each of these masks against the XOR of two integer
# hashes to count differing bits.
# Several digit keys share a mask (u'1'/u'6', u'2'/u'7', u'3'/u'8', u'4'/u'9').
# NOTE(review): the keys shown as u'Ñ' look like mojibake -- presumably
# two-byte UTF-8 Cyrillic letters whose second byte was lost in this copy --
# and one key literal is split across two lines. As written, the repeated key
# collapses to a single entry and the split literal does not parse. Restore
# these keys from the original attachment before running.
BITS = {
u'4': 1152921504606846976,
u'5': 2305843009213693952,
u'1': 144115188075855872,
u'0': 72057594037927936,
u'3': 576460752303423488,
u'б': 268435456,
u'а': 134217728,
u'г': 1073741824,
u'в': 536870912,
u'е': 4294967296,
u'д': 2147483648,
u'з': 17179869184,
u'ж': 8589934592,
u'7': 288230376151711744,
u'и': 34359738368,
u'л': 137438953472,
u'к': 68719476736,
u'н': 549755813888,
u'м': 274877906944,
u'п': 2199023255552,
u'о': 1099511627776,
u'Ñ': 8796093022208,
u'Ñ': 4398046511104,
u'Ñ': 35184372088832,
u'Ñ': 17592186044416,
u'Ñ
': 140737488355328,
u'Ñ': 70368744177664,
u'Ñ': 562949953421312,
u'Ñ': 281474976710656,
u'Ñ': 2251799813685248,
u'Ñ': 1125899906842624,
u'6': 144115188075855872,
u'Ñ': 4503599627370496,
u'Ñ': 36028797018963968,
u'Ñ': 9007199254740992,
u'9': 1152921504606846976,
u'8': 576460752303423488,
u'2': 288230376151711744,
u'a': 2,
u'c': 8,
u'b': 4,
u'e': 32,
u'd': 16,
u'g': 128,
u'f': 64,
u'i': 512,
u'h': 256,
u'k': 2048,
u'j': 1024,
u'm': 8192,
u'l': 4096,
u'o': 32768,
u'n': 16384,
u'q': 131072,
u'p': 65536,
u's': 524288,
u'r': 262144,
u'u': 2097152,
u't': 1048576,
u'w': 8388608,
u'v': 4194304,
u'y': 33554432,
u'x': 16777216,
u'z': 67108864,
}
from Queue import Queue
from threading import Thread
class Worker(Thread):
    """Daemon thread that executes tasks taken from a shared queue, forever.

    Each task is a ``(func, args, kwargs)`` tuple. The thread starts itself
    as soon as it is constructed.
    """

    def __init__(self, tasks):
        """Remember the task queue, mark the thread as daemon and start it.

        tasks: queue object providing blocking ``get()`` and ``task_done()``.
        """
        Thread.__init__(self)
        self.tasks = tasks
        # The daemon flag may only be set before start().
        self.daemon = True
        self.start()

    # No __del__ on purpose: the previous finalizer assigned self.daemon on
    # an already-started thread, which threading rejects with RuntimeError,
    # and clearing attributes in a finalizer frees nothing.

    def run(self):
        """Pull tasks off the queue and run them until the process exits.

        Exceptions raised by a task are printed together with the task's
        arguments and do not stop the worker.
        """
        while True:
            func, args, kwargs = self.tasks.get()
            try:
                func(*args, **kwargs)
            except Exception as e:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("%s %s %s" % (e, args, kwargs))
            finally:
                # Always balance get() with task_done(); otherwise a task that
                # escapes the handler above would leave Queue.join() blocked.
                self.tasks.task_done()
class ThreadPool:
    """Fixed group of Worker threads fed from one bounded task queue."""

    def __init__(self, num_threads):
        """Create a queue of capacity num_threads and start num_threads workers."""
        self.tasks = Queue(num_threads)
        for _worker_index in range(num_threads):
            # Workers start themselves and keep their own queue reference.
            Worker(self.tasks)

    def __del__(self):
        """Drop this pool's reference to the task queue."""
        self.tasks = None

    def add_task(self, func, *args, **kargs):
        """Queue ``func(*args, **kargs)`` for execution by a worker.

        The queue is bounded, so this call waits while it is full.
        """
        job = (func, args, kargs)
        self.tasks.put(job)

    def awaiting_completion(self):
        """Block until every queued task has been marked done."""
        pending = self.tasks
        pending.join()
# start_check(matrix, provider_items): compare one matrix row against each
# provider item and save a ProviderMatrixAnalogs row for every close match.
# NOTE(review): the indentation of this function was lost in this copy; the
# nesting must be restored from the original attachment before it can run.
def start_check(matrix, provider_items):
# Look up analog rows already flagged bad_variant=True for this matrix row
# and the provider of the first item ...
src_pma = ProviderMatrixAnalogs.objects.filter(provider__in=provider_items, idei__id=matrix.id,
provider__provider=provider_items[0].provider.pk, bad_variant=True)
# ... and exclude the provider items they point to from further checking.
src_pma = [pma.provider.id for pma in src_pma]
if src_pma:
provider_items = provider_items.filter(~Q(id__in=src_pma))
# `hash` and `frase` are split on ":::"; entries at the same index are later
# used as a pair (matrix_frases[ii] with matrix_hashes[ii], frases[i] with hashes[i]).
matrix_hashes = matrix.hash.split(":::") if matrix.hash else []
matrix_frases = matrix.frase.split(":::") if matrix.frase else []
try:
for item in provider_items:
# Set once a distance-0 match is found; the guards below then skip the
# remaining comparisons for this item.
find_zero = False
hashes = item.hash.split(":::")
frases = item.frase.split(":::")
for i, hash in enumerate(hashes):
if not find_zero:
for ii, mh in enumerate(matrix_hashes):
# Empty hash strings on either side are skipped.
if not find_zero and mh and hash:
# XOR leaves a bit set wherever the two integer hashes differ.
r = int(mh) ^ int(hash)
# l counts how many BITS masks are set in r; counting stops once l exceeds 2.
l = 0
for b in BITS:
if r & BITS[b]:
l += 1
if l > 2:
break
# Only when fewer than three masks matched is the Levenshtein
# distance between the corresponding phrases computed.
if l < 3:
d = distance(matrix_frases[ii], frases[i])
if d < 3:
pma = ProviderMatrixAnalogs()
pma.idei = matrix
pma.provider = item
pma.distance = d
# Records the index of the item hash that produced the match.
pma.algorythm = 'alg%s' % i
# d == 0 means the two phrases are identical.
# NOTE(review): whether `pma.is_analog = True` belongs inside this `if`
# cannot be determined from this copy -- confirm against the original.
if not d:
find_zero = True
pma.is_analog = True
pma.save(using='master')
# Presumably raised by int() on a non-numeric hash entry; the handler prints
# the inputs for diagnosis instead of propagating the error.
# NOTE(review): `hashes`/`frases` are only bound after the first item is read.
except ValueError, e:
print "error on matrix item id: %s" % matrix.id
print "e: %s" % e
print "matrix_hashes: %s" % matrix_hashes
print "matrix_frases: %s" % matrix_frases
print "hashes: %s" % hashes
print "frases: %s" % frases
print "-----"
def distance(a, b):
    """Return the Levenshtein distance between sequences a and b.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only two rows of the edit matrix are kept, each min(len(a), len(b)) + 1
    cells wide.

    a, b: indexable sequences with a length (e.g. strings).
    """
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m so that rows are as short as possible.
        a, b = b, a
        n, m = m, n
    # Row 0: turning an empty prefix of b into a[:j] costs j insertions.
    current_row = list(range(n + 1))
    for i in range(1, m + 1):
        # Exactly n + 1 cells are needed per row; cell 0 is the cost of
        # deleting the first i elements of b.
        previous_row, current_row = current_row, [i] + [0] * n
        for j in range(1, n + 1):
            add = previous_row[j] + 1
            delete = current_row[j - 1] + 1
            change = previous_row[j - 1]
            if a[j - 1] != b[i - 1]:
                change += 1
            current_row[j] = min(add, delete, change)
    return current_row[n]
# Driver: for every provider, queue one start_check() task per candidate
# matrix row on a pool of 16 worker threads.
# NOTE(review): indentation was lost in this copy; lines after `for p in
# providers:` are presumably the loop body -- confirm against the original.
pool = ThreadPool(16)
# Matrix rows with show=True that are not in group 221.
idea_items = Idei74Matrix.objects.filter(~Q(group=221) & Q(show=True))
providers = Providers.objects.all()
for p in providers:
# Analog rows of this provider already marked is_analog for shown provider items.
analogs_items = ProviderMatrixAnalogs.objects.filter(Q(provider__provider__id=p.id) & Q(is_analog=True) & Q(provider__show=True))
# NOTE(review): bad_items is assigned but not used in the code visible here.
bad_items = ProviderMatrixAnalogs.objects.filter(Q(provider__provider__id=p.id) & Q(bad_variant=True))
# Ids on both sides that already take part in a confirmed analog pair.
idea_items_new = [obj.idei.id for obj in analogs_items]
provider_items = [obj.provider.id for obj in analogs_items]
# Remaining matrix rows: not yet matched and in a group linked to this provider.
idea_items_new = idea_items.filter(~Q(id__in=idea_items_new) & Q(group__providers=p))
# Remaining provider rows: this provider's shown, non-hidden, not yet matched items.
provider_items = ProviderMatrix.objects.filter(Q(provider__id=p.id) & ~Q(id__in=provider_items) & Q(show=True) & Q(hide=False))
if provider_items:
for item in idea_items_new:
# The same provider_items queryset object is handed to every task.
pool.add_task(start_check, item, provider_items)
# NOTE(review): cannot tell from this copy whether this wait runs once per
# provider (inside the loop) or once after all providers were queued.
pool.awaiting_completion()
More information about the pypy-issue
mailing list