[SciPy-User] Conversion to graph adjacency list
elastica at laposte.net
elastica at laposte.net
Sat Dec 8 09:26:28 EST 2018
Thanks Robert for your hepfull response, now I better understand how csr conversion works.
I'm benchmarking some graph libraries. My edges-to-csr conversion was hand-written in Python, so very slow! Now, with your code, execution time is 7 times faster!
Here is the code, perhaps we can optimize more:
# =============================================
from time import clock
from sys import stderr, stdin
from scipy.sparse.csgraph import minimum_spanning_tree
from scipy.sparse import csr_matrix
import numpy as np
# ------------ Scipy Code ---------------------------
def wedges2adj(edges, n):
G=[[]for _ in range(n)]
for a, b, w in edges:
G[a].append((b,w))
G[b].append((a,w))
return G
def wedges2scr(edges, n):
G=wedges2adj(edges, n)
indptr=[0]
cnt=0
for line in G:
cnt+=len(line)
indptr.append(cnt)
data=[]
indices=[]
for i in range(n):
for (j,w) in G[i]:
data.append(w)
indices.append(j)
return [data, indptr, indices]
def csr2wedges(Mcsr, shape):
n, p=shape
k=0
edges=[]
data, cumul, cols = Mcsr.data,Mcsr.indptr, Mcsr.indices
for i in range(n):
for j in range(cumul[i+1]-cumul[i]):
edges.append((i, cols[k], data[k]))
k+=1
return edges
def kruskal_scipy(edges, n):
data, indptr, indices=wedges2scr(edges, n)
csr=csr_matrix((data, indices, indptr), shape=(n, n))
tree=minimum_spanning_tree(csr)
edges=csr2wedges(tree, (n,n))
return int(sum((round(w)) for (a,b,w) in edges))
# °°°°°°°°°°°°°°°°°°°°°°°
def wedges2scr_FAST(wedges, n):
row_ind, col_ind, costs = np.transpose(wedges)
return csr_matrix((costs, (row_ind, col_ind)),
shape=(n,n))
def kruskal_scipy_FAST(wedges, n):
csr=wedges2scr_FAST(wedges, n)
tree=minimum_spanning_tree(csr).tocoo()
return int(round(sum(w for (i,j,w) in zip(tree.row, tree.col, tree.data))))
# ------------ Benchmark ---------------------------
def go(solver, L):
global duration
N=len(L)
k=1
solution=[]
while k<N:
edges=[]
n=L[k]
k+=1
for a in range(n):
d=L[k]
k+=1
for j in range(d):
b=L[k]
k+=1
w=L[k]
k+=1
if a<b:
edges.append([a,b-1,w])
if solver==kruskal_scipy:
data, indptr, indices=wedges2scr(edges, n)
start=clock()
csr=csr_matrix((data, indices, indptr), shape=(n, n))
tree=minimum_spanning_tree(csr)
edges=csr2wedges(tree, (n,n))
sol=solver(edges, n)
duration+=clock()-start
solution.append(sol)
else:
start=clock()
sol=solver(edges, n)
duration+=clock()-start
solution.append(sol)
return solution
# data
L=list(int(z) for z in stdin.read().split() if z.isdigit())
output=[]
solvers=[kruskal_scipy, kruskal_scipy_FAST]
for solver in solvers:
duration=0
costs=go(solver, L)
output.append(costs)
print("%-20s : %.3f" %(solver.__name__, duration), file=stderr)
if all(output[i]== output[0] for i in range(len(solvers))):
print("NO error detected", file=stderr)
else:
print("ERROR detected", file=stderr)
# =============================================
Link to data file (100 graphs with at most 30000 vertices each):
https://drive.google.com/file/d/1BKxRAzJ9jeowbrFPyLZ23EomKn1eqohs/view?usp=sharing
The above code is faster than Networkit library code and little slower than Graph-Tool library.
I have two observations:
1. Python code is only 2 times slower
2. Pure C-code is about 9 times faster
In each case, even SciPy cf. _min_spanning_tree.pyx, algorithm is the same: kruskal with Union-find.
More information about the SciPy-User
mailing list