[SciPy-dev] denoising spatial point process data
Sturla Molden
sturla at molden.no
Fri Jan 9 11:37:31 EST 2009
If it is of interest I'll donate a useful algorithm of denoising spatial
point process data to scipy.spatial. It works by fitting a mixture of two
Poisson processes (signal + noise) using an EM algorithm. It was
originally developed for US DoD to detect minefields using reconnaissance
aircraft images. I use it on neurophysiological spike data to remove
artifact waveforms. It seems to be very efficient.
Regards,
Sturla Molden
# Written by Sturla Molden
import numpy
from scipy.special import gamma
def nnclean(K, knn, ndim, convergence=0.01):
"""
K-th nearest neighbour clutter removal
Algorithm from: Byers, S & Raftery, A (1998). Nearest-Neighbor
Clutter Removal for estimating Features in Spatial Point
Processes. J. Am. Stat. Assoc., 93: 577-585.
Input:
K: neighbour to use (K=5 often works well)
Dk: distance to K-th nearest neighbour.
Obtain by searching a scipy.spatial.cKDTree with p=2
ndim: number of dimensions in the data set from which Dk was computed.
Output:
signal: indices of signal points
noise: indices of clutter points
delta: likelihood that a point is signal
loglike: total log-likelihood of the fit
"""
# size of data
d, n = ndim, len(Dk)
# calculate volume of N-D hypersphere
ad = (2*numpy.pi**(d/2.0))/(d*gamma(d/2.0))
#select a random starting point
delta = numpy.random.rand(n)
l1 = K * delta.sum()/(ad*numpy.sum((Dk**d)*delta))
l2 = K*numpy.sum(1.0-delta)/(ad*numpy.sum((Dk**d)*(1.0-delta)))
#l1[numpy.where(l1<1E-12)[0]] = 1E-12
#l2[numpy.where(l2<1E-12)[0]] = 1E-12
p = numpy.sum(delta)/n
old_loglike = numpy.sum( -p*l1*ad*((Dk**d)*delta) -
(1.0-p)*l2*ad*((Dk**d)*(1.0-delta))
+ delta*K*numpy.log(l1*ad) + (1.0-delta)*K*numpy.log(l2*ad) )
# estimate Poisson mixture by EM updates
term = 100
loglike = [old_loglike]
counter = 0
run = 1
old_classification = numpy.round(delta)
while run:
new_delta, l1, l2, p = EM_update(Dk,K,n,d,ad,delta,l1,l2,p)
#l1[numpy.where(l1<1E-12)[0]] = 1E-12
#l2[numpy.where(l2<1E-12)[0]] = 1E-12
new_loglike = numpy.sum( -p*l1*ad*((Dk**d)*new_delta) -
(1-p)*l2*ad*((Dk**d)*(1.0-new_delta))
+ new_delta*K*numpy.log(l1*ad) +
(1.0-new_delta)*K*numpy.log(l2*ad) )
update = 100*(new_loglike - old_loglike)/new_loglike
# Force at least 3 EM updates, quit after 50.
# Quit if EM starts to oscillate.
counter = counter + 1
if counter < 3:
run = 1
elif counter > 50:
run = 0
elif old_loglike > new_loglike:
run = 0
classification = old_classification
new_loglike = old_loglike;
new_delta = delta
elif update < convergence:
run = 0
old_loglike = new_loglike
loglike.append(new_loglike)
delta = new_delta
if l2 > l1:
delta = 1.0 - delta
idx, = numpy.where(delta < 1E-6)
delta[idx] = 1E-6
signal = numpy.where(delta > .5)[0]
noise = numpy.where(delta <= .5)[0]
# done
return signal, noise, delta, loglike
#Single EM update
def EM_update(Dk,K,n,d,ad,delta,l1,l2,p):
factorial = lambda K : numpy.arange(1.0,K+1.0).prod() if K > 1 else 1.0
pdf = lambda x,K,l,d: numpy.exp(-l*(x**d)) * 2 * (l**K) * x**(d*K - 1)
/ factorial(K-1)
# E-step
P1 = pdf(Dk,K,l1*ad,d)
P2 = pdf(Dk,K,l2*ad,d)
index, = numpy.where((P1 == 0.0) * (P2 == 0.0))
delta[index] = .5
index, = numpy.where((P1 != 0.0) + (P2 != 0.0))
delta[index] = p*P1[index]/(p*P1[index] + (1.0-p)*P2[index])
# M-step
l1 = K*numpy.sum(delta)/(ad*numpy.sum((Dk**d)*delta))
l2 = K*numpy.sum(1.0-delta)/(ad*numpy.sum((Dk**d)*(1.0-delta)))
p = numpy.sum(delta)/n
return delta, l1, l2, p
More information about the SciPy-Dev
mailing list