getting a submatrix of all true
Anton Vredegoor
anton at vredegoor.doge.nl
Fri Jul 4 18:24:01 EDT 2003
John Hunter <jdhunter at ace.bsd.uchicago.edu> wrote:
>Another way of formulating the question: for a sparse boolean matrix
>(sparse on True), what is the optimal way to remove rows and columns
>so that the total number of elements in the matrix is maximal and
>there are no True values left.
After having read Bengts code (and scraping the parts of my exploded
head from the floor) and after later getting the hint about the size
of the problem being 2**N, it finally dawned on me that the problem
would be related to getting all possible combinations of a range.
The following code has the advantage that it generates solutions by
index, for example "M[i]" returns the score and the row and column
information for index i. (note that is not the same as finding the
optimal solution, but it still could be handy to be able to generate a
specific one by index)
However for small matrices it is possible to do exhaustive searching
with "max(M)".
Maybe if this would be combined with some heuristic algorithm it would
be better (genetic algorithm?)
Code below (needs Python 2.3, very lightly tested), I hope this helps,
Anton
class Combinations:
def __init__(self,n,k):
self.n,self.k,self.count = n,k,self.noverk(n,k)
def __getitem__(self,index):
#combination number index
if index > self.count - 1: raise IndexError
res,x,rest = [],self.n,index
for i in range(self.k,0,-1):
while self.noverk(x,i) > rest : x -= 1
rest -= self.noverk(x,i)
res.append(x)
return res
def noverk(self,n,k):
return reduce(lambda a,b: a*(n-b)/(b+1),range(k),1)
class AllCombinations:
def __init__(self,n):
self.n, self.count = n, 2**n
def __getitem__(self,index):
#combination number index for all k in combinations(n,k)
if index > self.count - 1: raise IndexError
n,rest = self.n,index
for k in range(n+1):
c = Combinations(n,k)
if rest - c.count < 0:
return c[rest]
rest -= c.count
class ScoreMatrix:
def __init__(self, X):
self.AC = AllCombinations(len(X))
self.X = X
def __getitem__(self,i):
#score selected rows and columns by index
rows = self.AC[i][::-1]
if rows:
Y = [self.X[i] for i in rows]
Z = [(i,z) for i,z in enumerate(zip(*Y)) if 1 not in z]
cols = [i for i,z in Z]
score = sum(map(len,[z for i,z in Z]))
return score,rows,cols
else: return 0,[],[]
def test():
X = [ [1, 0, 0, 0, 1],
[0, 0, 0, 0, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 1, 0, 0], ]
M = ScoreMatrix(X)
sc,rows,cols = max(M)
print sc
for i,r in enumerate(X):
for j,c in enumerate(r):
if i in rows and j in cols: print c,
else: print 'x',
print
if __name__=='__main__':
test()
output:
16
x x x x x
0 0 x 0 0
x x x x x
0 0 x 0 0
0 0 x 0 0
0 0 x 0 0
More information about the Python-list
mailing list