scipy.cluster kmeans analisis de voz

joolivar en ing.uchile.cl joolivar en ing.uchile.cl
Mie Mar 12 16:19:19 CET 2008


Saludos a todos
Tengo que realizar un análisis de voz para un pequeño diccionario;
las palabras son (anterior, siguiente). La idea es poder adelantar y
retroceder por voz las páginas de Impress (openppt). Para ello he
realizado las siguientes rutinas.
El problema es que el error de distorsión para todas las palabras es
casi igual, por lo que no puedo diferenciar entre ellas.
Me gustaría saber si alguien ha usado kmeans y fft de scipy para
corroborar que realmente funcionan, ya que si, por ejemplo, genero el
codebook para la palabra 'anterior' y luego uso vq para encontrar el
error de distorsión, este entrega un valor de 480, que parece exagerado.
Junto a esto, otras palabras entregan casi el mismo valor.
Cualquier ayuda se agradece.

PD: Para la parte de Impress pienso usar UNO con Python.
-----------------------para el codebook--------------------
from  wav_array import wavread
from scipy.fftpack import fft,rfft
from math import log
from scipy.cluster.vq import whiten,kmeans
from scipy import hamming
import pickle
def leer_book(nombre):
    """Load and return the pickled codebook stored in the file *nombre*.

    The file is opened in binary mode, which is what pickle expects, and
    it is closed even when unpickling raises.

    NOTE: unpickling can execute arbitrary code; only load book files
    that were produced locally by ``guardar_book``.
    """
    f = open(nombre, 'rb')
    try:
        return pickle.load(f)
    finally:
        f.close()
def guardar_book(archivo, datos):
    """Pickle *datos* into the file *archivo*, replacing any previous content.

    The file is opened in binary mode and always closed, so the pickled
    data is flushed to disk before the function returns (the previous
    version never closed the file).
    """
    f = open(archivo, 'wb')
    try:
        pickle.dump(datos, f)
    finally:
        f.close()

def ventanas_wav(archivo_wav):
    """Extract whitened spectral and cepstral features from a WAV file.

    The samples returned by ``wavread(archivo_wav)`` are cut into
    consecutive, non-overlapping frames taken every 256 samples.  For each
    frame two feature vectors are computed:

    * the magnitude of the FFT of the Hamming-windowed frame, and
    * the ``rfft`` of the natural logarithm of that magnitude.

    Returns a pair ``(F, C)``: both feature sets after being passed
    through ``scipy.cluster.vq.whiten``.

    NOTE: ``math.log`` raises ValueError if any magnitude is exactly zero
    (for instance on a frame of pure digital silence).
    """
    # Read the file named by the argument.  The previous version read the
    # module-level variable ``wv`` and silently ignored ``archivo_wav``.
    data, Fs, Bits = wavread(archivo_wav)
    V = 256                      # hop between frames, in samples
    M = len(data) // V           # number of whole frames that fit
    F = []
    C = []
    # Walk over all M frames.  The previous ``range(M - V)`` subtracted the
    # window length from the frame count, dropping the last 256 frames and
    # producing no frames at all for recordings shorter than ~65k samples.
    for i in range(M):
        IN = i * V
        # The frame keeps its historical length of V-1 samples so that the
        # feature size stays compatible with already stored codebooks.
        ft = abs(fft(data[IN:IN + V - 1] * hamming(V - 1)))
        F.append(ft)
        C.append(rfft([log(x) for x in ft]))
    return whiten(F), whiten(C)
# Training script: refine the stored FFT and cepstrum codebooks with the
# features of one sample recording, then write both books back to disk.
print('\n----Inicio----')

# ``wv`` stays a module-level name because other code in this file reads it.
wv = '/home/joolivar/Desktop/Nueva/voz/muestras/anterior1.wav'
F, C = ventanas_wav(wv)

# File names of the pickled codebooks (cepstrum and FFT respectively).
CC = 'CFT_book.pdb'
FF = 'FFT_book.pdb'

# Each stored book is indexed with [0] to obtain the codebook that seeds
# the new k-means run.
CFTB = leer_book(CC)
print(CFTB[0])
FFTB = leer_book(FF)

CDBKF = kmeans(F, FFTB[0], iter=80, thresh=1e-5)
CDBKC = kmeans(C, CFTB[0], iter=80, thresh=1e-5)

print('guardando datos')
guardar_book(FF, CDBKF)
guardar_book(CC, CDBKC)
print('----FIN----')


------------------para el análisis--------------
import pickle,commands,string
from scipy.cluster.vq import whiten,vq
from  wav_array import wavread
from scipy.fftpack import fft,fftshift,rfft
from math import log,sqrt
from pylab import hamming,fromstring, Int16, UInt8,  
clip,plot,xlabel,ylabel,title,grid,savefig,show,subplot,figure
def ceptrums(datos):
    """Extract whitened spectral and cepstral features from audio samples.

    *datos* is the sequence of samples of one recording (it must support
    slicing and element-wise multiplication, e.g. a numpy array).  It is
    cut into consecutive, non-overlapping frames taken every 256 samples.
    For each frame two feature vectors are computed:

    * the magnitude of the FFT of the Hamming-windowed frame, and
    * the ``rfft`` of the natural logarithm of that magnitude.

    Returns a pair ``(F, C)``: both feature sets after being passed
    through ``scipy.cluster.vq.whiten``.

    NOTE: ``math.log`` raises ValueError if any magnitude is exactly zero
    (for instance on a frame of pure digital silence).
    """
    # Measure the argument itself.  The previous version used the global
    # ``data`` here, so the frame count came from whatever the caller had
    # last stored in that global rather than from ``datos``.
    V = 256                      # hop between frames, in samples
    M = len(datos) // V          # number of whole frames that fit
    F = []
    C = []
    # Walk over all M frames.  The previous ``range(M - V)`` subtracted the
    # window length from the frame count, dropping the last 256 frames and
    # producing no frames at all for recordings shorter than ~65k samples.
    for i in range(M):
        IN = i * V
        # The frame keeps its historical length of V-1 samples so that the
        # feature size stays compatible with already stored codebooks.
        ft = abs(fft(datos[IN:IN + V - 1] * hamming(V - 1)))
        F.append(ft)
        C.append(rfft([log(x) for x in ft]))
    return whiten(F), whiten(C)
def leer_book(nombre):
    """Load and return the pickled codebook stored in the file *nombre*.

    The file is opened in binary mode, which is what pickle expects, and
    it is closed even when unpickling raises.

    NOTE: unpickling can execute arbitrary code; only load book files
    that were produced locally by the training script.
    """
    f = open(nombre, 'rb')
    try:
        return pickle.load(f)
    finally:
        f.close()
def leer_palabra(nombre):
    """Return only the sample data of the WAV file *nombre*.

    ``wavread`` yields ``(data, Fs, Bits)``; the sampling frequency and
    the bit depth are discarded here.
    """
    muestras, _fs, _bits = wavread(nombre)
    return muestras

# Analysis script: quantise every sample recording against the stored
# codebooks, plot the code indices and print the mean distortion per file.
import glob  # local to this script section; replaces shelling out to ``ls``

print('\n+++Inicio+++\n')
C = 'CFT_book.pdb'
F = 'FFT_book.pdb'
# Each stored book is indexed with [0] below to obtain the codebook.
CFTB = leer_book(C)
FFTB = leer_book(F)

# Sorted list of recordings.  Unlike parsing the output of ``ls``, an empty
# directory yields an empty list instead of an error message that would
# then be treated as a file name.
lista = sorted(glob.glob('/home/joolivar/Desktop/Nueva/voz/muestras/*.wav'))

figure(1)
for indice, ruta in enumerate(lista):
    i = indice + 1  # 1-based position used to place the subplots
    # The name ``data`` is kept on purpose: it is a module-level variable.
    data = leer_palabra(ruta)
    FF, CC = ceptrums(data)
    # Quantise each feature set against the codebook built from the SAME
    # kind of feature.  The previous version had them swapped (FFT
    # features against the cepstrum book and vice versa), which makes the
    # reported distortions meaningless.
    INF, VQF = vq(FF, FFTB[0])
    INC, VQC = vq(CC, CFTB[0])
    # NOTE(review): the three-digit subplot codes only work for up to 9
    # recordings, and the 81x / 82x grids overlap -- confirm the intended
    # layout.
    subplot(810 + i)
    plot(INF, 'x')
    subplot(820 + i)
    plot(INC, 'x')
    # Mean distortion of the FFT features and of the cepstral features.
    print('%s %s' % (sum(VQF) / len(VQF), sum(VQC) / len(VQC)))
show()
print('++++fin++++')

------------wav_array---------
#!/usr/bin/python
# -*- coding: UTF-8 -*-

#
# Copyright (C) 2006 by Hernán Ordiales
# <audiocode en uint8.com.ar>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#

# ``sys`` is imported outside the guarded block so that the error handler
# below can always call ``sys.exit`` (the previous handler referenced
# ``sys`` without ever importing it and died with a NameError instead).
import sys

try:
	import wave
	from sys import exit
	import math
	import array
	from pylab import fromstring, Int16, UInt8, clip,plot,xlabel,ylabel,title,grid,savefig,show
	from FFT import fft, inverse_fft
	from numpy.fft import fftshift
except ImportError:
	# Only a failed import is reported here; any other error propagates
	# with its own traceback.
	print('libraries import error!')
	sys.exit(1)


# Note: at the moment only mono wav files

# Example: [ y, Fs, bits ] = wavread( 'filename' )
# Note: Only supports 8 and 16 bits wav files
def wavread( name ):
	"""Read the WAV file *name* and return ``(Data, Fs, Bits)``.

	``Data`` holds the samples scaled to roughly -1..1, ``Fs`` is the
	sampling frequency and ``Bits`` the number of bits per sample.  Only
	8 and 16 bits per sample are supported; for any other sample width an
	error message is printed and ``(-1, -1, -1)`` is returned.

	The file is always closed before returning (the previous version left
	it open on the unsupported-width path).
	"""
	wav_file = wave.open( name, 'r' )
	try:
		# (nchannels, sampwidth in bytes, sampling frequency, nframes,
		# comptype, compname)
		[Channels,Bytes,Fs,Frames,Compress,CompressName] = wav_file.getparams()
		Data = wav_file.readframes( Frames )
	finally:
		wav_file.close()
	Bits = Bytes*8
	if Bits==16:  # 16 bits per sample
		# -1..1 values, Int16 because Bits=2x8=16
		Data = fromstring( Data, Int16 ) / 32767.0
	elif Bits==8: # 8 bits per sample
		Data = (fromstring( Data, UInt8 ) / 128.0 ) - 1.0 # -1..1 values
	else:
		print("Error. Sorry, this wavread function only supports 8 or 16 bits wav files.")
		return -1, -1, -1
	return Data, Fs, Bits

# Example: wavwrite( y, Fs, filename )
def wavwrite( data_array, Fs, name ):
	"""Write *data_array* to the file *name* as a mono, 16-bit WAV file.

	*data_array* is an indexable sequence of samples in the range -1..1
	and *Fs* is the sampling frequency.  Each sample is scaled by 32767,
	truncated to an integer and stored as a little-endian signed 16-bit
	value.  Values outside -32767..32767 are clipped and a warning is
	printed once at the end.  The approximate size in kb is printed too.

	The file is closed even if writing fails.
	"""
	import struct # local import: packs each block of samples in one call

	a_max = 32767 # max amp
	a_min = -32767 # min amp
	block_size = 1024*10 # samples handed to writeframes() per call
	clipped = False
	len_data_array = len( data_array ) # 2 bytes (int16) per sample

	wav_file = wave.open( name, 'w' )
	try:
		wav_file.setframerate( Fs ) # sets sampling frequency
		wav_file.setnchannels( 1 ) # sets number of channels
		wav_file.setsampwidth( 2 ) # number of bytes: 16bits/8=2
		for start in range( 0, len_data_array, block_size ):
			stop = min( start + block_size, len_data_array )
			samples = []
			for n in range( start, stop ):
				sample = int( data_array[n] * a_max )
				if sample > a_max or sample < a_min:
					clipped = True
					sample = min( max( sample, a_min ), a_max ) # -32767..32767
				samples.append( sample )
			# '<h' is a little-endian signed 16-bit value: low byte first,
			# then the high byte.
			wav_file.writeframes( struct.pack( '<%dh' % len( samples ), *samples ) )
		if clipped:
			print("Warning: Some values were clipped")
		# n*2/1024 (bytes size/1024) = n/512
		print("Final length: %s kb" % (len_data_array // 512))
	finally:
		wav_file.close()

# Example: wavwrite8bits( y, Fs, filename )
def wavwrite8bits( data_array, Fs, name ):
	"""Write *data_array* to the file *name* as a mono, 8-bit WAV file.

	*data_array* is an indexable sequence of samples in the range -1..1
	and *Fs* is the sampling frequency.  Each sample is mapped with
	``int((x + 1.0) * 128)`` and stored as one unsigned byte.  Values
	outside 0..255 are clipped and a warning is printed once at the end.
	The approximate size in kb is printed too.

	The file is closed even if writing fails.
	"""
	import struct # local import: packs each block of samples in one call

	a_max = 255 # max amp
	a_min = 0 # min amp
	block_size = 1024*10 # samples handed to writeframes() per call
	clipped = False
	len_data_array = len( data_array ) # 1 byte (UInt8) per sample

	wav_file = wave.open( name, 'w' )
	try:
		wav_file.setframerate( Fs ) # sets sampling frequency
		wav_file.setnchannels( 1 ) # sets number of channels
		wav_file.setsampwidth( 1 ) # number of bytes, 8 bits per sample
		for start in range( 0, len_data_array, block_size ):
			stop = min( start + block_size, len_data_array )
			samples = []
			for n in range( start, stop ):
				newbyte = int( (data_array[n]+1.0) * 128 ) # ~ 255/2
				if newbyte > a_max or newbyte < a_min:
					clipped = True
					newbyte = min( max( newbyte, a_min ), a_max ) # 0..255
				samples.append( newbyte )
			wav_file.writeframes( struct.pack( '%dB' % len( samples ), *samples ) )
		if clipped:
			print("Warning: Some values were clipped")
		# One byte per sample, so the size is n/1024 kb.  The previous
		# n/512 was copied from the 16-bit writer and reported double.
		print("Final length: %s kb" % (len_data_array // 1024))
	finally:
		wav_file.close()





----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.

_______________________________________________
Lista de correo Python-es 
http://listas.aditel.org/listinfo/python-es
FAQ: http://listas.aditel.org/faqpyes





Más información sobre la lista de distribución Python-es