encoding problems (é and è)

Larry Bates larry.bates at websafe.com
Thu Mar 23 10:55:55 EST 2006


Seems to work fine for me.

>>> x="éÇ"
>>> x=x.replace('é','E')
>>> x
'E\xc7'
>>> x=x.replace('Ç','C')
>>> x
'EC'

You should also be able to use the .upper() method to
uppercase everything in the string in a single statement:

tstr=ligneA.upper()

Note: you should never use 'str' as a variable name, as
it will mask the built-in str type.

-Larry Bates

bussiere bussiere wrote:
> Hi, I'm making a program for formatting strings,
> and
> I've added:
> #!/usr/bin/python
> # -*- coding: utf-8 -*-
> 
> at the beginning of my script, but
> 
>  str = str.replace('Ç', 'C')
>         str = str.replace('é', 'E')
>         str = str.replace('É', 'E')
>         str = str.replace('è', 'E')
>         str = str.replace('È', 'E')
>         str = str.replace('ê', 'E')
> 
> 
> doesn't work: it gives me " and , instead of replacing é with E
> 
> 
> If someone has an idea, that would be great.
> 
> regards
> Bussiere
> PS: I've added the whole script below:
> 
> 
> 
> 
> 
> 
> __________________________________________________________________________
> 
> 
> 
> 
> #!/usr/bin/python
> # -*- coding: utf-8 -*-
> import fileinput, glob, string, sys, os, re
> 
> fichA=raw_input("Entrez le nom du fichier d'entree : ")
> print ("\n")
> fichC=raw_input("Entrez le nom du fichier de sortie : ")
> print ("\n")
> normalisation1 = raw_input("Normaliser les adresses 1 (ex : Avenue->
> AV) (O/N) ou A pour tout normaliser \n")
> normalisation1 = normalisation1.upper()
> 
> if normalisation1 != "A":
>     print ("\n")
>     normalisation2 = raw_input("Normaliser les civilités (ex :
> Docteur-> DR) (O/N) \n")
>     normalisation2 = normalisation2.upper()
>     print ("\n")
>     normalisation3 = raw_input("Normaliser les Adresses 2 (ex :
> Place-> PL) (O/N) \n")
>     normalisation3 = normalisation3.upper()
> 
> 
>     normalisation4 = raw_input("Normaliser les caracteres / et - (ex :
> / ->   ) (O/N) \n" )
>     normalisation4 = normalisation4.upper()
> 
> if normalisation1 == "A":
>     normalisation1 = "O"
>     normalisation2 = "O"
>     normalisation3 = "O"
>     normalisation4 = "O"
> 
> 
> fiA=open(fichA,"r")
> fiC=open(fichC,"w")
> 
> 
> compteur = 0
> 
> while 1:
> 
>     ligneA=fiA.readline()
> 
> 
> 
>     if ligneA == "":
> 
>         break
> 
>     if ligneA != "":
>         str = ligneA
>         str = str.replace('a', 'A')
>         str = str.replace('b', 'B')
>         str = str.replace('c', 'C')
>         str = str.replace('d', 'D')
>         str = str.replace('e', 'E')
>         str = str.replace('f', 'F')
>         str = str.replace('g', 'G')
>         str = str.replace('h', 'H')
>         str = str.replace('i', 'I')
>         str = str.replace('j', 'J')
>         str = str.replace('k', 'K')
>         str = str.replace('l', 'L')
>         str = str.replace('m', 'M')
>         str = str.replace('n', 'N')
>         str = str.replace('o', 'O')
>         str = str.replace('p', 'P')
>         str = str.replace('q', 'Q')
>         str = str.replace('r', 'R')
>         str = str.replace('s', 'S')
>         str = str.replace('t', 'T')
>         str = str.replace('u', 'U')
>         str = str.replace('v', 'V')
>         str = str.replace('w', 'W')
>         str = str.replace('x', 'X')
>         str = str.replace('y', 'Y')
>         str = str.replace('z', 'Z')
> 
>         str = str.replace('ç', 'C')
>         str = str.replace('Ç', 'C')
>         str = str.replace('é', 'E')
>         str = str.replace('É', 'E')
>         str = str.replace('è', 'E')
>         str = str.replace('È', 'E')
>         str = str.replace('ê', 'E')
>         str = str.replace('Ê', 'E')
>         str = str.replace('ë', 'E')
>         str = str.replace('Ë', 'E')
>         str = str.replace('ä', 'A')
>         str = str.replace('Ä', 'A')
>         str = str.replace('à', 'A')
>         str = str.replace('À', 'A')
>         str = str.replace('Á', 'A')
>         str = str.replace('Â', 'A')
>         str = str.replace('Ä', 'A')
>         str = str.replace('Ã', 'A')
>         str = str.replace('â', 'A')
>         str = str.replace('Ä', 'A')
>         str = str.replace('ï', 'I')
>         str = str.replace('Ï', 'I')
>         str = str.replace('î', 'I')
>         str = str.replace('Î', 'I')
>         str = str.replace('ô', 'O')
>         str = str.replace('Ô', 'O')
>         str = str.replace('ö', 'O')
>         str = str.replace('Ö', 'O')
>         str = str.replace('Ú','U')
>         str = str.replace('  ', ' ')
>         str = str.replace('   ', ' ')
>         str = str.replace('    ', ' ')
> 
> 
> 
>         if normalisation1 == "O":
>             str = str.replace('AVENUE', 'AV')
>             str = str.replace('BOULEVARD', 'BD')
>             str = str.replace('FAUBOURG', 'FBG')
>             str = str.replace('GENERAL', 'GAL')
>             str = str.replace('COMMANDANT', 'CMDT')
>             str = str.replace('MARECHAL', 'MAL')
>             str = str.replace('PRESIDENT', 'PRDT')
>             str = str.replace('SAINT', 'ST')
>             str = str.replace('SAINTE', 'STE')
>             str = str.replace('LOTISSEMENT', 'LOT')
>             str = str.replace('RESIDENCE', 'RES')
>             str = str.replace('IMMEUBLE', 'IMM')
>             str = str.replace('IMEUBLE', 'IMM')
>             str = str.replace('BATIMENT', 'BAT')
> 
>         if normalisation2 == "O":
>             str = str.replace('MONSIEUR', 'M')
>             str = str.replace('MR', 'M')
>             str = str.replace('MADAME', 'MME')
>             str = str.replace('MADEMOISELLE', 'MLLE')
>             str = str.replace('DOCTEUR', 'DR')
>             str = str.replace('PROFESSEUR', 'PR')
>             str = str.replace('MONSEIGNEUR', 'MGR')
>             str = str.replace('M ME','MME')
> 
> 
>         if normalisation3 == "O":
>             str = str.replace('PLACE', 'PL')
>             str = str.replace('IMPASSE', 'IMP')
>             str = str.replace('ESPLANADE', 'ESP')
>             str = str.replace('ROND POINT', 'RPT')
>             str = str.replace('ROUTE', 'RTE')
>             str = str.replace('PASSAGE', 'PAS')
>             str = str.replace('SQUARE', 'SQ')
>             str = str.replace('ALLEE', 'ALL')
>             str = str.replace('ESCALIER', 'ESC')
>             str = str.replace('ETAGE', 'ETG')
>             str = str.replace('PORTE', 'PTE')
>             str = str.replace('APPARTEMENT', 'APT')
>             str = str.replace('APARTEMENT', 'APT')
>             str = str.replace('AVENUE', 'AV')
>             str = str.replace('BOULEVARD', 'BD')
>             str = str.replace('ZONE D ACTIVITE', 'ZA')
>             str = str.replace('ZONE D ACTIVITEE', 'ZA')
>             str = str.replace('ZONE D AMENAGEMENT CONCERTE', 'ZAC')
>             str = str.replace('ZONE D AMENAGEMENT CONCERTEE', 'ZAC')
>             str = str.replace('ZONE INDUSTRELLE', 'ZI')
>             str = str.replace('CENTRE COMMERCIAL', 'CCAL')
>             str = str.replace('CENTRE', 'CTRE')
>             str = str.replace('C.CIAL','CCAL')
>             str = str.replace('CTRE CIAL','CCAL')
>             str = str.replace('CTRE CCAL','CCAL')
>             str = str.replace('GALERIE','GAL')
>             str = str.replace('MARTYR', 'M')
>             str = str.replace('ANCIENS', 'AC')
>             str = str.replace('ANCIEN', 'AC')
>             str = str.replace('REVEREND PERE','R P')
> 
>         if normalisation4 == "O":
>             str = str.replace(';\"', ' ')
>             str = str.replace('\"', ' ')
>             str = str.replace('\'', ' ')
>             str = str.replace('-', ' ')
>             str = str.replace(',', ' ')
>             str = str.replace('\\', ' ')
>             str = str.replace('\/', ' ')
>             str = str.replace('&', ' ')
>             str = str.replace('%', ' ')
>             str = str.replace('*', ' ')
>             str = str.replace('  ', ' ')
>             str = str.replace('.', ' ')
>             str = str.replace('_', ' ')
>             str = str.replace('   ', ' ')
>             str = str.replace('    ', ' ')
>             str = str.replace('?', ' ')
>             str = str.replace('%', ' ')
>             str = str.replace('|', ' ')
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
>         str = str.replace('  ', ' ')
>         str = str.replace('   ', ' ')
>         str = str.replace('    ', ' ')
>         fiC.write(str)
>         compteur += 1
>         print compteur, "\n"
> 
> 
> print "FINIT"
> fiA.close()
> fiC.close()



More information about the Python-list mailing list