Extracting/finding strings from a list
Paul McGuire
ptmcg at austin.rr._bogus_.com
Mon Jul 19 12:29:26 EDT 2004
"Steve" <nospam at nopes> wrote in message
news:40fb13f7$1 at clarion.carno.net.au...
> Hi,
>
> I have a very long string, something like:
>
> DISPLAY=localhost:0.0,FORT_BUFFERED=true,
>
> F_ERROPT1=271\,271\,2\,1\,2\,2\,2\,2,G03BASIS=/opt/g03b05/g03/basis,
> GAMESS=/opt/gamess,GAUSS_ARCHDIR=/opt/g03b05/g03/arch,
>
> GAUSS_EXEDIR=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g
> 03,GAUSS_SCR_ROOT=/home/561/345561/scratch,
> GDVBASIS=/opt/g03b05/g03/basis,
>
> GMAIN=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g03,
> GROUP=e12,GV_DIR=/opt/g03b05/gv,HOST=sc1,
> HOSTTYPE=alpha,INFOPATH=/opt/info,KMP_DUPLICATE_LIB_OK=TRUE,
> KMP_STACKSIZE=10485760,
> LINDA_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,
> LINDA_FORTRAN_LINK=f90 -i8 -r8 -omp -reentrancy threaded,
> LOGNAME=345561,MACHTYPE=alpha,MAIL=/var/spool/mail/345561,
>
> MANPATH=/opt/g03b05/g03/bsd:/usr/share/man:/usr/dt/share/man:/usr/loca
>
> l/man:/opt/man:/opt/pbs/man:/opt/rash/man:/usr/opt/mpi/man:/usr/opt/mpi
> /man:/usr/opt/mpi/man,MP_STACK_OVERFLOW=OFF,
> NLSPATH=/usr/lib/nls/msg/%L/%N,OMP_NUM_THREADS=4,ONEEXE=-DONEEXE,
> OSTYPE=osf1,
>
> PERLLIB=/opt/g03b05/g03/bsd,PGI=/usr/pgi,PGIDIR=/usr/pgi/linux86/5.0,
> POSTFL_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,PROJECT=e12,
> QCAUX=/opt/qchem-2.02/aux,QCPLATFORM=DEC_ALPHA,
> RMS_PROJECT=e12,RUNCPP=/lib/cpp,SHELL=/opt/rash/bin/tcsh,SHLVL=1,
> QUEUE=normal
>
>
> and I need to extract the value of the variable "GAUSS_EXEDIR". Although
> these are environment variables, I don't have access to them directly.
> These variables are stored in a special file and I need to parse it to
> be able to extract the variable. I wrote the following code for this:
>
> ...
> data = {'gauss_var' : ''}
> allLines = os.popen('cat ./somefile').readlines()
> j = 0
> # Extract GAUSS_EXEDIR from the string
> gaussCont = 0
> variable_list = value.split(",")
> vars = variable_list[0].split("=")
> while len(allLines) > j and vars[0] != "QUEUE":
> var_line = allLines[j]
> var_toks = split(var_line)
> value = var_toks[0]
> variable_list = value.split(",")
> for k in range(len(pvariable_list)):
> if variable_list[k].find("=") == -1:
> if gaussCont == 1:
> data['guass_var']="%s%s"
> %(data['gauss_var'],variable_list[k])
> gaussCont = -1
> break
> # end if
> # end if
>
> vars = variable_list[k].split("=")
> for m in range(len(vars)):
> if vars[m] == "GAUSS_EXEDIR":
> for p in range(1, len(vars)):
> data['gauss_var']="%s%s" %
> (data['gauss_var'], vars[p])
> # end for
> gaussCont = 1
> # end if
> # end for
> # end for
> j += 1
> if gaussCont == -1:
> break
> # end if
> # end while
>
>
> The reason why I look for the word "QUEUE" is because "QUEUE" is the
> last variable expected in a list. After this line, the list continues
> but the variables belong to another user (so basically it's a big file
> full of env variables that belong to different users. Each list begins
> with "DISPLAY" and ends with "QUEUE").
>
> Is there a much simpler way of doing this? That is, extracting/finding
> specific variables/value pairs from a list/string? These loops take up a
> lot of my time and I'm trying to learn better ways of doing the same.
> Thanks!
>
>
> Steve
>
Here is a pyparsing implementation. -- Paul
(download pyparsing at http://pyparsing.sourceforge.net )
# Sample input: a comma-separated list of NAME=value entries in which long
# values are hard-wrapped across lines (see GAUSS_EXEDIR and MANPATH).
# Written as a raw string so the "\," sequences in F_ERROPT1 (escaped commas
# inside a value) keep their backslash explicitly instead of relying on
# Python passing an unrecognised escape sequence through unchanged.
search_string = r"""
DISPLAY=localhost:0.0,FORT_BUFFERED=true,
F_ERROPT1=271\,271\,2\,1\,2\,2\,2\,2,G03BASIS=/opt/g03b05/g03/basis,
GAMESS=/opt/gamess,GAUSS_ARCHDIR=/opt/g03b05/g03/arch,
GAUSS_EXEDIR=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g
03,GAUSS_SCR_ROOT=/home/561/345561/scratch,
GDVBASIS=/opt/g03b05/g03/basis,
GMAIN=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g03,
GROUP=e12,GV_DIR=/opt/g03b05/gv,HOST=sc1,
HOSTTYPE=alpha,INFOPATH=/opt/info,KMP_DUPLICATE_LIB_OK=TRUE,
KMP_STACKSIZE=10485760,
LINDA_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,
LINDA_FORTRAN_LINK=f90 -i8 -r8 -omp -reentrancy threaded,
LOGNAME=345561,MACHTYPE=alpha,MAIL=/var/spool/mail/345561,
MANPATH=/opt/g03b05/g03/bsd:/usr/share/man:/usr/dt/share/man:/usr/loca
l/man:/opt/man:/opt/pbs/man:/opt/rash/man:/usr/opt/mpi/man:/usr/opt/mpi
/man:/usr/opt/mpi/man,MP_STACK_OVERFLOW=OFF,
NLSPATH=/usr/lib/nls/msg/%L/%N,OMP_NUM_THREADS=4,ONEEXE=-DONEEXE,
OSTYPE=osf1,
PERLLIB=/opt/g03b05/g03/bsd,PGI=/usr/pgi,PGIDIR=/usr/pgi/linux86/5.0,
POSTFL_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,PROJECT=e12,
QCAUX=/opt/qchem-2.02/aux,QCPLATFORM=DEC_ALPHA,
RMS_PROJECT=e12,RUNCPP=/lib/cpp,SHELL=/opt/rash/bin/tcsh,SHLVL=1,
QUEUE=normal
"""
from pyparsing import printables, Word, Optional, Literal, Group, Dict, \
delimitedList, Combine, OneOrMore, alphanums
# Grammar for a comma-separated list of KEY=value entries, followed by a
# small driver that parses search_string and prints the results.

# definition of key: a run of letters, digits and underscores
key = Word(alphanums + "_")

# definition of value
# Printable characters other than backslash and comma; those two are handled
# separately (backslash starts an escape, comma separates entries).
_noncommachars = "".join(c for c in printables if c not in r"\,")
# A backslash followed by exactly one printable character.
_escChar = Word("\\", printables, exact=2)


def _unescape(s, loc, toks):
    """Parse action that "unescapes" a character, e.g. turns ``\\,`` into ``,``.

    Returns a one-element list holding the second character of the matched
    two-character token, i.e. the character after the backslash.
    """
    return [toks[0][1]]


_escChar.setParseAction(_unescape)

# A value is one or more escaped characters / runs of ordinary characters.
# adjacent=False allows whitespace between the pieces, so a value that was
# wrapped onto the next line is still read as a single value.
value = Combine(OneOrMore(_escChar | Word(_noncommachars)), adjacent=False)


def collapseWhitespace(s, loc, toks):
    """Parse action that removes all whitespace from the matched value.

    NOTE: this strips every whitespace character, so spaces that belong to a
    value (e.g. in LINDA_FORTRAN) are removed together with the line wraps.
    """
    return ["".join(toks[0].split())]


value.setParseAction(collapseWhitespace)

# create overall definition, using Dict element to create a dictionary
# result structure
envVarDef = Dict(delimitedList(Group(key + Literal("=").suppress() + value)))

# parse input, and access returned results as a dictionary, or as attributes
# on an object if the key name is valid as an attribute name
envVars = envVarDef.parseString(search_string)

# Each print takes a single pre-formatted string, so the output is the same
# whether "print" is the Python 2 statement or the Python 3 function.
print("GAUSS_EXEDIR: %s" % (envVars["GAUSS_EXEDIR"],))
print("GAUSS_EXEDIR: %s" % (envVars.GAUSS_EXEDIR,))
# .keys() is used explicitly to walk the parsed variable names.
for k in envVars.keys():
    print("%s: %s" % (k, envVars[k]))
More information about the Python-list
mailing list