Extracting/finding strings from a list

Paul McGuire ptmcg at austin.rr._bogus_.com
Mon Jul 19 12:29:26 EDT 2004


"Steve" <nospam at nopes> wrote in message
news:40fb13f7$1 at clarion.carno.net.au...
> Hi,
>
> I have a very long string, something like:
>
>          DISPLAY=localhost:0.0,FORT_BUFFERED=true,
>
> F_ERROPT1=271\,271\,2\,1\,2\,2\,2\,2,G03BASIS=/opt/g03b05/g03/basis,
>          GAMESS=/opt/gamess,GAUSS_ARCHDIR=/opt/g03b05/g03/arch,
>
> GAUSS_EXEDIR=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g
>          03,GAUSS_SCR_ROOT=/home/561/345561/scratch,
>          GDVBASIS=/opt/g03b05/g03/basis,
>
> GMAIN=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g03,
>          GROUP=e12,GV_DIR=/opt/g03b05/gv,HOST=sc1,
>          HOSTTYPE=alpha,INFOPATH=/opt/info,KMP_DUPLICATE_LIB_OK=TRUE,
>          KMP_STACKSIZE=10485760,
>          LINDA_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,
>          LINDA_FORTRAN_LINK=f90 -i8 -r8 -omp -reentrancy threaded,
>          LOGNAME=345561,MACHTYPE=alpha,MAIL=/var/spool/mail/345561,
>
> MANPATH=/opt/g03b05/g03/bsd:/usr/share/man:/usr/dt/share/man:/usr/loca
>
> l/man:/opt/man:/opt/pbs/man:/opt/rash/man:/usr/opt/mpi/man:/usr/opt/mpi
>          /man:/usr/opt/mpi/man,MP_STACK_OVERFLOW=OFF,
>          NLSPATH=/usr/lib/nls/msg/%L/%N,OMP_NUM_THREADS=4,ONEEXE=-DONEEXE,
>          OSTYPE=osf1,
>
> PERLLIB=/opt/g03b05/g03/bsd,PGI=/usr/pgi,PGIDIR=/usr/pgi/linux86/5.0,
>          POSTFL_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,PROJECT=e12,
>          QCAUX=/opt/qchem-2.02/aux,QCPLATFORM=DEC_ALPHA,
>          RMS_PROJECT=e12,RUNCPP=/lib/cpp,SHELL=/opt/rash/bin/tcsh,SHLVL=1,
>          QUEUE=normal
>
>
> and I need to extract the value of the variable "GAUSS_EXEDIR". Although
> these are environment variables, I don't have access to them directly.
> These variables are stored in a special file and I need to parse it to
> be able to extract the variable. I wrote the following code for this:
>
>                  ...
>                  data = {'gauss_var' : ''}
>                  allLines = os.popen('cat ./somefile').readlines()
>                  j = 0
>                  # Extract GAUSS_EXEDIR from the string
>                  gaussCont = 0
>                  variable_list = value.split(",")
>                  vars = variable_list[0].split("=")
>                  while len(allLines) > j and vars[0] != "QUEUE":
>                      var_line = allLines[j]
>                      var_toks = split(var_line)
>                      value = var_toks[0]
>                      variable_list = value.split(",")
>                      for k in range(len(pvariable_list)):
>                          if variable_list[k].find("=") == -1:
>                              if gaussCont == 1:
>                                  data['guass_var']="%s%s"
> %(data['gauss_var'],variable_list[k])
>                                  gaussCont = -1
>                                  break
>                              # end if
>                          # end if
>
>                          vars = variable_list[k].split("=")
>                          for m in range(len(vars)):
>                              if vars[m] == "GAUSS_EXEDIR":
>                                  for p in range(1, len(vars)):
>                                      data['gauss_var']="%s%s" %
> (data['gauss_var'], vars[p])
>                                  # end for
>                                  gaussCont = 1
>                              # end if
>                          # end for
>                      # end for
>                      j += 1
>                      if gaussCont == -1:
>                          break
>                      # end if
>                  # end while
>
>
> The reason why I look for the word "QUEUE" is because "QUEUE" is the
> last variable expected in a list. After this line, the list continues
> but the variables belong to another user (so basically it's a big file
> full of env variables that belong to different users. Each list begins
> with "DISPLAY" and ends with "QUEUE").
>
> Is there a much simpler way of doing this? That is, extracting/finding
> specific variables/value pairs from a list/string? These loops take up a
> lot of my time and I'm trying to learn better ways of doing the same.
> Thanks!
>
>
> Steve
>
Here is a pyparsing implementation. -- Paul
(download pyparsing at http://pyparsing.sourceforge.net )

# Sample input copied from the question: a comma-separated list of
# NAME=value pairs, wrapped across several lines.  Commas that are part of
# a value are written as backslash-comma (see F_ERROPT1).
#
# The literal is a raw string so those backslashes are kept verbatim.  In a
# non-raw literal "\," is an unrecognized escape sequence; Python passes it
# through unchanged but newer interpreters emit a warning for it.  The
# resulting string value is identical either way.
search_string = r"""
         DISPLAY=localhost:0.0,FORT_BUFFERED=true,

F_ERROPT1=271\,271\,2\,1\,2\,2\,2\,2,G03BASIS=/opt/g03b05/g03/basis,
         GAMESS=/opt/gamess,GAUSS_ARCHDIR=/opt/g03b05/g03/arch,

GAUSS_EXEDIR=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g
         03,GAUSS_SCR_ROOT=/home/561/345561/scratch,
         GDVBASIS=/opt/g03b05/g03/basis,
         GMAIN=/opt/g03b05/g03/bsd:/opt/g03b05/g03/private:/opt/g03b05/g03,
         GROUP=e12,GV_DIR=/opt/g03b05/gv,HOST=sc1,
         HOSTTYPE=alpha,INFOPATH=/opt/info,KMP_DUPLICATE_LIB_OK=TRUE,
         KMP_STACKSIZE=10485760,
         LINDA_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,
         LINDA_FORTRAN_LINK=f90 -i8 -r8 -omp -reentrancy threaded,
         LOGNAME=345561,MACHTYPE=alpha,MAIL=/var/spool/mail/345561,

MANPATH=/opt/g03b05/g03/bsd:/usr/share/man:/usr/dt/share/man:/usr/loca

l/man:/opt/man:/opt/pbs/man:/opt/rash/man:/usr/opt/mpi/man:/usr/opt/mpi
         /man:/usr/opt/mpi/man,MP_STACK_OVERFLOW=OFF,
         NLSPATH=/usr/lib/nls/msg/%L/%N,OMP_NUM_THREADS=4,ONEEXE=-DONEEXE,
         OSTYPE=osf1,

PERLLIB=/opt/g03b05/g03/bsd,PGI=/usr/pgi,PGIDIR=/usr/pgi/linux86/5.0,
         POSTFL_FORTRAN=f90 -i8 -r8 -omp -reentrancy threaded,PROJECT=e12,
         QCAUX=/opt/qchem-2.02/aux,QCPLATFORM=DEC_ALPHA,
         RMS_PROJECT=e12,RUNCPP=/lib/cpp,SHELL=/opt/rash/bin/tcsh,SHLVL=1,
         QUEUE=normal
"""

from pyparsing import printables, Word, Optional, Literal, Group, Dict, \
     delimitedList, Combine, OneOrMore, alphanums

# --- key -------------------------------------------------------------------
# A key is a run of alphanumeric characters and underscores.
key = Word(alphanums + "_")

# --- value -----------------------------------------------------------------
# Printable characters that may appear unescaped inside a value: everything
# except the backslash (escape introducer) and the comma (pair separator).
_noncommachars = printables.replace("\\", "").replace(",", "")

# An escape is a backslash followed by exactly one printable character.
_escChar = Word("\\", printables, exact=2)


def _dropBackslash(s, l, t):
    # "Unescape": keep only the character that follows the backslash.
    return [t[0][1]]


_escChar.setParseAction(_dropBackslash)

# A value is one or more pieces, each either an escape or a run of ordinary
# characters, merged into a single string.  adjacent=False lets the pieces
# be separated by whitespace, as happens where the sample text wraps a value
# onto the next line.
_valuePiece = _escChar | Word(_noncommachars)
value = Combine(OneOrMore(_valuePiece), adjacent=False)


def collapseWhitespace(s, l, t):
    # Rebuild the first token with every whitespace character removed.
    return ["".join(t[0].split())]


value.setParseAction(collapseWhitespace)

# --- whole list --------------------------------------------------------------
# Each "key=value" pair is grouped (the "=" itself is dropped from the
# results); the pairs form a comma-delimited list, and Dict makes the grouped
# pairs addressable by key in the parse results.
_equalsSign = Literal("=").suppress()
_keyValuePair = Group(key + _equalsSign + value)
envVarDef = Dict(delimitedList(_keyValuePair))

# Parse the sample text.  The returned results can be indexed by key like a
# dictionary, or read as attributes when the key name is a valid attribute
# name.
envVars = envVarDef.parseString(search_string)

# Every print below receives a single pre-formatted string in parentheses.
# That form is valid both where print is a statement (Python 2) and where it
# is a function (Python 3), and it emits the same "NAME: value" text as the
# two-item print statements it replaces.  The value is wrapped in a tuple so
# the % operator always treats it as one argument.
print("GAUSS_EXEDIR: %s" % (envVars["GAUSS_EXEDIR"],))
print("GAUSS_EXEDIR: %s" % (envVars.GAUSS_EXEDIR,))

# NOTE(review): keep the explicit .keys() call -- iterating the results
# object directly is expected to walk the parsed tokens rather than the key
# names; confirm against the pyparsing ParseResults documentation before
# simplifying.
for k in envVars.keys():
    print("%s: %s" % (k, envVars[k]))





More information about the Python-list mailing list