[Python-checkins] python/dist/src/Lib shlex.py,1.17,1.18

niemeyer@users.sourceforge.net niemeyer@users.sourceforge.net
Thu, 17 Apr 2003 14:32:04 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv1866/Lib

Modified Files:
	shlex.py 
Log Message:
Implemented posix-mode parsing support in shlex.py, as discussed on the
mailing list, and in patch #722686.


Index: shlex.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/shlex.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** shlex.py	2 Jun 2002 00:40:05 -0000	1.17
--- shlex.py	17 Apr 2003 21:31:29 -0000	1.18
***************
*** 1,2 ****
--- 1,3 ----
+ # -*- coding: iso-8859-1 -*-
  """A lexical analyzer class for simple shell-like syntaxes."""
  
***************
*** 4,16 ****
  # Input stacking and error message cleanup added by ESR, March 2000
  # push_source() and pop_source() made explicit by ESR, January 2001.
  
  import os.path
  import sys
  
! __all__ = ["shlex"]
  
  class shlex:
      "A lexical analyzer class for simple shell-like syntaxes."
!     def __init__(self, instream=None, infile=None):
          if instream is not None:
              self.instream = instream
--- 5,28 ----
  # Input stacking and error message cleanup added by ESR, March 2000
  # push_source() and pop_source() made explicit by ESR, January 2001.
+ # Posix compliance, split(), string arguments, and
+ # iterator interface by Gustavo Niemeyer, April 2003.
  
  import os.path
  import sys
  
! from types import StringTypes
! 
! try:
!     from cStringIO import StringIO
! except ImportError:
!     from StringIO import StringIO
! 
! __all__ = ["shlex", "split"]
  
  class shlex:
      "A lexical analyzer class for simple shell-like syntaxes."
!     def __init__(self, instream=None, infile=None, posix=0):
!         if type(instream) in StringTypes:
!             instream = StringIO(instream)
          if instream is not None:
              self.instream = instream
***************
*** 19,27 ****
--- 31,50 ----
              self.instream = sys.stdin
              self.infile = None
+         self.posix = posix
+         if posix:
+             self.eof = None
+         else:
+             self.eof = ''
          self.commenters = '#'
          self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                            'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
+         if self.posix:
+             self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
+                                'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
          self.whitespace = ' \t\r\n'
+         self.whitespace_split = 0
          self.quotes = '\'"'
+         self.escape = '\\'
+         self.escapedquotes = '"'
          self.state = ' '
          self.pushback = []
***************
*** 43,46 ****
--- 66,71 ----
      def push_source(self, newstream, newfile=None):
          "Push an input source onto the lexer's input source stack."
+         if type(newstream) in StringTypes:
+             newstream = StringIO(newstream)
          self.filestack.insert(0, (self.infile, self.instream, self.lineno))
          self.infile = newfile
***************
*** 74,93 ****
          raw = self.read_token()
          # Handle inclusions
!         while raw == self.source:
!             spec = self.sourcehook(self.read_token())
!             if spec:
!                 (newfile, newstream) = spec
!                 self.push_source(newstream, newfile)
!             raw = self.get_token()
          # Maybe we got EOF instead?
!         while raw == "":
              if len(self.filestack) == 0:
!                 return ""
              else:
                  self.pop_source()
                  raw = self.get_token()
!          # Neither inclusion nor EOF
          if self.debug >= 1:
!             if raw:
                  print "shlex: token=" + `raw`
              else:
--- 99,119 ----
          raw = self.read_token()
          # Handle inclusions
!         if self.source is not None:
!             while raw == self.source:
!                 spec = self.sourcehook(self.read_token())
!                 if spec:
!                     (newfile, newstream) = spec
!                     self.push_source(newstream, newfile)
!                 raw = self.get_token()
          # Maybe we got EOF instead?
!         while raw == self.eof:
              if len(self.filestack) == 0:
!                 return self.eof
              else:
                  self.pop_source()
                  raw = self.get_token()
!         # Neither inclusion nor EOF
          if self.debug >= 1:
!             if raw != self.eof:
                  print "shlex: token=" + `raw`
              else:
***************
*** 96,100 ****
  
      def read_token(self):
!         "Read a token from the input stream (no pushback or inclusions)"
          while 1:
              nextchar = self.instream.read(1)
--- 122,127 ----
  
      def read_token(self):
!         quoted = 0
!         escapedstate = ' '
          while 1:
              nextchar = self.instream.read(1)
***************
*** 114,118 ****
                      if self.debug >= 2:
                          print "shlex: I see whitespace in whitespace state"
!                     if self.token:
                          break   # emit current token
                      else:
--- 141,145 ----
                      if self.debug >= 2:
                          print "shlex: I see whitespace in whitespace state"
!                     if self.token or (self.posix and quoted):
                          break   # emit current token
                      else:
***************
*** 121,146 ****
                      self.instream.readline()
                      self.lineno = self.lineno + 1
                  elif nextchar in self.wordchars:
                      self.token = nextchar
                      self.state = 'a'
                  elif nextchar in self.quotes:
!                     self.token = nextchar
                      self.state = nextchar
                  else:
                      self.token = nextchar
!                     if self.token:
                          break   # emit current token
                      else:
                          continue
              elif self.state in self.quotes:
!                 self.token = self.token + nextchar
!                 if nextchar == self.state:
!                     self.state = ' '
!                     break
!                 elif not nextchar:      # end of file
                      if self.debug >= 2:
                          print "shlex: I see EOF in quotes state"
                      # XXX what error should be raised here?
                      raise ValueError, "No closing quotation"
              elif self.state == 'a':
                  if not nextchar:
--- 148,203 ----
                      self.instream.readline()
                      self.lineno = self.lineno + 1
+                 elif self.posix and nextchar in self.escape:
+                     escapedstate = 'a'
+                     self.state = nextchar
                  elif nextchar in self.wordchars:
                      self.token = nextchar
                      self.state = 'a'
                  elif nextchar in self.quotes:
!                     if not self.posix:
!                         self.token = nextchar
                      self.state = nextchar
+                 elif self.whitespace_split:
+                     self.token = nextchar
+                     self.state = 'a'
                  else:
                      self.token = nextchar
!                     if self.token or (self.posix and quoted):
                          break   # emit current token
                      else:
                          continue
              elif self.state in self.quotes:
!                 quoted = 1
!                 if not nextchar:      # end of file
                      if self.debug >= 2:
                          print "shlex: I see EOF in quotes state"
                      # XXX what error should be raised here?
                      raise ValueError, "No closing quotation"
+                 if nextchar == self.state:
+                     if not self.posix:
+                         self.token = self.token + nextchar
+                         self.state = ' '
+                         break
+                     else:
+                         self.state = 'a'
+                 elif self.posix and nextchar in self.escape and \
+                      self.state in self.escapedquotes:
+                     escapedstate = self.state
+                     self.state = nextchar
+                 else:
+                     self.token = self.token + nextchar
+             elif self.state in self.escape:
+                 if not nextchar:      # end of file
+                     if self.debug >= 2:
+                         print "shlex: I see EOF in escape state"
+                     # XXX what error should be raised here?
+                     raise ValueError, "No escaped character"
+                 # In posix shells, only the quote itself or the escape
+                 # character may be escaped within quotes.
+                 if escapedstate in self.quotes and \
+                    nextchar != self.state and nextchar != escapedstate:
+                     self.token = self.token + self.state
+                 self.token = self.token + nextchar
+                 self.state = escapedstate
              elif self.state == 'a':
                  if not nextchar:
***************
*** 151,155 ****
                          print "shlex: I see whitespace in word state"
                      self.state = ' '
!                     if self.token:
                          break   # emit current token
                      else:
--- 208,212 ----
                          print "shlex: I see whitespace in word state"
                      self.state = ' '
!                     if self.token or (self.posix and quoted):
                          break   # emit current token
                      else:
***************
*** 158,162 ****
                      self.instream.readline()
                      self.lineno = self.lineno + 1
!                 elif nextchar in self.wordchars or nextchar in self.quotes:
                      self.token = self.token + nextchar
                  else:
--- 215,231 ----
                      self.instream.readline()
                      self.lineno = self.lineno + 1
!                     if self.posix:
!                         self.state = ' '
!                         if self.token or (self.posix and quoted):
!                             break   # emit current token
!                         else:
!                             continue
!                 elif self.posix and nextchar in self.quotes:
!                     self.state = nextchar
!                 elif self.posix and nextchar in self.escape:
!                     escapedstate = 'a'
!                     self.state = nextchar
!                 elif nextchar in self.wordchars or nextchar in self.quotes \
!                     or self.whitespace_split:
                      self.token = self.token + nextchar
                  else:
***************
*** 171,174 ****
--- 240,245 ----
          result = self.token
          self.token = ''
+         if self.posix and not quoted and result == '':
+             result = None
          if self.debug > 1:
              if result:
***************
*** 183,187 ****
              newfile = newfile[1:-1]
          # This implements cpp-like semantics for relative-path inclusion.
!         if type(self.infile) == type("") and not os.path.isabs(newfile):
              newfile = os.path.join(os.path.dirname(self.infile), newfile)
          return (newfile, open(newfile, "r"))
--- 254,258 ----
              newfile = newfile[1:-1]
          # This implements cpp-like semantics for relative-path inclusion.
!         if type(self.infile) in StringTypes and not os.path.isabs(newfile):
              newfile = os.path.join(os.path.dirname(self.infile), newfile)
          return (newfile, open(newfile, "r"))
***************
*** 195,198 ****
--- 266,282 ----
          return "\"%s\", line %d: " % (infile, lineno)
  
+     def __iter__(self):
+         return self
+ 
+     def next(self):
+         token = self.get_token()
+         if token == self.eof:
+             raise StopIteration
+         return token
+ 
+ def split(s, posix=1, spaces=1):
+     lex = shlex(s, posix=posix)
+     lex.whitespace_split = spaces
+     return list(lex)
  
  if __name__ == '__main__':