[Python-checkins] CVS: distutils/distutils util.py,1.36,1.37

Greg Ward python-dev@python.org
Sat, 24 Jun 2000 13:40:05 -0700


Update of /cvsroot/python/distutils/distutils
In directory slayer.i.sourceforge.net:/tmp/cvs-serv28287

Modified Files:
	util.py 
Log Message:
Added 'split_quoted()' function to deal with strings that are quoted in
Unix shell-like syntax (e.g. in Python's Makefile, for one thing).  Now that
I have this function, I'll probably allow quoted strings in config files too.

Index: util.py
===================================================================
RCS file: /cvsroot/python/distutils/distutils/util.py,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -r1.36 -r1.37
*** util.py	2000/06/18 15:45:55	1.36
--- util.py	2000/06/24 20:40:02	1.37
***************
*** 167,168 ****
--- 167,235 ----
  
      return error
+ 
+ 
+ # Needed by 'split_quoted()'
+ _wordchars_re = re.compile(r'[^\\\'\"\ ]*')
+ _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
+ _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
+ 
+ def split_quoted (s):
+     """Split a string up according to Unix shell-like rules for quotes and
+     backslashes.  In short: words are delimited by spaces, as long as those
+     spaces are not escaped by a backslash, or inside a quoted string.
+     Single and double quotes are equivalent, and the quote characters can
+     be backslash-escaped.  The backslash is stripped from any two-character
+     escape sequence, leaving only the escaped character.  The quote
+     characters are stripped from any quoted string.  Returns a list of
+     words.
+     """
+ 
+     # This is a nice algorithm for splitting up a single string, since it
+     # doesn't require character-by-character examination.  It was a little
+     # bit of a brain-bender to get it working right, though...
+ 
+     s = string.strip(s)
+     words = []
+     pos = 0
+ 
+     while s:
+         m = _wordchars_re.match(s, pos)
+         end = m.end()
+         if end == len(s):
+             words.append(s[:end])
+             break
+ 
+         if s[end] == ' ':               # unescaped, unquoted space: now
+             words.append(s[:end])       # we definitely have a word delimiter
+             s = string.lstrip(s[end:])
+             pos = 0
+ 
+         elif s[end] == '\\':            # preserve whatever is being escaped;
+                                         # will become part of the current word
+             s = s[:end] + s[end+1:]
+             pos = end+1
+ 
+         else:
+             if s[end] == "'":           # slurp singly-quoted string
+                 m = _squote_re.match(s, end)
+             elif s[end] == '"':         # slurp doubly-quoted string
+                 m = _dquote_re.match(s, end)
+             else:
+                 raise RuntimeError, \
+                       "this can't happen (bad char '%c')" % s[end]
+ 
+             if m is None:
+                 raise ValueError, \
+                       "bad string (mismatched %s quotes?)" % s[end]
+ 
+             (beg, end) = m.span()
+             s = s[:beg] + s[beg+1:end-1] + s[end:]
+             pos = m.end() - 2
+ 
+         if pos >= len(s):
+             words.append(s)
+             break
+ 
+     return words
+ 
+ # split_quoted ()