Extracting parts of string between anchor points
Tim Chase
python.list at tim.thechases.com
Thu Feb 27 17:01:51 EST 2014
On 2014-02-27 15:45, Tim Chase wrote:
> >>> r = re.compile(r"^([^:]*)(?::((?:(?!-:-).)*)(?:-:-(.*))?)?")
If you want to compare both the re method and the string method,
here's a test-harness to play with:
import re
# Test cases as (input string, expected result) pairs.  An expected value of
# None marks a case with no agreed-upon answer: the harness reports the
# result for such a case as "UNKN" instead of judging it PASS or FAIL.
examples = [
    ("", (None, None, None)),
    ("Test1A", ("Test1A", None, None)),
    ("Test2A: Test2B", ("Test2A", "Test2B", None)),
    ("Test3A: Test3B -:- Test3C", ("Test3A", "Test3B", "Test3C")),
    ("Test4A -:- Test4B", None),
    ("Test5A : Test5B : Test5C -:- Test5D", None),
    ("Test6A : Test6B -:- Test6C -:- Test6D", None),
]
# Pattern with three capture groups, anchored at the start of the string:
#   group 1: everything before the first ":" (may be empty)
#   group 2: optional; the text after that ":" up to, but not including,
#            the first "-:-" (the negative lookahead stops it there)
#   group 3: optional; everything after that "-:-"
# No flags are passed, so "." does not match a newline.
splitter_re = re.compile(r"^([^:]*)(?::((?:(?!-:-).)*)(?:-:-(.*))?)?")
def clean(t):
    """Return a list with each field of *t* stripped, or None if blank.

    Each item of *t* is either a string or None.  Strings have surrounding
    whitespace removed; None, empty strings and strings consisting only of
    whitespace all come back as None, so that a missing field and a blank
    field are reported the same way.
    """
    return [
        # ``s.strip() or None`` folds a whitespace-only field into None
        # instead of leaving an empty string behind.
        (s.strip() or None) if s else None
        for s in t
    ]
def splitter1(s):
    "using regexp"
    # NOTE: the docstring above is printed verbatim as this method's heading
    # by the test harness (via method.__doc__), so it is kept as-is.
    #
    # Split *s* with splitter_re and return a 3-tuple of the cleaned capture
    # groups.  Groups that did not take part in the match are None in
    # m.groups() and are passed through clean() along with the others.
    m = splitter_re.match(s)
    if m:
        return tuple(clean(m.groups()))
    else:
        # Fallback for a failed match: report every field as missing.
        return (None, None, None)
def splitter2(s):
    "using string methods"
    # NOTE: the docstring above is printed verbatim as this method's heading
    # by the test harness (via method.__doc__), so it is kept as-is.
    #
    # Split *s* into a 3-tuple (out1, out2, out3) using str.partition:
    # "-:-" separates the third field from the rest, and the first ":" in
    # what remains separates the first field from the second.  Fields that
    # are absent stay None.
    out1 = out2 = out3 = None
    if ":" in s:
        if "-:-" in s:
            left, _, out3 = clean(s.partition("-:-"))
            # clean() turns an empty left-hand side into None (input that
            # starts with "-:-"); guard before searching it for ":" so that
            # case yields out1=None instead of raising TypeError.
            if left is not None and ":" in left:
                out1, _, out2 = clean(left.partition(":"))
            else:
                out1 = left
        else:
            out1, _, out2 = clean(s.partition(":"))
    else:
        # No separator at all: the whole string is the first field.  Run it
        # through clean() like every other branch so surrounding whitespace
        # is removed and an empty string becomes None.
        (out1,) = clean((s,))
    return (out1, out2, out3)
# Run every example through each splitter and print one line per example.
for method in (splitter1, splitter2):
    # Heading: the method's docstring, underlined with "=" of equal length.
    print("")
    print(method.__doc__)
    print("=" * len(method.__doc__))
    for s, expected in examples:
        result = method(s)
        if expected is not None:
            if result != expected:
                print("FAIL: %r got %r, not %r" % (s, result, expected))
            else:
                print("PASS: %r got %r" % (s, result))
        else:
            # No expected value recorded for this example: just show what
            # the method produced so the two methods can be compared by eye.
            print("UNKN: %r got %r" % (s, result))
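Note the differences in Test4: the regex version splits at the colon inside "-:-", while the string-method version treats the whole "-:-" as the separator.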
-tkc
More information about the Python-list
mailing list