Regular expression fun. Repeated matching of a group Q

Paul McGuire ptmcg at austin.rr.com
Fri Feb 24 16:06:37 EST 2006


Here's a (surprise!) pyparsing solution.  -- Paul
(Get pyparsing at http://pyparsing.sourceforge.net.)

# Sample input: two timesheet rows as HTML table cells.  Each row holds a
# date, a weekday, an optional absence name, and then a run of time cells.
# Empty cells contain the HTML entity &nbsp;.
# NOTE(review): the archived copy of this post showed a bare space in the
# empty cells and in the `nbsp` literal below; the entity is restored here
# because pyparsing skips plain whitespace before every token, so a
# Literal(" ") could never match and parseString() would raise.
data = [
"""<td>04/01/2006</td><td>Wednesday</td><td>&nbsp;</td><td>09:14</td><td>12:44</td><td>12:50</td><td>17:58</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>08:14</td>""",
"""<td>03/01/2006</td><td>Tuesday</td><td>Annual_Holiday</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>08:00</td>"""
]

from pyparsing import *

# Opening/closing <td> tag matchers; suppressed so the tags themselves do
# not appear in the parsed results.
startTD,endTD = makeHTMLTags("TD")
startTD = startTD.suppress()
endTD = endTD.suppress()

# The literal must sit on one line: a quoted string cannot span a line
# break, which is how mail wrapping had broken it.
dayOfWeek = oneOf("Sunday Monday Tuesday Wednesday Thursday Friday Saturday")

# Placeholder found in empty cells.
nbsp = Literal("&nbsp;")

# Combine() joins the matched pieces into one token, e.g. "09:14".
time = Combine(Word(nums,exact=2) + ":" + Word(nums,exact=2))
date = Combine(Word(nums,exact=2) + "/" + Word(nums,exact=2) + "/" +
               Word(nums,exact=4))

# One row: date cell, weekday cell, a name cell (either empty or a word of
# alphanumerics/underscores), then one or more cells that are each either
# empty or a time.  Empty cells are suppressed, so only real values are
# kept under the "name" and "dates" result names.
entry = ( startTD + date.setResultsName("date") + endTD +
          startTD + dayOfWeek.setResultsName("dayOfWeek") + endTD +
          startTD + ( Suppress(nbsp) |
                      Word(alphanums+"_").setResultsName("name") ) + endTD +
          OneOrMore(startTD + (Suppress(nbsp) | time) + endTD
                    ).setResultsName("dates")
          )

# Parse each row and print the named fields, followed by a blank line.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for d in data:
    res = entry.parseString(d)
    print(res.date)
    print(res.dayOfWeek)
    print(res.name)
    print(res.dates)
    print("")


Returns:

04/01/2006
Wednesday

['09:14', '12:44', '12:50', '17:58', '08:14']

03/01/2006
Tuesday
Annual_Holiday
['08:00']




More information about the Python-list mailing list