Data Manipulation - Rows to Columns

bearophileHUGS at lycos.com bearophileHUGS at lycos.com
Wed Feb 6 09:42:38 EST 2008


This smells of homework. Here are a few alternative solutions of mine
that I don't like. I presume a good teacher will refuse them all,
because none of them uses the right tool :-) And every one of them
has some small problem (even if they work here).

# Sample input shared by the first four solutions: one line per tag pair.
# Each item line is closed by </table> rather than </item>; the patterns
# used below match that closing tag literally.
data = (
    "<item>TABLE</table>\n"
    "<color>black</color>\n"
    "<color>blue</color>\n"
    "<color>red</color>\n"
    "<item>CHAIR</table>\n"
    "<color>yellow</color>\n"
    "<color>black</color>\n"
    "<color>red</color>\n"
    "<item>SOFA</table>\n"
    "<color>white</color>\n"
    "<color>gray</color>\n"
    "<color>pink</color>\n"
)

# Solution 1: delete the color tags and the </table> closers with plain
# str.replace, then cut the text at every <item> opener.  What remains of
# each chunk is whitespace-separated words: the item name, then its colors.
# (Because of the split(), names and colors must not contain whitespace.)
data2 = (
    data.replace("<color>", "")
    .replace("</color>", "")
    .replace("</table>", "")
)
# The text before the first <item> is empty, hence the "if b" filter.
groups = [b.split() for b in data2.split("<item>") if b]
print(groups)
print("")


import re
# Solution 2: same approach as the first one, but all three tags are
# removed by a single regex substitution instead of chained replaces.
data2 = re.sub(r"<color>|</color>|</table>", "", data)
# filter(None, ...) drops the empty text that precedes the first <item>.
groups = [chunk.split() for chunk in filter(None, data2.split("<item>"))]
print(groups)
print("")


import re
def splitter(data):
    """Yield one list per item: the item name followed by its colors.

    data is scanned for ``<item>NAME</table>`` and ``<color>NAME</color>``
    pairs, in order.  Every item match starts a new list; every color
    match is appended to the list currently being built.  Text matching
    neither form is ignored.

    Since ``.`` does not match a newline, the opening and closing tag of
    a pair must be on the same line.  Colors that appear before the first
    item are yielded as a list of their own, without an item name.
    """
    # Exactly one of the two groups is set per match: group 1 for an
    # item, group 2 for a color.  The pattern must stay on one line; a
    # line break inside the raw string would become part of the regex.
    patt = re.compile(r"(?:<item>(.*)</table>)|(?:<color>(.*)</color>)")
    parts = []
    for mo in patt.finditer(data):
        item, color = mo.groups()
        if item is None:
            parts.append(color)
        else:
            # A new item closes the previous group, if there is one.
            if parts:
                yield parts
            parts = [item]
    # Flush the last group; nothing is yielded for input without matches.
    if parts:
        yield parts
# splitter() is a generator; list() collects its groups for printing.
print(list(splitter(data)))
print("")


def splitter2(items, predicate):
    """Group consecutive elements of items into lists, lazily.

    An element for which predicate(element) is true is added to the
    group being built; an element for which it is false ends that group
    (which is yielded if non-empty) and becomes the first element of the
    next one.  The final group is yielded once items is exhausted.
    """
    current = []
    for element in items:
        if predicate(element):
            current.append(element)
            continue
        # Predicate is false: this element opens a new group.
        if current:
            yield current
        current = [element]
    if current:
        yield current
import re
# Solution 4: feed the (item, color) group pairs of every regex match to
# the generic splitter2() and post-process each resulting group.
patt = re.compile(r"(?:<item>(.*)</table>)|(?:<color>(.*)</color>)")
xmobjects = (mo.groups() for mo in patt.finditer(data))


def process(group):
    # The first pair of a group carries the item name in slot 0; the
    # remaining pairs carry a color in slot 1.
    colors = [part[1] for part in group[1:]]
    return [group[0][0]] + colors


def isstart(pair):
    # NOTE: despite the name, this is true for color pairs (slot 0 is
    # None), i.e. for the elements that splitter2() keeps in the current
    # group; an item pair makes it false and so starts a new group.
    return pair[0] is None


xgroups = (process(g) for g in splitter2(xmobjects, isstart))
print(list(xgroups))
print("")



# Solution 5: a messier variant of the input, with stray whitespace
# around and inside the tags and an empty line in the middle.
data2 = """
<item>TABLE</table>
<color>black</color>
 <color>blue< / color>
<color>red</color>
<item>CHAIR</table>
<color>yellow</color>

<color>black</color>
 <color>red</color>
<item>SOFA</table>
<color>white</color>
<color>gray</color>
 < color > pink < / color >
"""

import re
# In VERBOSE mode the whitespace used to lay out the pattern is ignored;
# only the explicit \s* parts match whitespace in the input.
# The value group is non-greedy: a greedy (.*) gives back as little as
# possible while backtracking, so it would keep the blanks in front of
# the closing tag and yield 'pink ' for the last line.
patt = re.compile(r"""
                  \s*  < \s* (item|color) \s* >  \s*
                  (.*?)
                  \s*  < \s* / \s*  (?:table|color)  \s* >  \s*
                  """, re.VERBOSE)
groups = []
for mo in patt.finditer(data2):
    p1, p2 = mo.groups()
    if p1 == "item":
        # An item opens a new group.
        groups.append([p2])
    else:
        # A color joins the most recent item's group.  NOTE: a color that
        # comes before any item raises IndexError here (groups is empty).
        groups[-1].append(p2)
print(groups)
print("")

Bye,
bearophile



More information about the Python-list mailing list