parsing tree from excel sheet

Peter Otten __peter__ at web.de
Wed Jan 28 09:08:00 EST 2015


alb wrote:

> Hi everyone,
> 
> I've a document structure which is extremely simple and represented on a
> spreadsheet in the following way (a made up example):
> 
> subsystem | chapter | section | subsection | subsubsec |
>     A     |         |         |            |           |
>           | func0   |         |            |           |
>           |         |interface|            |           |
>           |         |latency  |            |           |
>           |         |priority |            |           |
>           | func1   |         |            |           |
>           |         |interface|            |           |
>           |         |latency  |            |           |
>           |         |priority |            |           |
>           |         |depend   |            |           |
>           |         |         | variables  |           |
>           |         |         |            | static    |
>           |         |         |            | global    |
>           |         |         | functions  |           |
>           |         |         |            | internal  |
>           |         |         |            | external  |
> 
> And I'd like to get a tree like this:
> 
>     A
>     +-------> func0
>     |           +---> interface
>     |           +---> latency
>     |           \---> priority
>     \-------> func1
>                 +---> interface
>                 +---> latency
>                 +---> priority
>                 \---> depend
>                          +---> variables
>                          |         +---> static
>                          |         \---> global
>                          \---> functions
>                                    +---> internal
>                                    \---> external
> 
> I know about the xlrd module to get data from excel and I'm also aware
> of the ETE toolkit (which is more specific for bioinformatics, but I
> guess it can suitably fill the need).
> 
> Does anyone recommend any other path other than scripting through these
> two modules?
> 
> Is there any more suitable module/example/project out there that would
> achieve the same result?
> 
> The reason for parsing is because the need behind is to create documents
> edited in excel but typeset in LaTeX, therefore my script will spill out
> \chapter, \section and so forth based on the tree structure.
> 
> Every node will have some text and some images with a very light markup
> like mediawiki that I can easily convert into latex.
> 
> Hope I've not been too confusing.
> Thanks for any pointer/suggestion/comment.
> 
> Al
> 
> p.s.: I'm not extremely proficient in python, actually I'm just starting
> with it!

You can save the excel sheet as csv so that you can use the csv module, which
may be easier to use than xlrd. The rest should be doable by hand. Here's
what I hacked together:

$ cat parse_column_tree.py
import csv

def column_index(row):
    """Return the zero-based position of the first non-empty cell in *row*.

    Raises ValueError when no cell in the row is truthy (this includes a
    row with no cells at all).
    """
    position = next(
        (index for index, cell in enumerate(row) if cell),
        None,
    )
    if position is None:
        raise ValueError
    return position


class Node:
    """A tree node holding a name, the name of its level, and its children.

    *name* is the cell text; *level* is the label used as the LaTeX-style
    command name when the node is converted with str().
    """

    def __init__(self, name, level):
        self.name = name
        self.level = level
        self.children = []

    def append(self, child):
        """Add *child* as the last child of this node."""
        self.children.append(child)

    def __str__(self):
        """Return the node formatted as ``\\level{name}``."""
        # The backslash is escaped explicitly: "\%" is not a valid escape
        # sequence and triggers a warning on current Python versions.
        return "\\%s{%s}" % (self.level, self.name)

    def show(self):
        """Yield the ASCII-art rows of this subtree, each as a list of strings.

        The first row is ``[self.name]``. Every row coming from a child is
        prefixed with one six-character cell: an arrow for the child's own
        row, and a connector (or blanks below the last child) for the rows
        of the child's descendants. Join a row with "" to get the text line.
        """
        yield [self.name]
        last_index = len(self.children) - 1
        for i, child in enumerate(self.children):
            if i == last_index:
                # Nothing follows the last child, so no vertical bar below it.
                head, tail = "\\---> ", "      "
            else:
                head, tail = "+---> ", "|     "
            for j, cells in enumerate(child.show()):
                yield [head if j == 0 else tail] + cells

    def show2(self):
        """Yield str() of this node and then of every descendant, depth-first."""
        yield str(self)
        for child in self.children:
            yield from child.show2()

def show(root):
    """Print the rows produced by ``root.show()``, one joined row per line."""
    for line in map("".join, root.show()):
        print(line)

def show2(root):
    """Print every item produced by ``root.show2()`` on its own line."""
    entries = root.show2()
    for entry in entries:
        print(entry)

def read_tree(rows, levelnames):
    """Build a tree of Node objects from column-indented *rows*.

    Each row contributes one node: its depth is the index of the first
    non-empty cell (as reported by column_index), its name is that cell,
    and its level label is ``levelnames[depth]``.

    Returns an artificial root node named "#ROOT" whose children are the
    column-0 nodes.

    Raises ValueError when a row has no non-empty cell, when a row is
    indented more than one column deeper than the previous row, or when
    the first row does not start in column 0 (there is no parent to
    attach it to).
    """
    root = Node("#ROOT", "#ROOT")
    old_level = 0
    # stack[k] is the parent for nodes at depth k, so len(stack) is always
    # old_level + 1.
    stack = [root]
    for i, row in enumerate(rows, 1):
        try:
            new_level = column_index(row)
        except ValueError:
            raise ValueError("data row %d has no non-empty cell" % i) from None
        node = Node(row[new_level], levelnames[new_level])

        if new_level > old_level:
            if new_level - old_level != 1:
                raise ValueError(
                    "data row %d jumps from column %d to column %d"
                    % (i, old_level, new_level)
                )
            parent = stack[-1]
            if not parent.children:
                # Only reachable when the very first row is indented.
                raise ValueError(
                    "data row %d is indented but has no parent row" % i
                )
            # The most recently added node becomes the current parent.
            stack.append(parent.children[-1])
            old_level = new_level
        elif new_level < old_level:
            # Drop the parents of the deeper levels we are leaving.
            del stack[new_level + 1:]
            old_level = new_level

        stack[-1].append(node)
    return root

def main():
    """Command-line entry point: read a CSV tree and print it.

    Takes the CSV file name as a positional argument. The first CSV row
    supplies the level names. Each top-level node is printed as an ASCII
    tree, or as LaTeX-style commands when --latex is given, followed by an
    empty line.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("infile")
    parser.add_argument("--latex", action="store_true")

    args = parser.parse_args()

    # The csv module requires newline="" so that it can handle line endings
    # (including newlines embedded in quoted fields) itself.
    with open(args.infile, newline="") as f:
        rows = csv.reader(f)
        levelnames = next(rows, None)  # the header row names the levels
        if levelnames is None:
            parser.error("%s is empty" % args.infile)
        tree = read_tree(rows, levelnames)

    # The tree is fully built at this point, so the file is no longer needed.
    show_tree = show2 if args.latex else show
    for node in tree.children:
        show_tree(node)
        print("")

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
$ cat data.csv
subsystem,chapter,section,subsection,subsubsec,
A,,,,,
,func0,,,,
,,interface,,,
,,latency,,,
,,priority,,,
,func1,,,,
,,interface,,,
,,latency,,,
,,priority,,,
,,depend,,,
,,,variables,,
,,,,static,
,,,,global,
,,,functions,,
,,,,internal,
,,,,external,
$ python3 parse_column_tree.py data.csv
A
+---> func0
|     +---> interface
|     +---> latency
|     \---> priority
\---> func1
      +---> interface
      +---> latency
      +---> priority
      \---> depend
            +---> variables
            |     +---> static
            |     \---> global
            \---> functions
                  +---> internal
                  \---> external

$ python3 parse_column_tree.py data.csv --latex
\subsystem{A}
\chapter{func0}
\section{interface}
\section{latency}
\section{priority}
\chapter{func1}
\section{interface}
\section{latency}
\section{priority}
\section{depend}
\subsection{variables}
\subsubsec{static}
\subsubsec{global}
\subsection{functions}
\subsubsec{internal}
\subsubsec{external}









More information about the Python-list mailing list