Write tables from Word (.docx) to Excel (.xlsx) using xlsxwriter

Peter Otten __peter__ at web.de
Wed May 27 13:36:02 EDT 2020


BBT wrote:

> I am trying to parse a Word (.docx) document for tables, then copy these
> tables over to Excel using xlsxwriter. This is my code:
> 
> from docx.api import Document
> import xlsxwriter
>  
> document = Document('/Users/xxx/Documents/xxx/Clauses Sample - Copy v1 - for merge.docx')
> tables = document.tables
>  
> wb = xlsxwriter.Workbook('C:/Users/xxx/Documents/xxx/test clause retrieval.xlsx')
> Sheet1 = wb.add_worksheet("Compliance")
> index_row = 0
>  
> print(len(tables))
>  
> for table in document.tables:
> data = []
> keys = None
> for i, row in enumerate(table.rows):
>     text = (cell.text for cell in row.cells)
>  
>     if i == 0:
>         keys = tuple(text)
>         continue
>     row_data = dict(zip(keys, text))
>     data.append(row_data)
>     #print (data)
>     #big_data.append(data)
>     Sheet1.write(index_row,0, str(row_data))
>     index_row = index_row + 1
>  
> print(row_data)
>  
> wb.close()
> 
> 
> This is my desired output: https://i.stack.imgur.com/9qnbw.png
> 
> However, here is my actual output: https://i.stack.imgur.com/vpXej.png
> 
> I am aware that my current output produces a list of strings instead.
> 
> Is there any way that I can get my desired output using xlsxwriter?

I had to simulate docx.api. With that caveat the following seems to work:

import xlsxwriter
 
# begin simulation of
# from docx.api import Document

class Cell:
    """Minimal stand-in for a docx table cell: it only carries its text."""

    def __init__(self, text):
        # Stored as-is; no conversion or validation is applied.
        self.text = text

class Row:
    """Minimal stand-in for a docx table row.

    ``cells`` is an iterable of cell values; each one is wrapped in a
    ``Cell`` and the results are kept, in order, in ``self.cells``.
    """

    def __init__(self, cells):
        self.cells = list(map(Cell, cells))

class Table:
    """Minimal stand-in for a docx table.

    ``data`` is an iterable of rows, each row itself an iterable of cell
    values. Every row is wrapped in a ``Row`` and stored in ``self.rows``.
    """

    def __init__(self, data):
        wrapped = []
        for cell_values in data:
            wrapped.append(Row(cell_values))
        self.rows = wrapped

class Document:
    """Stand-in for ``docx.api.Document`` exposing two hard-coded tables.

    Takes no file path; ``self.tables`` always holds the same sample data:
    a 3x2 table followed by a 2x3 table.
    """

    def __init__(self):
        two_columns = Table([
            ["Hello", "Test"],
            ["est", "ing"],
            ["gg", "ff"],
        ])
        three_columns = Table([
            ["Foo", "Bar", "Baz"],
            ["ham", "spam", "jam"],
        ])
        self.tables = [two_columns, three_columns]

# The export code below reads its tables from this module-level instance.
document = Document()

# end simulation

# Copy every table in ``document`` onto a single worksheet, stacked top to
# bottom, with each docx cell landing in its own spreadsheet cell.
#
# The workbook is used as a context manager so that it is closed when the
# block exits, including when one of the write() calls raises; a trailing
# wb.close() would be skipped in that case.
with xlsxwriter.Workbook("tmp.xlsx") as wb:
    sheet = wb.add_worksheet("Compliance")

    offset = 0  # worksheet row at which the current table starts
    for table in document.tables:
        # Start row numbering at ``offset`` so ``y`` is already the
        # absolute worksheet row for this table row.
        for y, row in enumerate(table.rows, start=offset):
            for x, cell in enumerate(row.cells):
                sheet.write(y, x, cell.text)
        offset += len(table.rows) + 1  # one empty row between tables




More information about the Python-list mailing list