Win32 and Excel ranges

Mike Brenner mikeb at mitre.org
Fri Oct 18 15:03:51 EDT 2002


Larry Whitley <ldwhitley at removeme.charter.net> wrote:
> I've been going through Mark Hammond's book doing 
> his Python/Excel examples. It's working nicely but 
> I'm dissapointed with the performance of storing a
> lot of data in the spread sheet cell by cell.

It would be nice to access the spreadsheet a cell at a time,
because that is our primary paradigms of spreadsheets, but
the current COM implementation in Python makes that
prohibitively expensive.

One would have thought that it would have the same 
cost for Visual Basic and for Python, but Python 
has about the same cost for extracting one 
number from an Excel spreadsheet as it does for 
extracting 50,000 numbers, so you might as well 
take the whole spreadsheet at one time and do 
all the processing in Python.

It is worse for extracting colors than than for 
extracting numbers because ranges can't be used. 

To extract the colors, I had python generate the following Visual
Basic macro to place the colors (as numbers) into the cells,
100 columns to the right of the spreadsheet itself. 

After running this macro, a single Python call can
extract the colors from those second hundred columns.
(I used "rip" in the code at the bottom of this message.

       Sub Macro1()
       Dim offset As Integer
       Dim row As Integer
       Dim col As Integer
       Sheets("Artificial Placenta Electrophersis").Select
       
       Range("CU1:HA2100").Select
       Selection.ClearContents
       
       offset = 100
       last_row = 1171
       last_col = 75
       For row = 1 To last_row
       For col = 1 To last_col
          Cells(row, col + offset).Value = Cells(row, col).Interior.Color
       Next col
       Next row
       End Sub
       

Paul Casteels wrote:
> This is something that works for me (using NumPy for the arrays) :
>
>  xlApp = Dispatch("Excel.Application")
>  xlApp.Workbooks.Add()
>  xSheet = xlApp.ActiveWorkbook.ActiveSheet
>  xSheet.Range(xSheet.Cells(row,col), \
>    xSheet.Cells(row+aLen-1,col+1)).Value = array
>
> My array is 2*8196 and the speed of this 1 line is also very low.
> Does someone have other suggestions for speeding this up ?


Here is the code that shows it also works with 
ordinary Python arrays. The RANGE is much faster 
than doing it one cell at a time.


# junk1.py
# create a dummy excel file called C:\\junk1.xls before running this
from win32com.client import Dispatch
xLApp  = Dispatch("Excel.Application")
xLBook = xLApp.Workbooks.Open("c:\\junk1.xls")
xLBook.Worksheets("Sheet1").Select()
xSheet=xLBook.ActiveSheet

row=1; col=1; aLen=5
array=(("row 1 col A","row 1 col B"),
       ("row 2 col A","row 2 col B"),
       ("row 3 col A","row 3 col B"),
       ("row 4 col A","row 4 col B"),
       ("row 5 col A","row 5 col B"))
xSheet.Range(xSheet.Cells(row,col), \
xSheet.Cells(row+aLen-1,col+1)).Value = array
xLBook.Close(SaveChanges=1)
# end of junk.py

==================================================

# COMtools.py
#

import string
import traceback
import tools
import types
def com_project_saveas_example(directory):
   """
   com_project_saveas_html_example saves the PROJECT files in a directory as html
   """
   log("attempting to bring up PROJECT")
   global pApp
   from win32com.client import Dispatch
   pApp=Dispatch("MSProject.Application")
   log("brought up Project")
   pApp.Visible=1
   os.chdir(directory)
   # pApp.ChangeFileOpenDirectory(directory) # for the button
   for pFile in os.listdir(os.getcwd()):
      if string.find(pFile,"mpp")>1:
         csvFile=pFile+".csv"
         pApp.Add(pFile) # pApp.Documents.Add(pFile) also fails
         pApp.ActiveDocument.SaveAs(csvFile,17)
         pApp.ActiveDocument.Close()
         print "Saved "+pFile+" as "+csvFile
   pApp.Quit()


def com_word2html(directory, files):
    """
    This works for Word 2000, but Word 2000 gives all
    sorts of complex htmls and xmls instead of a
    simple, naive html like Word 97 does. If you have
    both installed, then you have to use VB to get W97.
    """
    # e.g. 3.5 inches=216 points
    from win32com.client import Dispatch
    import os
    os.chdir(directory)
    W = Dispatch("Word.Application")
    W.visible=1
    W.ChangeFileOpenDirectory(directory)
    for file in files.keys():
        infile,outfile,shortName=files[file]
        print "saving",file,"in=",infile,"out=",outfile
        Doc = W.Documents.Open(infile, ReadOnly=1)
        Doc.SaveAs(FileName=outfile, FileFormat=8) # web format
        print "                     SAVED AS WEB PAGE"
        Doc.Close()
    W.Quit()


def com_word_saveas_html_example(directory):
   """
   com_word_saveas_html_example saves the WORD files in a directory as html
   """
   log("attempting to bring up Word")
   global wApp
   from win32com.client import Dispatch
   wApp=Dispatch("Word.Application")
   log("brought up Word")
   wApp.Visible=1
   os.chdir(directory)
   wApp.ChangeFileOpenDirectory(directory) # for the button
   for WordFile in os.listdir(os.getcwd()):
      if string.find(WordFile,"doc")>1:
         htmlfile=WordFile+".html"
         wApp.Documents.Add(WordFile)
         wApp.ActiveDocument.SaveAs(htmlfile,17)
         wApp.ActiveDocument.Close()
         print "Saved "+WordFile+" as html "+htmlfile
   wApp.Quit()


def com_zz_done():
   from win32com.test.util import CheckClean
   CheckClean()
   import pythoncom
   pythoncom.CoUninitialize()


from win32com.client import Dispatch
import os
class pyExcel:
    def __init__(self):
        self.filename=None
        self.currentSheet="No Sheet"
        try: self.xL = Dispatch("Excel.Application")
        except: raise "could not dispatch Excel.Application"

    def openFile(self,thisFile=None,closeFirst=1,visible=1,
                 tracingCloses=0,tracingOpens=0):
        if self.filename!=None:
           if self.filename==thisFile:
              return
           elif closeFirst:
              if tracingCloses:
                 print "closing Excel file",self.filename
              self.closeFile()
        if thisFile:
            self.filename=thisFile
            try:
               print "opening ",self.filename
               self.xLBook=self.xL.Workbooks.Open(thisFile)
            except:
               raise "could not open Excel workbook ["+thisFile+"]"
        else:
            self.filename=""
            try:
               self.xLBook=self.xL.Workbooks.Add()
               self.filename="unsaved, unnamed Excel Workbook"
            except:
               raise "could not open a blank Excel sheet"
        self.xL.Visible = visible

    def selectSheet(self,thisFile,thisSheet):
       self.openFile(thisFile)
       self.mbo();
       self.currentSheet=""
       try:
          self.xLBook.Worksheets(thisSheet).Select()
          if 0:
             self.xL.Sheets(name).Select()
       except:
            traceback.print_stack()
            raise "problem selecting Excel sheet "+thisSheet+" in "+thisFile
       self.currentSheet=thisSheet

    def closeFile(self):
       self.mbo()
       if self.filename:
          try: self.xLBook.Close(SaveChanges=0)
          except:
             print "problem closing Excel file "+self.filename
          self.filename=None
       del self.xLBook
       
    def close(self):
       if self.filename!=None:
          self.closeFile()
       self.xL.Quit()
       del self.xL

#
# Utility Methods
#
    def mbo(self):
       if self.filename==None:
          raise "must be open: Open a spreadsheet first"

#
# Spreadsheet Operations
#

    def cell(self,row,col):
       # WARNING: This is VERY slow. Use rip to get
       # a whole block at a time. This procedure takes
       # the same amount of time on ONE cell as
       # RIP takes on a whole spreadsheet with 10,000
       # cells, because the COM overhead is constant
       # and VERY high.
       self.mbo()
       try:
          u=self.xL.ActiveSheet.Cells(row,col).Value
       except:
           traceback.print_stack()
           print
           print "COULD NOT GET CELL(row=",row,", col=",col,\
                 "from sheet",self.currentSheet
           raise
       return self.strip(u)

    def setCell(self,row,col,value):
        self.mbo()
        self.xL.ActiveSheet.Cells(row,col).Value=value
        if 0:
           self.xL.ActiveWorkbook.ActiveSheet.Cells(row,col).Value = value
        
    def cell_color(self,row,col):
       # WARNING: EXTREMELY slow. Instead, use a VB macro
       # to put the COLORS into a different block of cells
       # as the VALUE of those new cells. Then read in the
       # values of those new cells in a single block using rip.
       self.mbo()
       return self.xL.ActiveSheet.Cells(row,col).Interior.Color

    def cell_border(self,row,col):
       # WARNING: EXTREMELY slow. Instead, use a VB macro
       # to put the BORDERS into a different block of cells
       # as the VALUE of those new cells. Then read in the
       # values of those new cells in a single block using rip.
       self.mbo()
       return self.xL.ActiveSheet.Cells(row,col).Borders(9).Color

    def rip(self, fileName, sheetName, rectangle):
       row1, col1, row2, col2=rectangle
       self.selectSheet(fileName, sheetName)
       self.mbo()
       S=self.xL.ActiveWorkbook.ActiveSheet
       x=S.Range(S.Cells(row1,col1),S.Cells(row2,col2)).Value2
          # Value2 turns data objects into floating point numbers
       try: L=len(x)
       except TypeError:
          print
          print "COMtools.rip: Got a TypeError taking len(x),"
          print "    the array coming back from Range.Value"
          print "type(x)=",type(x)
          traceback.print_stack()
          print "x=",x
          raise
       y=[] 
       for row in range(len(x)):
          R=[element for element in x[row]]
          y.append(map(tools.phrase_unicode2string, R))
       if row1==row2:
          return y[0] # one-dimensional array (a row)
       if col1==col2:
          return [y[i][0] for i in range(len(y))]  # one_dimensional array (a column)
       return y       # two_dimensional array

    def save(self):
       self.mbo()
       self.xLBook.Save()

    def saveAs(self,filename):
       self.mbo()
       self.filename=filename
       self.xLBook.SaveAs(filename)





More information about the Python-list mailing list