CalcParser
From Apache OpenOffice Wiki
I'll document this later; for now, the code is as follows:
[python]
#!/bin/env python
import sys
from xml.sax import saxutils
from xml.sax import parse
from xml.sax import handler

# Python 2.3 uses the handler
# Replace DefaultHandler with ContentHandler
# from the handler modules
class CalcHandler(handler.ContentHandler):
    """SAX content handler that gathers table cell text row by row.

    The handler reacts to two element names: ``table:table-row`` and
    ``table:table-cell``. After a parse, ``rows`` holds one list per
    ``table:table-row`` element, and each of those lists holds one string
    per ``table:table-cell`` element: the concatenation of all character
    data reported since that cell was opened.

    Attributes:
        chars: Character-data fragments collected since the most recent
            ``table:table-cell`` start tag.
        cells: Cell strings collected since the most recent
            ``table:table-row`` start tag.
        rows: All completed rows, in document order.
    """

    def __init__(self):
        """Create a handler with empty accumulators."""
        # Explicit base-class call; this form works on Python 2 and 3.
        handler.ContentHandler.__init__(self)
        self.chars = []
        self.cells = []
        self.rows = []

    def characters(self, content):
        """Store a fragment of character data.

        A SAX parser may deliver one run of text in several calls, so the
        fragments are buffered and only joined when the cell closes.
        """
        self.chars.append(content)

    def startElement(self, name, atts):
        """Reset the relevant accumulator when a cell or row opens.

        Args:
            name: Qualified element name as reported by the parser.
            atts: Element attributes (unused).
        """
        if name == "table:table-cell":
            self.chars = []
        elif name == "table:table-row":
            # A fresh list per row, so rows already stored in ``self.rows``
            # are never mutated afterwards.
            self.cells = []

    def endElement(self, name):
        """Commit the finished cell or row.

        Args:
            name: Qualified element name as reported by the parser.
        """
        if name == "table:table-cell":
            self.cells.append("".join(self.chars))
        elif name == "table:table-row":
            self.rows.append(self.cells)
# Parse the file named by the first command-line argument and print the
# collected rows (a list of rows, each a list of cell strings).
calcHandler = CalcHandler()
parse(sys.argv[1], calcHandler)
# Parenthesised form prints the same on Python 2 and also runs on Python 3.
print(calcHandler.rows)