XML processing to CSV
From Apache OpenOffice Wiki
This script was published by Tommy in the OpenOffice Basic, Python, BeanShell, JavaScript forum.
This script converts the AutoCorrect entries stored as XML into a CSV file.
Original code
#-*- coding: utf-8 -*-
"""Export LibreOffice/OpenOffice AutoCorrect entries to a delimited text file.

The AutoCorrect replacement table is stored as an XML member inside a ZIP
archive (the ``acor_*.dat`` file).  This script reads that member, pulls the
configured attributes from every entry element and writes one delimited line
per entry.  Requires Python 3.
"""
import os
import sys
import zipfile
import xml.dom.minidom

##########################################################
# Script to export LibreOffice Auto Correct Entries
# into a flat file (e.g. to reuse some of them with autokey)
##########################################################
# This is a ZIP where LibreOffice stores its auto correct entries.
# The value below is a Windows example; adjust it to your own profile path.
ACEfile = r'C:\Program Files\OpenOffice\User\LibreOffice 3\user\autocorr\acor_it-IT.dat'
ifname = 'DocumentList.xml'  # Name of the file inside the ZIP archive that contains auto correct entries
ofname = 'AutoCorrectEntries.csv'  # any desired output file name for the export
tagname = 'block-list:block'  # (as in DocumentList.xml)
schema = ['block-list:abbreviated-name', 'block-list:name']  # (as in DocumentList.xml)
default_encoding = 'UTF-8'  # (as in DocumentList.xml)
ofdelimiter = ";"  # any desired delimiter for export
##########################################################


def export_entries(ace_path=ACEfile, member=ifname, out_path=ofname,
                   tag=tagname, fields=schema, delimiter=ofdelimiter,
                   fallback_encoding=default_encoding):
    """Write the AutoCorrect entries found in *ace_path* to *out_path*.

    Args:
        ace_path: Path of the ZIP archive holding the AutoCorrect data.
        member: Name of the XML file inside the archive.
        out_path: Path of the text file to create (overwritten if present).
        tag: Qualified tag name of the elements that represent one entry.
        fields: Qualified attribute names to export, in column order.
        delimiter: String placed between the exported attribute values.
        fallback_encoding: Output encoding used when the XML document does
            not declare one.

    Returns:
        The number of entries written.

    Raises:
        OSError: If the archive cannot be read or the output cannot be written.
        zipfile.BadZipFile: If *ace_path* is not a valid ZIP archive.
        KeyError: If *member* does not exist inside the archive.
        xml.parsers.expat.ExpatError: If the member is not well-formed XML.
    """
    # Parse the input file as DOM (document object model, xml-tree) into
    # memory; both the archive and the member are closed right afterwards.
    with zipfile.ZipFile(ace_path) as archive:
        with archive.open(member, "r") as xml_stream:
            doctree = xml.dom.minidom.parse(xml_stream)

    try:
        encoding = doctree.encoding or fallback_encoding
        # NOTE: values are joined verbatim; a value containing the delimiter
        # is not quoted or escaped.
        rows = [
            delimiter.join(elem.getAttribute(name) for name in fields)
            for elem in doctree.getElementsByTagName(tag)
        ]
    finally:
        # Actually call unlink() so the DOM tree is released.
        doctree.unlink()

    # The output is opened only after the input was read successfully, so a
    # failed run does not truncate an existing export.  Text is encoded by the
    # file object rather than by hand.
    with open(out_path, "w", encoding=encoding) as out_file:
        out_file.writelines(row + "\n" for row in rows)

    return len(rows)


if __name__ == "__main__":
    export_entries()