Array Sort add-in function.

From Apache OpenOffice Wiki
< Python
Revision as of 20:23, 5 September 2013 by JZA (Talk | contribs)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search


This script was published by Charlie Young at the Code Snippets Forum.

For many purposes in Calc, it is pretty easy to define data ranges with sort specifications (sort order, headers, etc.), and to redo a sort with a couple of clicks and/or keystrokes, and of course for many purposes it's better to use a database. But it might also be useful sometimes to have an array function automatically sort a range. I have examples, and others may as well. I think it's probably preferable to the tricks in Sorting and Filtering data with formulas.

Original code

import uno
import unohelper
import locale
import re
from com.sun.star.lang import Locale
from com.pysort import XPySort
from operator import methodcaller
from functools import partial
 
# PySort Calc Add-in implementation.
NUMBER_RE = re.compile(r'[1-9]\d*')
 
class PySortImpl( unohelper.Base, XPySort ):
    def __init__( self, ctx ):
        self.ctx = ctx
 
    def PySort(self, inRange, fieldspecs, HasHeaders, ByRows, CollationType, docProps):
        inRange = list(inRange)
        out = []
        if not fieldspecs:
            fieldspecs = ((0,""),)
        elif type(fieldspecs) == float:
            fieldspecs = ((fieldspecs,""),)
 
        specs = CheckFields(fieldspecs,len(inRange[0]))  #Checks fields for validity
 
        TypeCollation = 0   
        if type(CollationType) == float:
            TypeCollation = int(CollationType) if 0 <= CollationType <=  3 else 0      
 
        if ByRows:
            inRange = zip(*inRange)
        if HasHeaders:
            out.append(inRange[0])
            inRange = inRange[1:]
 
        if TypeCollation == 0:
            locale.setlocale(locale.LC_ALL, "")
            sortrange = pyCollator_sort(inRange,specs)
        elif TypeCollation == 1:
            locale.setlocale(locale.LC_ALL, "")
            sortrange = strxfrm_sort(inRange,specs)
        elif TypeCollation == 2:
            aLoc = docProps.getPropertyValue("CharLocale")  #Use locale of document for collation. This could be a problem with multilingual spreadsheets.
            xCollator = self.ctx.getServiceManager().createInstance("com.sun.star.i18n.Collator")
            sortrange = xCollator_sort(inRange,specs,xCollator,aLoc)
        else:
            sortrange = byte_sort(inRange,specs)
 
        out.extend(sortrange)
 
        if ByRows:
            return tuple(zip(*out))
 
        return tuple(out)
 
def blankval(v):
    return 1 if type(v) == unicode and len(v) == 0 else 0
 
def pyCollator_sort( rrange, specs):
    for spec in specs[::-1]:
        field, rev, textfirst, ignore_case, isnatural = map(int, spec )
        k = partial(pycmp2key,i = field,TextFirst = textfirst, ignore_case = ignore_case, isnatural = isnatural, rev = rev)
        rrange.sort(key = k(pycmp),reverse = rev)
    return rrange
 
def pycmp(i,a,b,TextFirst,ignore_case, isnatural, rev):
    #acomp = a[i].lower().encode("utf8") if ignore_case and type(a[i]) == unicode else (a[i].encode("utf8") if type(a[i]) == unicode else a[i])
    #bcomp = b[i].lower().encode("utf8") if ignore_case and type(b[i]) == unicode else (b[i].encode("utf8") if type(b[i]) == unicode else b[i])
    acomp = a[i].lower() if ignore_case and type(a[i]) == unicode else (a[i] if type(a[i]) == unicode else a[i])
    bcomp = b[i].lower() if ignore_case and type(b[i]) == unicode else (b[i] if type(b[i]) == unicode else b[i])
    acomp = naturalkey(acomp) if isnatural else acomp
    bcomp = naturalkey(bcomp) if isnatural else bcomp
 
    if blankval(acomp) and blankval(bcomp):
       return 0
    elif blankval(acomp): 
        return 1 if not rev else -1
    elif blankval(bcomp):
        return -1 if not rev else 1
    elif type(acomp) == unicode and type(bcomp) == unicode: 
        return locale.strcoll(acomp,bcomp)
    elif type(acomp) == float and type(bcomp) == float:
        return numcmp(acomp,bcomp)
    elif type(acomp) == float:
        return 1 if TextFirst else -1
    else:
       return -1 if TextFirst else 1
 
def pycmp2key(mycmp,i,TextFirst,ignore_case,isnatural,rev):
    #Convert a cmp= function into a key= function
    class K(object):
        def __init__(self, obj, *args):
            self.obj = obj
        def __lt__(self, other):
            return mycmp(i,self.obj, other.obj, TextFirst,ignore_case,isnatural,rev) < 0
        def __gt__(self, other):
            return mycmp(i,self.obj, other.obj, TextFirst,ignore_case,isnatural,rev) > 0
        def __eq__(self, other):
            return mycmp(i,self.obj, other.obj, TextFirst,ignore_case,isnatural,rev) == 0
        def __le__(self, other):
            return mycmp(i,self.obj, other.obj, TextFirst,ignore_case,isnatural,rev) <= 0
        def __ge__(self, other):
            return mycmp(i,self.obj, other.obj, TextFirst,ignore_case,isnatural,rev) >= 0
        def __ne__(self, other):
            return mycmp(i,self.obj, other.obj, TextFirst,ignore_case,isnatural,rev) != 0
    return K
 
def blankstoend(rrange,field):
    i = firstblank = 0
    rlen = len(rrange)
    while i < rlen and not blankval(rrange[i][field]):
        i += 1
    firstblank = i
    while i < rlen and blankval(rrange[i][field]):
        i += 1
    lastblank = i
    if firstblank < rlen:
        return rrange[:firstblank] + rrange[lastblank:] + rrange[firstblank:lastblank]
    else:
        return rrange
 
def make_key_func(spec):
 
    def key_func(item):
        key = item[field]
        if isinstance(key, unicode):
            for function in functions:
                key = function(key)
            return (1 if len(key)==0 else 0 , key) # here do we put the <empties> to the end
        else:
            return (number_sort_key,key)
    field, rev, TextFirst, ignore_case, isnatural = map(int, spec)
    number_sort_key = 1 if TextFirst else -1 # Here is the corrected 't' -behavior
    functions = list()
    if ignore_case:
        functions.append(methodcaller('lower'))
    if isnatural:
        functions.append(
            partial(
                NUMBER_RE.sub, lambda m: '%03d%s' % (len(m.group()), m.group())
            )
        )
    functions.append(methodcaller('encode','utf8'))
    functions.append(locale.strxfrm)
    return key_func
 
 
def strxfrm_sort(items, specs):
    for spec in reversed(specs):
        field, rev, TextFirst, ignore_case, isnatural = map(int, spec)
        items.sort(key=make_key_func(spec), reverse = rev)
        items = blankstoend(items,field)
    return items
 
def xCollator_sort(rrange,specs,xCollator,aLoc):
    for spec in specs[::-1]:
        field, rev, TextFirst, ignore_case, isnatural = map(int, spec)
        xCollator.loadDefaultCollator(aLoc,ignore_case)
        k = partial(cmp2key,i = field,xCollator = xCollator,TextFirst = TextFirst, isnatural = isnatural, rev = rev)
        rrange.sort(key=k(fncmp),reverse = rev)
    return rrange
 
#Comparison function.  i is field index, a and b are entire rows.
def fncmp(i,a,b,xCollator,TextFirst,isnatural,rev):
    acomp = naturalkey(a[i]) if isnatural and type(a[i]) == unicode else (a[i] if type(a[i]) == unicode else a[i])
    bcomp = naturalkey(b[i]) if isnatural and type(b[i]) == unicode else (b[i] if type(b[i]) == unicode else b[i])
 
    if blankval(acomp) and blankval(bcomp):
        return 0
    elif blankval(acomp): 
        return 1 if not rev else -1
    elif blankval(bcomp):
        return -1 if not rev else 1
    elif type(acomp) == unicode and type(bcomp) == unicode: 
        return xCollator.compareString(acomp,bcomp)
    elif type(acomp) == float and type(bcomp) == float:
        return numcmp(acomp,bcomp)
    elif type(acomp) == float:
        return 1 if TextFirst else -1
    else:
       return -1 if TextFirst else 1
 
def naturalkey(arg):
    if type(arg) == unicode:
        nkey = partial(NUMBER_RE.sub, lambda m: '%03d%s' % (len(m.group()), m.group()))
        return nkey(arg)
    else:
        return arg
 
#Compare two numerics
def numcmp(a,b):
    if a < b:
        return -1
    elif a > b:
        return 1
    else:
        return 0
 
def cmp2key(mycmp,i,xCollator,TextFirst,isnatural,rev):
    #Convert a cmp= function into a key= function
    class K(object):
        def __init__(self, obj, *args):
            self.obj = obj
        def __lt__(self, other):
            return mycmp(i,self.obj, other.obj, xCollator, TextFirst, isnatural, rev) < 0
        def __gt__(self, other):
            return mycmp(i,self.obj, other.obj, xCollator, TextFirst, isnatural, rev) > 0
        def __eq__(self, other):
            return mycmp(i,self.obj, other.obj, xCollator, TextFirst, isnatural, rev) == 0
        def __le__(self, other):
            return mycmp(i,self.obj, other.obj, xCollator, TextFirst, isnatural, rev) <= 0
        def __ge__(self, other):
            return mycmp(i,self.obj, other.obj, xCollator, TextFirst, isnatural, rev) >= 0
        def __ne__(self, other):
            return mycmp(i,self.obj, other.obj, xCollator, TextFirst, isnatural, rev) != 0
    return K
 
def texttofront(rrange,field,rev):
    if not rev:
        i = 0
        while type(rrange[i][field]) == float:
            i += 1
    else:
        i = len(rrange)
        while type(rrange[i - 1][field]) == float:
            i -= 1
    return rrange[i:] + rrange[:i]      
 
def byte_sort( rrange, specs):
    for spec in specs[::-1]:
        field, rev, textfirst, ignore_case, isnatural = map(int, spec )
        rrange.sort(key=lambda e: e[field].lower()
                    if ignore_case and type(e[field])==unicode
                    else e[field], reverse=rev)
        if textfirst:
            rrange = texttofront(rrange, field, rev)
    return rrange
 
def CheckFields(inArray,maxfields):
    outArray = []
    speclen = len(inArray)
    speclen0 = len(inArray[0])
 
    fieldnumbersin = [getfieldnumber(inArray[i][0],maxfields) for i in range(speclen)]
    fieldnumbersout = []
 
    for fieldnumber in fieldnumbersin:
        if fieldnumber not in fieldnumbersout:
            fieldnumbersout.append(fieldnumber)
        else:
            fieldnumbersout.append(-1)
 
    testfields = [tuple([fieldnumbersout[i]] + [getfieldvalue(inArray[i][1],j) if speclen0 > 1 else 0 for j in range(0,4)]) for i in range(speclen)]
 
    for i in range(len(testfields)):
        if testfields[i][0] != -1:
            outArray.append(tuple(testfields[i]))
 
    return outArray
 
def getfieldnumber(n,maxfields):
    if type(n) == float:
        return int(n) if 0 <= n < maxfields else -1
    elif type(n) == int:
        return n if 0 <= n < maxfields else -1
    else:
        return -1 
 
def getfieldvalue(s,i):
    flags = ["r","t","c","n"]
    if type(s) == unicode:
        return 1 if flags[i] in s.lower() else 0
    else:
        return 0 
 
def createInstance( ctx ):
    return PySortImpl( ctx )
 
g_ImplementationHelper = unohelper.ImplementationHelper()
g_ImplementationHelper.addImplementation(
   createInstance,"com.pysort.python.PySortImpl",
      ("com.sun.star.sheet.AddIn",),)

Process of the script

This script uses the power of python to re-implement the sort functionality through a pythonesque functionality to enhance and costumize. The code is internationalized so it will work over different locales. It also carries the conversion between the cmp formulas to a key formula which allow us to have a standarized toolset.

Personal tools