XFastTokenHandler
DRAFT
See FastParser
Abstract
An instance of this interface is used by a FastParser to convert XML names from UTF-8 to the integer tokens used by XFastContextHandler and XFastAttributeList.
IDL
// NOTE(review): this page is titled XFastTokenHandler, but the IDL listed here
// declares XFastAttributeList - confirm that this is the intended listing.
module com { module sun { module star { module xml { module sax {

/** A container for the attributes of an XML element.

    <br>Attributes are separated into known attributes and unknown attributes.
    <p>Known attributes have a local name that is known to the
    <type>XFastTokenHandler</type> registered at the <type>XFastParser</type>
    which created the SAX event containing these attributes. If an attribute
    also has a namespace, that namespace must be registered at the
    <type>XFastParser</type>; otherwise the attribute is also unknown, even if
    its local name is known.
 */
interface XFastAttributeList: com::sun::star::uno::XInterface {

    /** Checks if an attribute is available.<br>

        @param Token
            contains the integer token from the <type>XFastTokenHandler</type>
            registered at the <type>XFastParser</type>.<br>

            If the attribute name has a namespace that was registered with the
            <type>XFastParser</type>, Token contains the integer token of the
            attribute's local name from the <type>XFastTokenHandler</type> and
            the integer token of the namespace combined with an arithmetic
            <b>or</b> operation.

        @returns
            <TRUE/>, if the attribute is available
     */
    boolean hasAttribute( [in] long Token );

    /** Retrieves the token of an attribute's value.<br>

        @param Token
            contains the integer token from the <type>XFastTokenHandler</type>
            registered at the <type>XFastParser</type>.<br>

            If the attribute name has a namespace that was registered with the
            <type>XFastParser</type>, Token contains the integer token of the
            attribute's local name from the <type>XFastTokenHandler</type> and
            the integer token of the namespace combined with an arithmetic
            <b>or</b> operation.

        @returns
            The integer token of the value from the attribute or
            <const>FastToken::Invalid</const>

        @raises SAXException
            if the attribute is not available
     */
    long getValueToken( [in] long Token ) raises( SAXException );

    /** Retrieves the token of an attribute's value.<br>

        @param Token
            contains the integer token from the <type>XFastTokenHandler</type>
            registered at the <type>XFastParser</type>.<br>

            If the attribute name has a namespace that was registered with the
            <type>XFastParser</type>, Token contains the integer token of the
            attribute's local name from the <type>XFastTokenHandler</type> and
            the integer token of the namespace combined with an arithmetic
            <b>or</b> operation.

        @param Default
            This value will be returned if the attribute is not available

        @returns
            If the attribute is available, the integer token of the value from
            the attribute or <const>FastToken::Invalid</const>. If it is not
            available, the value of <param>Default</param> is returned.
     */
    long getOptionalValueToken( [in] long Token, [in] long Default );

    /** Retrieves the value of an attribute.<br>

        @param Token
            contains the integer token from the <type>XFastTokenHandler</type>
            registered at the <type>XFastParser</type>.<br>

            If the attribute name has a namespace that was registered with the
            <type>XFastParser</type>, Token contains the integer token of the
            attribute's local name from the <type>XFastTokenHandler</type> and
            the integer token of the namespace combined with an arithmetic
            <b>or</b> operation.

        @returns
            The string value from the attribute.

        @raises SAXException
            if the attribute is not available
     */
    string getValue( [in] long Token ) raises( SAXException );

    /** Retrieves the value of an attribute.<br>

        @param Token
            contains the integer token from the <type>XFastTokenHandler</type>
            registered at the <type>XFastParser</type>.<br>

            If the attribute name has a namespace that was registered with the
            <type>XFastParser</type>, Token contains the integer token of the
            attribute's local name from the <type>XFastTokenHandler</type> and
            the integer token of the namespace combined with an arithmetic
            <b>or</b> operation.

        @returns
            The string value from the attribute or an empty string if the
            attribute is not available.
     */
    string getOptionalValue( [in] long Token );

    /** Returns a sequence of the attributes whose names and/or namespace URLs
        cannot be translated to tokens.
     */
    sequence< ::com::sun::star::xml::Attribute > getUnknownAttributes();

}; }; }; }; }; };
Sample implementation using gperf
gperf is a cool little tool to generate a perfect hash algorithm at compile time for a fixed set of strings.
When writing a filter to import an XML format you usually have a list of valid XML names. They can be easily extracted from a schema. Let's assume you already have a list of valid XML names in the file tokens.txt, one name per line, for example
User
Admin
Name
Login
The following Perl script converts that file into an input file for gperf and also a header file with one const sal_Int32 identifier for each XML name.
# gentoken.pl - generate the token header and the gperf input from a token list.
#
# Usage: perl gentoken.pl <tokens.txt> <tokens.hxx> <tokens.gperf>
#
# Every non-empty line of the token list holds an XML name, optionally followed
# by whitespace and the C++ identifier to use for it. When no identifier is
# given, one is derived from the name: "XML_" is prepended, the characters
# '-', '.' and ':' become '_', and '+' becomes "PLUS".
#
# Output:
#   <tokens.hxx>    one "const sal_Int32 <identifier> = <n>;" per name, numbered
#                   in sorted name order, followed by XML_TOKEN_COUNT.
#   <tokens.gperf>  gperf input mapping each name to its identifier.

use strict;
use warnings;

my $ARGV0 = shift;
my $ARGV1 = shift;
my $ARGV2 = shift;

# Fail early with a readable message instead of trying to open an undef path.
die "usage: gentoken.pl tokens.txt tokens.hxx tokens.gperf\n"
    unless defined $ARGV0 && defined $ARGV1 && defined $ARGV2;

# Three-argument open keeps special characters in a path from being
# interpreted as an open mode or a pipe.
open( my $token_fh, '<', $ARGV0 ) or die "can't open token file: $!";

my %tokens;
while ( defined( my $line = <$token_fh> ) )
{
    chomp( $line );
    my @token = split( ' ', $line );

    # A blank line carries no name; skip it rather than registering an
    # entry with an undefined key.
    next unless @token;

    unless ( defined( $token[1] ) )
    {
        $token[1] = "XML_" . $token[0];
        # '-' is already mapped to '_' here, so no separate handling of '-'
        # is needed afterwards.
        $token[1] =~ tr/\-\.\:/___/;
        $token[1] =~ s/\+/PLUS/g;
    }

    $tokens{ $token[0] } = $token[1];
}
close( $token_fh );

open( my $hxx_fh,   '>', $ARGV1 ) or die "can't open tokens.hxx file: $!";
open( my $gperf_fh, '>', $ARGV2 ) or die "can't open tokens.gperf file: $!";

# gperf declarations section: C++ output, a file-scope word list, null
# pointers for unused slots, and the struct each keyword line initialises.
print {$gperf_fh} "%language=C++\n";
print {$gperf_fh} "%global-table\n";
print {$gperf_fh} "%null-strings\n";
print {$gperf_fh} "%struct-type\n";
print {$gperf_fh} "struct xmltoken\n";
print {$gperf_fh} "{\n";
print {$gperf_fh} " const sal_Char *name; sal_Int32 nToken; \n";
print {$gperf_fh} "};\n";
print {$gperf_fh} "%%\n";

# Header prologue: include guard and the sal type definitions.
print {$hxx_fh} "#ifndef _TOKEN_HXX_\n";
print {$hxx_fh} "#define _TOKEN_HXX_\n\n";
print {$hxx_fh} "#ifndef _SAL_TYPES_H_\n";
print {$hxx_fh} "#include <sal/types.h>\n";
print {$hxx_fh} "#endif\n\n";

# Number the tokens in sorted name order so the numbering is reproducible.
my $index = 0;
foreach my $name ( sort( keys( %tokens ) ) )
{
    print {$hxx_fh}   "const sal_Int32 $tokens{$name} = $index;\n";
    print {$gperf_fh} "$name,$tokens{$name}\n";
    $index++;
}

print {$gperf_fh} "%%\n";
print {$hxx_fh}   "const sal_Int32 XML_TOKEN_COUNT = $index;\n";
print {$hxx_fh}   "#endif\n";

# Buffered write errors (e.g. a full disk) only surface on close.
close( $hxx_fh )   or die "can't write tokens.hxx file: $!";
close( $gperf_fh ) or die "can't write tokens.gperf file: $!";
The generated .hxx file has the following format
#ifndef _TOKEN_HXX_ #define _TOKEN_HXX_ #ifndef _SAL_TYPES_H_ #include <sal/types.h> #endif const sal_Int32 User = 0: const sal_Int32 Admin = 1; const sal_Int32 Name = 2; const sal_Int32 Login = 3; const sal_Int32 XML_TOKEN_COUNT = 4;
This can be automated by adding the following at the end of your makefile.mk
# Generate the token constants header and the gperf input file from the token
# list by running gentoken.pl.
$(INCCOM)$/tokens.hxx $(MISC)$/tokens.gperf : tokens.txt gentoken.pl
	$(PERL) gentoken.pl tokens.txt $(INCCOM)$/tokens.hxx $(MISC)$/tokens.gperf

# Run gperf on the generated input, then copy its output to tokens.cxx while
# rewriting every "(char*)0" into "(char*)0, 0" (presumably so that the empty
# table slots initialise both members of struct xmltoken - confirm).
$(INCCOM)$/tokens.cxx : $(MISC)$/tokens.gperf
	gperf --compare-strncmp --output-file=$(MISC)$/_tokens.cxx $(MISC)$/tokens.gperf
	$(TYPE) $(MISC)$/_tokens.cxx | $(SED) -e "s/(char\*)0/(char\*)0, 0/g" >$(INCCOM)$/tokens.cxx

# The object file that includes the generated sources depends on both of them.
$(SLO)$/tokenmap.obj : $(INCCOM)$/tokens.cxx $(INCCOM)$/tokens.hxx

# NOTE(review): this rule names $(INCCOM)$/tokens.gperf, while the rules above
# write tokens.gperf to $(MISC) - confirm which directory is intended.
$(INCCOM)$/tokens.gperf : $(INCCOM)$/tokens.hxx
This automatically creates tokens.hxx and tokens.cxx in the platform-specific include folder of your project.
The following code uses the tokens.cxx generated by gperf to implement an XFastTokenHandler
#include <cstddef>

#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
#include <com/sun/star/xml/sax/FastToken.hpp>
#include <cppuhelper/implbase1.hxx>
#include <osl/mutex.hxx>
#include <rtl/string.hxx>
#include <rtl/ustring.hxx>

#include "tokens.hxx"
#include "tokens.cxx" // this also includes the c++ code created from gperf

using ::rtl::OString;
using ::rtl::OUString;
using ::osl::Mutex;
using ::osl::MutexGuard;
using namespace ::com::sun::star::xml::sax;
using namespace ::com::sun::star::uno;

/** Token handler backed by the perfect hash that gperf generated from the
    token list: maps XML names to the integer constants from tokens.hxx and
    back.
 */
class FastTokenHandler : public ::cppu::WeakImplHelper1< XFastTokenHandler >
{
public:
    virtual sal_Int32 SAL_CALL getToken( const OUString& Identifier ) throw (RuntimeException);
    virtual OUString SAL_CALL getIdentifier( sal_Int32 Token ) throw (RuntimeException);
    virtual sal_Int32 SAL_CALL getTokenFromUTF8( const Sequence< sal_Int8 >& Identifier ) throw (RuntimeException);
};

/** Returns the mutex that every FastTokenHandler method locks. */
Mutex& getTokenMutex()
{
    static Mutex aMutex;
    return aMutex;
}

namespace
{

/** Looks up a name in the gperf word set.

    @param pName    the bytes of the name; need not be zero terminated
    @param nLength  number of bytes in pName
    @returns the token of the name, or FastToken::DONTKNOW if the name is not
             part of the word set
 */
sal_Int32 lookupToken( const char* pName, sal_Int32 nLength )
{
    // Pointer-to-const works with gperf versions that return either
    // "struct xmltoken *" or "const struct xmltoken *".
    const struct xmltoken* pEntry = Perfect_Hash::in_word_set( pName, nLength );
    return pEntry ? pEntry->nToken : FastToken::DONTKNOW;
}

} // namespace

/** Converts a UTF-16 identifier to its token.

    @returns the token, or FastToken::DONTKNOW for an unknown identifier
 */
sal_Int32 FastTokenHandler::getToken( const OUString& Identifier ) throw (RuntimeException)
{
    MutexGuard guard( getTokenMutex() );

    // The word set holds 8-bit strings, so convert the identifier first.
    const OString aUTF8( Identifier.getStr(), Identifier.getLength(), RTL_TEXTENCODING_UTF8 );
    return lookupToken( aUTF8.getStr(), aUTF8.getLength() );
}

/** Converts a token back to its identifier.

    @returns the identifier, or an empty string if the token is outside the
             range [0, XML_TOKEN_COUNT)
 */
OUString FastTokenHandler::getIdentifier( sal_Int32 nToken ) throw (RuntimeException)
{
    MutexGuard guard( getTokenMutex() );

    // Reject negative tokens as well: FastToken::DONTKNOW is a legal argument
    // and must not be used as an array index.
    if( nToken < 0 || nToken >= XML_TOKEN_COUNT )
        return OUString();

    static OUString aTokens[XML_TOKEN_COUNT];
    static bool bInitialized = false;
    if( !bInitialized )
    {
        // gperf stores each keyword at the index of its hash value, not of
        // its token, and leaves the unused slots with a null name. Walk the
        // whole table once to build the token -> name mapping.
        const std::size_t nSlots = sizeof( wordlist ) / sizeof( wordlist[0] );
        for( std::size_t nSlot = 0; nSlot < nSlots; ++nSlot )
        {
            const struct xmltoken& rEntry = wordlist[nSlot];
            if( rEntry.name && rEntry.nToken >= 0 && rEntry.nToken < XML_TOKEN_COUNT )
                aTokens[rEntry.nToken] = OUString::createFromAscii( rEntry.name );
        }
        bInitialized = true;
    }

    return aTokens[nToken];
}

/** Converts an identifier given as UTF-8 bytes to its token.

    @returns the token, or FastToken::DONTKNOW for an unknown identifier
 */
sal_Int32 FastTokenHandler::getTokenFromUTF8( const Sequence< sal_Int8 >& Identifier ) throw (RuntimeException)
{
    MutexGuard guard( getTokenMutex() );
    return lookupToken( reinterpret_cast< const char* >( Identifier.getConstArray() ),
                        Identifier.getLength() );
}