XFastTokenHandler

From Apache OpenOffice Wiki
Revision as of 17:08, 10 July 2010 by Newacct (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

DRAFT

See FastParser

Abstract

An instance of this interface is used by a FastParser to convert XML names from UTF-8 to the integer tokens used by XFastContextHandler and XFastAttributeList.

IDL


module com {  module sun {  module star {  module xml {  module sax {  

/** a container for the attributes of an xml element. 

	<br>Attributes are separated into known attributes and unknown attributes.
	<p>Known attributes have a local name that is known to the <type>XFastTokenHandler</type>
	registered at the <type>XFastParser</type> which created the sax event containing
	this attribute. If an attribute also has a namespace, that must be registered
	at the <type>XFastParser</type>, else this attribute is also unknown even if
	the local name is known.
 */
interface XFastAttributeList: com::sun::star::uno::XInterface
{
	/** checks if an attribute is available.<br>

		@param Token
			contains the integer token from the <type>XFastTokenHandler</type>
			registered at the <type>XFastParser</type>.<br>

			If the attribute name has a namespace that was registered with the
			<type>XFastParser</type>, Token contains the integer token of the
			attribute's local name from the <type>XFastTokenHandler</type> and
			the integer token of the namespace combined with an arithmetic
			<b>or</b> operation.

		@returns
			<TRUE/>, if the attribute is available
	*/
	boolean hasAttribute( [in] long Token );

	/** retrieves the token of an attribute's value.<br>

		@param Token
			contains the integer token from the <type>XFastTokenHandler</type>
			registered at the <type>XFastParser</type>.<br>

			If the attribute name has a namespace that was registered with the
			<type>XFastParser</type>, Token contains the integer token of the
			attribute's local name from the <type>XFastTokenHandler</type> and
			the integer token of the namespace combined with an arithmetic
			<b>or</b> operation.

		@returns
			The integer token of the value from the attribute or <const>FastToken::Invalid</const>

		@raises SAXException
			if the attribute is not available

	*/
	long getValueToken( [in] long Token )
		raises( SAXException );

	/** retrieves the token of an attribute's value, or a caller-supplied
		default if the attribute is not available.<br>

		@param Token
			contains the integer token from the <type>XFastTokenHandler</type>
			registered at the <type>XFastParser</type>.<br>

			If the attribute name has a namespace that was registered with the
			<type>XFastParser</type>, Token contains the integer token of the
			attribute's local name from the <type>XFastTokenHandler</type> and
			the integer token of the namespace combined with an arithmetic
			<b>or</b> operation.

		@param Default
			This value will be returned if the attribute is not available

		@returns
			If the attribute is available it returns the integer token of the value
			from the attribute or <const>FastToken::Invalid</const>.
			If not the value of <param>Default</param> is returned.

	*/
	long getOptionalValueToken( [in] long Token, [in] long Default );

	/** retrieves the value of an attribute.<br>

		@param Token
			contains the integer token from the <type>XFastTokenHandler</type>
			registered at the <type>XFastParser</type>.<br>

			If the attribute name has a namespace that was registered with the
			<type>XFastParser</type>, Token contains the integer token of the
			attribute's local name from the <type>XFastTokenHandler</type> and
			the integer token of the namespace combined with an arithmetic
			<b>or</b> operation.

		@returns
			The string value from the attribute.

		@raises SAXException
			if the attribute is not available

	*/
	string getValue( [in] long Token )
		raises( SAXException );

	/** retrieves the value of an attribute, or an empty string if the
		attribute is not available.<br>

		@param Token
			contains the integer token from the <type>XFastTokenHandler</type>
			registered at the <type>XFastParser</type>.<br>

			If the attribute name has a namespace that was registered with the
			<type>XFastParser</type>, Token contains the integer token of the
			attribute's local name from the <type>XFastTokenHandler</type> and
			the integer token of the namespace combined with an arithmetic
			<b>or</b> operation.

		@returns
			The string value from the attribute or an empty string if the
			attribute is not available.
	*/
	string getOptionalValue( [in] long Token );

	/** returns a sequence of the attributes whose names and/or namespace URLs
		cannot be translated to tokens.
	*/
	sequence< ::com::sun::star::xml::Attribute > getUnknownAttributes();
};

}; }; }; }; };  

Sample implementation using gperf

gperf is a cool little tool to generate a perfect hash algorithm at compile time for a fixed set of strings.

When writing a filter to import an XML format you usually have a list of valid XML names. They can easily be extracted from a schema. Let's assume you already have a list of valid XML names in the file tokens.txt, for example

User
Admin
Name
Login

The following Perl script converts that file into an input file for gperf and a header file with one const sal_Int32 identifier for each XML name.

use strict;
use warnings;

# Converts a token list into a C++ header and a gperf input file.
#
# Arguments, in order:
#   1. the token list to read; each line holds an XML name, optionally
#      followed by whitespace and the C++ identifier to use for it
#   2. the header file to write (one "const sal_Int32" per token)
#   3. the gperf input file to write
my ( $tokenFile, $hxxFile, $gperfFile ) = @ARGV;
defined($gperfFile)
	or die "usage: gentoken.pl <tokens.txt> <tokens.hxx> <tokens.gperf>\n";

open( my $tokensIn, '<', $tokenFile ) or die "can't open token file: $!";
my %tokens;

while ( defined( my $line = <$tokensIn> ) )
{
	chomp($line);
	my @token = split( ' ', $line );

	# A blank line yields no columns at all; skip it instead of registering
	# an undefined name with the bogus identifier "XML_".
	next unless @token;

	unless ( defined( $token[1] ) )
	{
		# No explicit identifier given: derive one from the XML name.
		# '-', '.' and ':' become '_', and '+' becomes "PLUS".  (The original
		# also had s/\-/MINUS/g here, but it could never match because the
		# tr/// has already replaced every '-'.)
		$token[1] = "XML_" . $token[0];
		$token[1] =~ tr/\-\.\:/___/;
		$token[1] =~ s/\+/PLUS/g;
	}

	$tokens{ $token[0] } = $token[1];
}
close($tokensIn);

open( my $hxx,   '>', $hxxFile )   or die "can't open tokens.hxx file: $!";
open( my $gperf, '>', $gperfFile ) or die "can't open tokens.gperf file: $!";

# gperf declaration section: options plus the struct stored in the word list.
print {$gperf} "%language=C++\n";
print {$gperf} "%global-table\n";
print {$gperf} "%null-strings\n";
print {$gperf} "%struct-type\n";
print {$gperf} "struct xmltoken\n";
print {$gperf} "{\n";
print {$gperf} "  const sal_Char *name; sal_Int32 nToken; \n";
print {$gperf} "};\n";
print {$gperf} "%%\n";

# Header prologue: include guard and the sal types needed for sal_Int32.
print {$hxx} "#ifndef _TOKEN_HXX_\n";
print {$hxx} "#define _TOKEN_HXX_\n\n";
print {$hxx} "#ifndef _SAL_TYPES_H_\n";
print {$hxx} "#include <sal/types.h>\n";
print {$hxx} "#endif\n\n";

# Tokens are numbered consecutively in sorted-name order, starting at 0.
my $i = 0;
foreach my $name ( sort keys %tokens )
{
	print {$hxx} "const sal_Int32 $tokens{$name} = $i;\n";
	print {$gperf} "$name,$tokens{$name}\n";
	$i++;
}
print {$gperf} "%%\n";
print {$hxx} "const sal_Int32 XML_TOKEN_COUNT = $i;\n";
print {$hxx} "#endif\n";
close($hxx);
close($gperf);

The generated .hxx file has the following format

// Header produced by the script above for the four-name tokens.txt example.
// (These two comment lines are explanatory only; the script does not emit them.)
#ifndef _TOKEN_HXX_
#define _TOKEN_HXX_

#ifndef _SAL_TYPES_H_
#include <sal/types.h>
#endif

const sal_Int32 XML_Admin = 0;
const sal_Int32 XML_Login = 1;
const sal_Int32 XML_Name = 2;
const sal_Int32 XML_User = 3;
const sal_Int32 XML_TOKEN_COUNT = 4;
#endif

This can be automated by adding the following at the end of your makefile.mk

# Generate the token header and the gperf input file from the token list
# by running gentoken.pl.
$(INCCOM)$/tokens.hxx $(MISC)$/tokens.gperf : tokens.txt gentoken.pl
		$(PERL) gentoken.pl tokens.txt $(INCCOM)$/tokens.hxx $(MISC)$/tokens.gperf

# Run gperf on the generated input, then rewrite every "(char*)0" in its
# output to "(char*)0, 0" with sed while copying the result to tokens.cxx.
# Presumably this gives the empty word-list entries an initializer for both
# members of struct xmltoken -- confirm against the gperf output.
$(INCCOM)$/tokens.cxx : $(MISC)$/tokens.gperf
		gperf --compare-strncmp --output-file=$(MISC)$/_tokens.cxx $(MISC)$/tokens.gperf
		$(TYPE) $(MISC)$/_tokens.cxx | $(SED) -e "s/(char\*)0/(char\*)0, 0/g" >$(INCCOM)$/tokens.cxx

# The object file that includes the generated sources must be rebuilt
# whenever either of them changes.
$(SLO)$/tokenmap.obj : $(INCCOM)$/tokens.cxx $(INCCOM)$/tokens.hxx

# NOTE(review): the gperf input is generated into $(MISC) by the first rule,
# so this $(INCCOM) path looks like a typo for $(MISC)$/tokens.gperf -- verify.
$(INCCOM)$/tokens.gperf : $(INCCOM)$/tokens.hxx

This automatically creates tokens.hxx and tokens.cxx in your project's platform-specific include folder.

The following code uses the tokens.cxx generated by gperf to implement an XFastTokenHandler.


#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
#include <com/sun/star/xml/sax/FastToken.hpp>
#include <cppuhelper/implbase1.hxx>

#include "tokens.hxx"

#include "tokens.cxx" // this also includes the c++ code created from gperf

using ::rtl::OUString;
using ::osl::Mutex;
using ::osl::MutexGuard;
using namespace ::com::sun::star::xml::sax;
using namespace ::com::sun::star::uno;

/** Sample XFastTokenHandler implementation that resolves names through the
    gperf-generated lookup code pulled in from tokens.cxx. */
class FastTokenHandler : public ::cppu::WeakImplHelper1< XFastTokenHandler >
{
public:
  /// Returns the token for Identifier, or FastToken::DONTKNOW if the lookup finds nothing.
  virtual sal_Int32 SAL_CALL getToken( const OUString& Identifier ) throw (RuntimeException);
  /// Returns the name belonging to Token, or an empty string for a token >= XML_TOKEN_COUNT.
  virtual OUString SAL_CALL getIdentifier( sal_Int32 Token ) throw (RuntimeException);
  /// Same lookup as getToken(), but takes the name as a raw byte sequence.
  virtual sal_Int32 SAL_CALL getTokenFromUTF8( const Sequence< sal_Int8 >& Identifier ) throw (RuntimeException);
};

/** Returns the one Mutex (a function-local static) that every
    FastTokenHandler method locks for the duration of its call. */
Mutex& getTokenMutex()
{
  static Mutex aMutex;
  return aMutex;
}

/** Looks up the integer token for an XML name.

    @param Identifier
        the name to look up; it is converted to UTF-8 before the lookup.

    @returns
        the nToken value of the matching gperf word-list entry, or
        FastToken::DONTKNOW if the name is not in the word list.
*/
sal_Int32 FastTokenHandler::getToken( const OUString& Identifier ) throw (RuntimeException)
{
  MutexGuard guard( getTokenMutex() );

  // OString is spelled with its namespace because this file only has a
  // using-declaration for OUString.
  const ::rtl::OString aUTF8( Identifier.getStr(), Identifier.getLength(), RTL_TEXTENCODING_UTF8 );

  // Pass the character buffer explicitly instead of relying on an implicit
  // conversion, and hold the result as pointer-to-const: the entry is only read.
  const struct xmltoken * t = Perfect_Hash::in_word_set( aUTF8.getStr(), aUTF8.getLength() );
  if( t )
    return t->nToken;
  else
    return FastToken::DONTKNOW;
}

/** Returns the XML name that belongs to an integer token.

    @param nToken
        a token value as defined in tokens.hxx.

    @returns
        the name for nToken, or an empty string if nToken is outside
        [0, XML_TOKEN_COUNT) or no word-list entry carries that token.
*/
OUString FastTokenHandler::getIdentifier( sal_Int32 nToken ) throw (RuntimeException)
{
  MutexGuard guard( getTokenMutex() );

  // Negative values must be rejected too, otherwise they index in front of
  // the cache array below.
  if( nToken < 0 || nToken >= XML_TOKEN_COUNT )
    return OUString();

  // Names already converted to OUString, indexed by token value.
  static OUString aTokens[XML_TOKEN_COUNT];

  if( aTokens[nToken].getLength() == 0 )
  {
    // gperf stores each keyword in the word-list slot selected by its hash
    // value, and leaves unused slots with a null name, so the list cannot be
    // indexed by token number.  Search it for the entry carrying nToken.
    const sal_Int32 nSlots = static_cast< sal_Int32 >( sizeof( wordlist ) / sizeof( wordlist[0] ) );
    for( sal_Int32 nSlot = 0; nSlot < nSlots; ++nSlot )
    {
      const char* pName = wordlist[nSlot].name;
      if( pName && *pName && wordlist[nSlot].nToken == nToken )
      {
        aTokens[nToken] = OUString::createFromAscii( pName );
        break;
      }
    }
  }

  return aTokens[nToken];
}

/** Looks up the integer token for a name supplied as a byte sequence.

    @param Identifier
        the bytes of the name; they are handed to the gperf lookup unchanged.

    @returns
        the nToken value of the matching word-list entry, or
        FastToken::DONTKNOW if there is no match.
*/
sal_Int32 FastTokenHandler::getTokenFromUTF8( const Sequence< sal_Int8 >& Identifier ) throw (RuntimeException)
{
  MutexGuard guard( getTokenMutex() );

  // View the sal_Int8 buffer as plain chars for the gperf lookup function.
  const char* pName = reinterpret_cast< const char* >( Identifier.getConstArray() );
  struct xmltoken * pEntry = Perfect_Hash::in_word_set( pName, Identifier.getLength() );

  return pEntry ? pEntry->nToken : FastToken::DONTKNOW;
}

Personal tools