cgul_microxml.h File Reference

MicroXML Parser. More...

#include "cgul_common.h"
#include "cgul_exception.h"
#include "cgul_hash.h"
#include "cgul_stream_reader.h"
#include "cgul_string.h"
Include dependency graph for cgul_microxml.h:
This graph shows which files directly or indirectly include this file:

Typedefs

typedef struct cgul_microxml * cgul_microxml_t
 

Functions

CGUL_EXPORT char * cgul_microxml__escape (cgul_exception_t *cex, const char *s)
 
CGUL_EXPORT cgul_microxml_t cgul_microxml__new (cgul_exception_t *cex)
 
CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_fname (cgul_exception_t *cex, const char *fname)
 
CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_file (cgul_exception_t *cex, FILE *f)
 
CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_memory (cgul_exception_t *cex, const char *buffer, size_t buffer_size)
 
CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_stream_reader (cgul_exception_t *cex, cgul_stream_reader_t sr)
 
CGUL_EXPORT void cgul_microxml__delete (cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__open_fname (cgul_exception_t *cex, cgul_microxml_t uxml, const char *fname)
 
CGUL_EXPORT void cgul_microxml__open_file (cgul_exception_t *cex, cgul_microxml_t uxml, FILE *f)
 
CGUL_EXPORT void cgul_microxml__open_memory (cgul_exception_t *cex, cgul_microxml_t uxml, const char *buffer, size_t buffer_size)
 
CGUL_EXPORT void cgul_microxml__close (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT cgul_stream_reader_t cgul_microxml__get_stream_reader (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_stream_reader (cgul_exception_t *cex, cgul_microxml_t uxml, cgul_stream_reader_t sr)
 
CGUL_EXPORT const char * cgul_microxml__get_presentation_name (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_presentation_name (cgul_exception_t *cex, cgul_microxml_t uxml, const char *presentation_name)
 
CGUL_EXPORT size_t cgul_microxml__get_maximum_entity_length (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_maximum_entity_length (cgul_exception_t *cex, cgul_microxml_t uxml, size_t entity_length_max)
 
CGUL_EXPORT size_t cgul_microxml__get_line_number (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT size_t cgul_microxml__get_column_number (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__read_next_token (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_document_start (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_document_end (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_element_start (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_element_end (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_text (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_comment (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT const char * cgul_microxml__get_element_name (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT cgul_string_t cgul_microxml__get_attribute_value (cgul_exception_t *cex, cgul_microxml_t uxml, const char *attribute_name)
 
CGUL_EXPORT cgul_hash_t cgul_microxml__get_attributes (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT cgul_hash_t cgul_microxml__take_attributes (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__delete_attributes (cgul_exception_t *cex, cgul_microxml_t uxml, cgul_hash_t attributes)
 
CGUL_EXPORT int cgul_microxml__is_empty_element (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT cgul_string_t cgul_microxml__get_text (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__is_white_space (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__get_skip_white_space (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_skip_white_space (cgul_exception_t *cex, cgul_microxml_t uxml, int skip)
 
CGUL_EXPORT size_t cgul_microxml__get_maximum_text_length (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_maximum_text_length (cgul_exception_t *cex, cgul_microxml_t uxml, size_t text_length_max)
 
CGUL_EXPORT cgul_string_t cgul_microxml__get_comment (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT int cgul_microxml__get_skip_comments (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_skip_comments (cgul_exception_t *cex, cgul_microxml_t uxml, int skip)
 
CGUL_EXPORT size_t cgul_microxml__get_maximum_comment_length (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_maximum_comment_length (cgul_exception_t *cex, cgul_microxml_t uxml, size_t comment_length_max)
 
CGUL_EXPORT int cgul_microxml__get_skip_prolog_or_epilog_tokens (cgul_exception_t *cex, cgul_microxml_t uxml)
 
CGUL_EXPORT void cgul_microxml__set_skip_prolog_or_epilog_tokens (cgul_exception_t *cex, cgul_microxml_t uxml, int skip)
 

Detailed Description

Parser for MicroXML as specified by the W3C MicroXML Community Group at the following URLs:

    https://www.w3.org/community/microxml/
    https://dvcs.w3.org/hg/microxml/raw-file/tip/spec/microxml.html

By default, white space and comments in the prolog and epilog are not tokenized. Comments (but not white space) under the root element are also not tokenized. To receive all the tokens that are skipped by default, the following methods can be used:

    cgul_microxml__set_skip_prolog_or_epilog_tokens(&local, uxml, 0);
    cgul_microxml__set_skip_comments(&local, uxml, 0);

White space under the root element can be skipped as follows:

cgul_microxml__set_skip_white_space(&local, uxml, 1);

Author
Paul Serice

Typedef Documentation

§ cgul_microxml_t

typedef struct cgul_microxml* cgul_microxml_t

Opaque pointer to a cgul_microxml_t instance.

Function Documentation

§ cgul_microxml__escape()

CGUL_EXPORT char* cgul_microxml__escape ( cgul_exception_t cex,
const char *  s 
)

Static method used to escape the special MicroXML characters '&', '<', '>', '"', and "'" in the string s. The client is responsible for calling free() on the pointer returned. If an error occurs, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]sUTF-8 input string
Returns
UTF-8 escaped output string

Referenced by cgul_microxml_cxx::escape().

§ cgul_microxml__new()

CGUL_EXPORT cgul_microxml_t cgul_microxml__new ( cgul_exception_t cex)

Return a new cgul_microxml instance. After this method returns, cgul_microxml__set_stream_reader() should be called before calling cgul_microxml__read_next_token(). The client is responsible for calling cgul_microxml__delete() on the pointer returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
Returns
new cgul_microxml instance
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_skip_comments()
cgul_microxml__set_skip_white_space()

Referenced by cgul_microxml_cxx::cgul_microxml_cxx().

§ cgul_microxml__new_from_fname()

CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_fname ( cgul_exception_t cex,
const char *  fname 
)

This method creates a new cgul_microxml instance and calls cgul_microxml__open_fname() passing it fname. The file will be closed when this instance is deleted. The client is responsible for calling cgul_microxml__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]fnamefile name
Returns
new cgul_microxml instance
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_maximum_comment_length()
cgul_microxml__set_skip_comments()
cgul_microxml__set_skip_white_space()

Referenced by cgul_microxml_cxx::cgul_microxml_cxx().

§ cgul_microxml__new_from_file()

CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_file ( cgul_exception_t cex,
FILE *  f 
)

This method creates a new cgul_microxml instance and calls cgul_microxml__open_file() passing it f. The class does not take ownership of f. Thus, the client is still responsible for calling fclose() on it. The client is also responsible for calling cgul_microxml__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]ffile
Returns
new cgul_microxml instance
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_maximum_comment_length()
cgul_microxml__set_skip_comments()
cgul_microxml__set_skip_white_space()

Referenced by cgul_microxml_cxx::cgul_microxml_cxx().

§ cgul_microxml__new_from_memory()

CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_memory ( cgul_exception_t cex,
const char *  buffer,
size_t  buffer_size 
)

This method creates a new cgul_microxml instance and calls cgul_microxml__open_memory() passing it buffer and buffer_size. This class does not take ownership of buffer so the client is still responsible for freeing buffer if necessary. The client is also responsible for calling cgul_microxml__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]buffermemory buffer
[in]buffer_sizesize of buffer in bytes
Returns
new cgul_microxml instance
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_maximum_comment_length()
cgul_microxml__set_skip_comments()
cgul_microxml__set_skip_white_space()

Referenced by cgul_microxml_cxx::cgul_microxml_cxx().

§ cgul_microxml__new_from_stream_reader()

CGUL_EXPORT cgul_microxml_t cgul_microxml__new_from_stream_reader ( cgul_exception_t cex,
cgul_stream_reader_t  sr 
)

This method creates a new cgul_microxml instance and calls cgul_microxml__set_stream_reader() passing it sr. The client is responsible for calling cgul_microxml__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]srstream reader
Returns
new cgul_microxml instance
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_skip_comments()
cgul_microxml__set_skip_white_space()

Referenced by cgul_microxml_cxx::cgul_microxml_cxx().

§ cgul_microxml__delete()

CGUL_EXPORT void cgul_microxml__delete ( cgul_microxml_t  uxml)

Delete the MicroXML instance uxml freeing all internally allocated resources. The client must not attempt to use uxml after calling this method.

Parameters
[in]uxmlcgul_microxml instance

Referenced by cgul_microxml_cxx::set_obj(), and cgul_microxml_cxx::~cgul_microxml_cxx().

§ cgul_microxml__open_fname()

CGUL_EXPORT void cgul_microxml__open_fname ( cgul_exception_t cex,
cgul_microxml_t  uxml,
const char *  fname 
)

Open the MicroXML file with file name fname. If a file or memory buffer is already open, it is closed before attempting to open the new file. The new file will be closed when this instance is deleted. If an error occurs, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
[in]fnamefile name

Referenced by cgul_microxml_cxx::open_fname().

§ cgul_microxml__open_file()

CGUL_EXPORT void cgul_microxml__open_file ( cgul_exception_t cex,
cgul_microxml_t  uxml,
FILE *  f 
)

Open the MicroXML file f. If a file or memory buffer is already open, it is closed before attempting to open the new file. This class does not take ownership of f. Thus, the client is still responsible for calling fclose() on f. If an error occurs, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
[in]finput file

Referenced by cgul_microxml_cxx::open_file().

§ cgul_microxml__open_memory()

CGUL_EXPORT void cgul_microxml__open_memory ( cgul_exception_t cex,
cgul_microxml_t  uxml,
const char *  buffer,
size_t  buffer_size 
)

Open the MicroXML file contained in the memory buffer buffer, which holds buffer_size bytes (not Unicode characters). If a file or memory buffer is already open, it is closed before attempting to open the new memory buffer. This class does not take ownership of buffer. Thus, the client is still responsible for freeing the buffer if necessary. This method does not alter buffer, but buffer should not be changed while it is being used by this class. If an error occurs, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
[in]buffermemory buffer
[in]buffer_sizebuffer size in bytes (not Unicode characters)

Referenced by cgul_microxml_cxx::open_memory().

§ cgul_microxml__close()

CGUL_EXPORT void cgul_microxml__close ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Close the open MicroXML file (if any). If an error occurs, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance

Referenced by cgul_microxml_cxx::close().

§ cgul_microxml__get_stream_reader()

CGUL_EXPORT cgul_stream_reader_t cgul_microxml__get_stream_reader ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the stream reader. If no stream reader is currently selected, NULL is returned.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
stream reader

Referenced by cgul_microxml_cxx::get_stream_reader().

§ cgul_microxml__set_stream_reader()

CGUL_EXPORT void cgul_microxml__set_stream_reader ( cgul_exception_t cex,
cgul_microxml_t  uxml,
cgul_stream_reader_t  sr 
)

Set the stream reader sr. This class does not take ownership of sr. Thus, the client is still responsible for deleting it. If sr is NULL, the old stream reader will no longer be used making it safe to delete the old stream reader without having to set a new one. If an error occurs, an exception is thrown.

The only constraint on sr is that it must yield Unicode characters. If the stream is UTF-8 or starts with a Unicode Byte-Order Mark (BOM), cgul_stream_reader should automatically detect the correct decoder to use; otherwise, manually set sr to use one of the following decoders:

    cgul_stream_reader__decode_utf8
    cgul_stream_reader__decode_utf16be
    cgul_stream_reader__decode_utf16le
    cgul_stream_reader__decode_utf32be
    cgul_stream_reader__decode_utf32le

If you need to parse MicroXML fragments, multiple block readers can be added to the stream reader at any time using cgul_stream_reader__add_block_reader().

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
[in]srstream reader
See also
cgul_stream_reader__add_block_reader()

Referenced by cgul_microxml_cxx::set_stream_reader().

§ cgul_microxml__get_presentation_name()

CGUL_EXPORT const char* cgul_microxml__get_presentation_name ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the presentation name. If an input source is not currently set, NULL is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
presentation name

Referenced by cgul_microxml_cxx::get_presentation_name().

§ cgul_microxml__set_presentation_name()

CGUL_EXPORT void cgul_microxml__set_presentation_name ( cgul_exception_t cex,
cgul_microxml_t  uxml,
const char *  presentation_name 
)

Set the presentation name. The presentation name is primarily used for error reporting and can be set to anything, but it does need to be set again if the input file changes. If the presentation name is not set, it defaults to the file name that was opened or to "FILE" or "MEMORY" if using a FILE* or memory buffer respectively. An exception is thrown if an attempt is made to set the presentation name without first setting the input file.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
[in]presentation_namepresentation name

Referenced by cgul_microxml_cxx::set_presentation_name().

§ cgul_microxml__get_maximum_entity_length()

CGUL_EXPORT size_t cgul_microxml__get_maximum_entity_length ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the maximum entity length. If set to 0, the length is limited only by available memory. The default is 16K.

Parameters
[in]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
maximum entity length

Referenced by cgul_microxml_cxx::get_maximum_entity_length().

§ cgul_microxml__set_maximum_entity_length()

CGUL_EXPORT void cgul_microxml__set_maximum_entity_length ( cgul_exception_t cex,
cgul_microxml_t  uxml,
size_t  entity_length_max 
)

Set the maximum entity length. If set to 0, the length is limited only by available memory. The default is 16K.

Parameters
[in]cexc-style exception
[in]uxmlcgul_microxml instance
[in]entity_length_maxmaximum entity length

Referenced by cgul_microxml_cxx::set_maximum_entity_length().

§ cgul_microxml__get_line_number()

CGUL_EXPORT size_t cgul_microxml__get_line_number ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the starting line number for the current token. If no input source is set, 0 is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
current line number

Referenced by cgul_microxml_cxx::get_line_number().

§ cgul_microxml__get_column_number()

CGUL_EXPORT size_t cgul_microxml__get_column_number ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the starting column number for the current token. If no input source is set, 0 is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
current column number

Referenced by cgul_microxml_cxx::get_column_number().

§ cgul_microxml__read_next_token()

CGUL_EXPORT void cgul_microxml__read_next_token ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Read the next token from the MicroXML input source. This method can be used to implement a pull parser. If an error occurs, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
See also
cgul_microxml__is_document_start()
cgul_microxml__is_document_end()
cgul_microxml__is_element_start()
cgul_microxml__is_element_end()
cgul_microxml__is_text()
cgul_microxml__is_comment()

Referenced by cgul_microxml_cxx::read_next_token().

§ cgul_microxml__is_document_start()

CGUL_EXPORT int cgul_microxml__is_document_start ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether the current token indicates the start of the document. This is always the first token.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether the current token indicates the start of the document

Referenced by cgul_microxml_cxx::is_document_start().

§ cgul_microxml__is_document_end()

CGUL_EXPORT int cgul_microxml__is_document_end ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether the current token indicates the end of the document. This is always the last token if no errors occur. Attempting to get the next token after this one results in an error.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether the current token indicates the end of the document

Referenced by cgul_microxml_cxx::is_document_end().

§ cgul_microxml__is_element_start()

CGUL_EXPORT int cgul_microxml__is_element_start ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether the start tag for an element has been read. Use cgul_microxml__get_element_name() and cgul_microxml__get_attributes() to get the name and attributes for the element. Use cgul_microxml__is_empty_element() to determine if the element is an empty element (e.g., <foo/>).

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether the start tag for an element has been read

Referenced by cgul_microxml_cxx::is_element_start().

§ cgul_microxml__is_element_end()

CGUL_EXPORT int cgul_microxml__is_element_end ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether the end tag for an element has been read or, if an empty element was parsed earlier, whether an end tag is being simulated on behalf of the earlier empty element. This way the following two MicroXML samples generate the same token stream:

    <foo></foo>
    <foo/>

Use cgul_microxml__get_element_name() to get the name of the element. Use cgul_microxml__is_empty_element() to determine if the end tag is being simulated because the matching start tag is an empty element.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether the end tag for an element has been read

Referenced by cgul_microxml_cxx::is_element_end().

§ cgul_microxml__is_text()

CGUL_EXPORT int cgul_microxml__is_text ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether text has been read. Use cgul_microxml__get_text() to get the text.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether text has been read

Referenced by cgul_microxml_cxx::is_text().

§ cgul_microxml__is_comment()

CGUL_EXPORT int cgul_microxml__is_comment ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether a comment has been read. Use cgul_microxml__get_comment() to get the comment.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether comment has been read

Referenced by cgul_microxml_cxx::is_comment().

§ cgul_microxml__get_element_name()

CGUL_EXPORT const char* cgul_microxml__get_element_name ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the name of the current element. The string is owned by this class so the client must not attempt to free it. This method is valid when processing the start or end tag of an element; otherwise, NULL is returned, and an exception is thrown.

Element names are interned by the cgul_microxml instance making them valid until the cgul_microxml instance is deleted or reset. It is common for elements to be repeated. In this case, this method returns the same pointer for the repeated elements which allows DOMs to be created in a memory efficient manner. Because the element names are shared, it is important that the client treat them as immutable which should be natural in most contexts.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
name associated with token

Referenced by cgul_microxml_cxx::get_element_name().

§ cgul_microxml__get_attribute_value()

CGUL_EXPORT cgul_string_t cgul_microxml__get_attribute_value ( cgul_exception_t cex,
cgul_microxml_t  uxml,
const char *  attribute_name 
)

Return the value for the attribute name attribute_name. If no attribute has that name, NULL is returned. Ownership of the attribute value remains with the cgul_microxml instance so the client must not attempt to delete it. This method is valid when processing the start or end tag of an element; otherwise, NULL is returned, and an exception is thrown.

Unlike element names and attribute names, attribute values are only valid until the next token is read unless cgul_string__take_value() is called on the returned attribute value in which case the client is responsible for calling free() on the value taken.

The attribute value can be converted to numeric types using methods from cgul_string like cgul_string__to_int().

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
[in]attribute_nameattribute name
Returns
value for the given attribute name or NULL
See also
cgul_microxml__get_attributes()
cgul_microxml__take_attributes()

Referenced by cgul_microxml_cxx::get_attribute_value().

§ cgul_microxml__get_attributes()

CGUL_EXPORT cgul_hash_t cgul_microxml__get_attributes ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the entire attribute map associated with the current element. The keys and values stored in the attributes map are owned by this class so the client must not attempt to free or delete them. This method is valid when processing the start tag of an element; otherwise, NULL is returned, and an exception is thrown.

The keys in the attributes map correspond to the names of the attributes and are C-style strings. The keys are valid until this cgul_microxml instance is deleted or reset.

The values in the attributes map correspond to the attribute values and are cgul_string instances (not C-style strings). Unlike the keys, the values are only valid until the next token is read unless cgul_string__take_value() is called on the attribute values in which case the client is responsible for calling free() on each value taken.

The attribute values can be converted to numeric types using methods from cgul_string like cgul_string__to_int().

You can iterate over the attribute names and values as follows:

    cgul_hash_node_t attr = NULL;
    attr = cgul_hash__get_front(cex, attrs);
    for ( ; attr ; attr = cgul_hash_node__get_next(cex, attr)) {
      // Get the key/value pair for this attribute.
      const char* key = (const char*)cgul_hash_node__get_key(cex, attr);
      cgul_string_t tmp = (cgul_string_t)cgul_hash_node__get_value(cex, attr);
      const char* value = cgul_string__get_value(cex, tmp);
      ...
    }

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
map of attributes associated with token
See also
cgul_microxml__take_attributes()
cgul_microxml__get_attribute_value()

Referenced by cgul_microxml_cxx::get_attributes().

§ cgul_microxml__take_attributes()

CGUL_EXPORT cgul_hash_t cgul_microxml__take_attributes ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the attributes map associated with the current element. This method transfers ownership of the attributes map requiring the client to delete the map by calling cgul_microxml__delete_attributes(). The element attributes can only be taken once per element. This method is valid when processing the start tag of an element; otherwise, NULL is returned, and an exception is thrown.

The keys in the attributes map correspond to the names of the attributes and are C-style strings. The keys are only valid until cgul_microxml__delete_attributes() is called or until this cgul_microxml instance is deleted or reset. So do not try to use the keys after deleting uxml! (The reason for this is that attribute names are interned in a symbol table owned by uxml so that multiple occurrences of the same attribute name can share the same symbol without requiring extra memory for a duplicate string.)

The values in the attributes map correspond to the attribute values and are cgul_string instances (not C-style strings). Unlike the keys, the values are valid until cgul_microxml__delete_attributes() is called even if uxml has been deleted or reset.

The attribute values can be converted to numeric types using methods from cgul_string like cgul_string__to_int().

You can iterate over the attribute names and values as follows:

    cgul_hash_node_t attr = NULL;
    attr = cgul_hash__get_front(cex, attrs);
    for ( ; attr ; attr = cgul_hash_node__get_next(cex, attr)) {
      // Get the key/value pair for this attribute.
      const char* key = (const char*)cgul_hash_node__get_key(cex, attr);
      cgul_string_t tmp = (cgul_string_t)cgul_hash_node__get_value(cex, attr);
      const char* value = cgul_string__get_value(cex, tmp);
      ...
    }

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
map of attributes associated with token
See also
cgul_microxml__get_attributes()
cgul_microxml__get_attribute_value()

Referenced by cgul_microxml_cxx::take_attributes().

§ cgul_microxml__delete_attributes()

CGUL_EXPORT void cgul_microxml__delete_attributes ( cgul_exception_t cex,
cgul_microxml_t  uxml,
cgul_hash_t  attributes 
)

Convenience method used to delete the attributes map attributes returned by the function cgul_microxml__take_attributes(). The client must not use attributes after this method returns.

Note
Do not use this function for attribute maps returned by cgul_microxml__get_attributes(). Only use it on attributes maps returned by cgul_microxml__take_attributes().
Parameters
[in]cexc-style exception
[in]uxmlcgul_microxml instance
[in]attributesattributes map

Referenced by cgul_microxml_cxx::delete_attributes().

§ cgul_microxml__is_empty_element()

CGUL_EXPORT int cgul_microxml__is_empty_element ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether the element is an empty element (e.g., <foo/>). This method is valid when processing the start or end tag of an element; otherwise, an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether the empty-element syntax was used to close the start tag

Referenced by cgul_microxml_cxx::is_empty_element().

§ cgul_microxml__get_text()

CGUL_EXPORT cgul_string_t cgul_microxml__get_text ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the current text. The returned value is owned by this class so the client must not attempt to delete it. This method is valid when processing text; otherwise, NULL is returned, and an exception is thrown.

The text returned is only valid until the next token is read unless cgul_string__take_value() is called on the returned value in which case the client is responsible for calling free() on the text taken.

The text can be converted to numeric types using methods from cgul_string like cgul_string__to_int().

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
text associated with token
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_skip_white_space()

Referenced by cgul_microxml_cxx::get_text().

§ cgul_microxml__is_white_space()

CGUL_EXPORT int cgul_microxml__is_white_space ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether the current text is comprised of only white space characters. The MicroXML specification defines white space as tab, new line, or space characters. This method is valid when processing text; otherwise, 0 is returned, and an exception is thrown.

Parameters
[in,out]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether text is pure white space

Referenced by cgul_microxml_cxx::is_white_space().

§ cgul_microxml__get_skip_white_space()

CGUL_EXPORT int cgul_microxml__get_skip_white_space ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return whether to skip text under the root element that is pure white space. By default, this white space is not skipped. For white space in the prolog and epilog, see cgul_microxml__get_skip_prolog_or_epilog_tokens().

Parameters
[in]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
whether to skip text that is pure white space
See also
cgul_microxml__get_skip_prolog_or_epilog_tokens()

Referenced by cgul_microxml_cxx::get_skip_white_space().

§ cgul_microxml__set_skip_white_space()

CGUL_EXPORT void cgul_microxml__set_skip_white_space ( cgul_exception_t cex,
cgul_microxml_t  uxml,
int  skip 
)

Set whether to skip text under the root element that is pure white space. This is convenient when the document being parsed is formatted with line breaks and indentation that are not significant. By default, this white space is not skipped. For white space in the prolog and epilog, see cgul_microxml__set_skip_prolog_or_epilog_tokens().

If enabled, cgul_microxml__set_maximum_text_length() should be used to increase the maximum size of each block of text (if necessary) so that at least one character that is not white space is always included in each block of significant white space.

Parameters
[in]cexc-style exception
[in]uxmlcgul_microxml instance
[in]skipwhether to skip text that is pure white space
See also
cgul_microxml__set_maximum_text_length()
cgul_microxml__set_skip_prolog_or_epilog_tokens()

Referenced by cgul_microxml_cxx::set_skip_white_space().

§ cgul_microxml__get_maximum_text_length()

CGUL_EXPORT size_t cgul_microxml__get_maximum_text_length ( cgul_exception_t cex,
cgul_microxml_t  uxml 
)

Return the maximum size in Unicode characters of text returned by cgul_microxml__get_text(). If set to 0, the length is limited only by available memory. The default is 16K.

Parameters
[in]cexc-style exception
[in]uxmlcgul_microxml instance
Returns
maximum text length in Unicode characters

Referenced by cgul_microxml_cxx::get_maximum_text_length().

§ cgul_microxml__set_maximum_text_length()

CGUL_EXPORT void cgul_microxml__set_maximum_text_length ( cgul_exception_t * cex,
cgul_microxml_t  uxml,
size_t  text_length_max 
)

Set the maximum size in Unicode characters of text returned by cgul_microxml__get_text(). If set to 0, the length is limited only by available memory. The default is 16K.

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
[in] text_length_max: maximum text length in Unicode characters

Referenced by cgul_microxml_cxx::set_maximum_text_length().

§ cgul_microxml__get_comment()

CGUL_EXPORT cgul_string_t cgul_microxml__get_comment ( cgul_exception_t * cex,
cgul_microxml_t  uxml 
)

Return the current comment. The returned value is owned by this class so the client must not attempt to delete it. This method is valid when processing comments; otherwise, NULL is returned, and an exception is thrown.

The comment returned is only valid until the next token is read unless cgul_string__take_value() is called on the returned value in which case the client is responsible for calling free() on the comment taken.

Parameters
[in,out] cex: c-style exception
[in] uxml: cgul_microxml instance
Returns
comment associated with token
See also
cgul_microxml__set_skip_comments()

Referenced by cgul_microxml_cxx::get_comment().

§ cgul_microxml__get_skip_comments()

CGUL_EXPORT int cgul_microxml__get_skip_comments ( cgul_exception_t * cex,
cgul_microxml_t  uxml 
)

Return whether to skip comments under the root element. By default, these comments are skipped. For comments in the prolog and epilog, see cgul_microxml__get_skip_prolog_or_epilog_tokens().

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
Returns
whether to skip comments under the root element
See also
cgul_microxml__get_skip_prolog_or_epilog_tokens()

Referenced by cgul_microxml_cxx::get_skip_comments().

§ cgul_microxml__set_skip_comments()

CGUL_EXPORT void cgul_microxml__set_skip_comments ( cgul_exception_t * cex,
cgul_microxml_t  uxml,
int  skip 
)

Set whether to skip comments under the root element. By default, these comments are skipped. For comments in the prolog and epilog, see cgul_microxml__set_skip_prolog_or_epilog_tokens().

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
[in] skip: whether to skip comments under the root element
See also
cgul_microxml__set_skip_prolog_or_epilog_tokens()

Referenced by cgul_microxml_cxx::set_skip_comments().

§ cgul_microxml__get_maximum_comment_length()

CGUL_EXPORT size_t cgul_microxml__get_maximum_comment_length ( cgul_exception_t * cex,
cgul_microxml_t  uxml 
)

Return the maximum comment length. If set to 0, the length is limited only by available memory. The default is 16K.

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
Returns
maximum comment length

Referenced by cgul_microxml_cxx::get_maximum_comment_length().

§ cgul_microxml__set_maximum_comment_length()

CGUL_EXPORT void cgul_microxml__set_maximum_comment_length ( cgul_exception_t * cex,
cgul_microxml_t  uxml,
size_t  comment_length_max 
)

Set the maximum comment length. If set to 0, the length is limited only by available memory. The default is 16K.

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
[in] comment_length_max: maximum comment length

Referenced by cgul_microxml_cxx::set_maximum_comment_length().

§ cgul_microxml__get_skip_prolog_or_epilog_tokens()

CGUL_EXPORT int cgul_microxml__get_skip_prolog_or_epilog_tokens ( cgul_exception_t * cex,
cgul_microxml_t  uxml 
)

Return whether to skip tokens in the prolog or epilog. These are the white space and comment tokens that surround the root element. By default, these tokens are skipped. For white space and comments under the root element, see cgul_microxml__get_skip_white_space() and cgul_microxml__get_skip_comments().

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
Returns
whether to skip tokens in the prolog or epilog
See also
cgul_microxml__get_skip_white_space()
cgul_microxml__get_skip_comments()

Referenced by cgul_microxml_cxx::get_skip_prolog_or_epilog_tokens().

§ cgul_microxml__set_skip_prolog_or_epilog_tokens()

CGUL_EXPORT void cgul_microxml__set_skip_prolog_or_epilog_tokens ( cgul_exception_t * cex,
cgul_microxml_t  uxml,
int  skip 
)

Set whether to skip tokens in the prolog or epilog. These are the white space and comment tokens that surround the root element. By default, these tokens are skipped. For white space and comments under the root element, see cgul_microxml__set_skip_white_space() and cgul_microxml__set_skip_comments().

Parameters
[in] cex: c-style exception
[in] uxml: cgul_microxml instance
[in] skip: whether to skip tokens in the prolog or epilog
See also
cgul_microxml__set_skip_white_space()
cgul_microxml__set_skip_comments()

Referenced by cgul_microxml_cxx::set_skip_prolog_or_epilog_tokens().