stream reader abstraction More...
#include "cgul_common.h"
#include "cgul_exception.h"
#include "cgul_block_reader.h"
#include "cgul_wchar.h"
#include "cgul_wstring.h"
Typedefs | |
typedef struct cgul_stream_reader * | cgul_stream_reader_t |
typedef void(* | cgul_stream_reader__decode_t) (cgul_exception_t *cex, unsigned char *block_encoded, size_t size_encoded, size_t *index_encoded, cgul_wchar_t *block_decoded, size_t size_decoded, size_t *count_decoded) |
The cgul_stream_reader
class is an adaptor for one or more cgul_block_reader
instances. It returns a stream of characters read from each block reader in turn. If a decoder is not explicitly set, cgul_stream_reader
will attempt to determine the correct Unicode decoder by looking for the Unicode Byte-Order Mark (BOM). If the stream is not Unicode, an ASCII decoder is provided or the user can write their own. If no decoder is explicitly set and none can be determined from a BOM, UTF-8 is assumed. For automatic detection to work correctly, the underlying block must have a size of at least 4 bytes. The default block size is 16K.
typedef struct cgul_stream_reader* cgul_stream_reader_t |
Opaque pointer to a cgul_stream_reader
instance.
typedef void(* cgul_stream_reader__decode_t) (cgul_exception_t *cex, unsigned char *block_encoded, size_t size_encoded, size_t *index_encoded, cgul_wchar_t *block_decoded, size_t size_decoded, size_t *count_decoded) |
Type definition for the callback function used by cgul_stream_reader
to decode and return the next block of characters. The function should decode up to size_encoded
bytes from block_encoded
starting with the index_encoded
byte and storing the index of the first byte that could not be decoded in index_encoded
. If the entire block is decoded, then index_encoded
should be set to size_encoded
on return. Up to size_decoded
wide characters should be placed in order in block_decoded
, and count_decoded
should be set to the number of characters that were decoded.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
CGUL_EXPORT void cgul_stream_reader__decode_ascii | ( | cgul_exception_t * | cex, |
unsigned char * | block_encoded, | ||
size_t | size_encoded, | ||
size_t * | index_encoded, | ||
cgul_wchar_t * | block_decoded, | ||
size_t | size_decoded, | ||
size_t * | count_decoded | ||
) |
This function implements the cgul_stream_reader__decode_t
callback and can be used to configure a cgul_stream_reader
instance to adapt a cgul_block_reader
to return an ASCII character stream. Clients do not typically call this function directly. Instead, they usually only have to pass it into a cgul_stream_reader
instance so the instance can call this function.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
Referenced by cgul_stream_reader_cxx::decode_ascii().
CGUL_EXPORT void cgul_stream_reader__decode_utf8 | ( | cgul_exception_t * | cex, |
unsigned char * | block_encoded, | ||
size_t | size_encoded, | ||
size_t * | index_encoded, | ||
cgul_wchar_t * | block_decoded, | ||
size_t | size_decoded, | ||
size_t * | count_decoded | ||
) |
This function implements the cgul_stream_reader__decode_t
callback and can be used to configure a cgul_stream_reader
instance to adapt a cgul_block_reader
to return a UTF-8 character stream. Clients do not typically call this function directly. Instead, they usually only have to pass it into a cgul_stream_reader
instance so the instance can call this function.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
Referenced by cgul_stream_reader_cxx::decode_utf8().
CGUL_EXPORT void cgul_stream_reader__decode_utf16be | ( | cgul_exception_t * | cex, |
unsigned char * | block_encoded, | ||
size_t | size_encoded, | ||
size_t * | index_encoded, | ||
cgul_wchar_t * | block_decoded, | ||
size_t | size_decoded, | ||
size_t * | count_decoded | ||
) |
This function implements the cgul_stream_reader__decode_t
callback and can be used to configure a cgul_stream_reader
instance to adapt a cgul_block_reader
to return a UTF-16BE character stream. Clients do not typically call this function directly. Instead, they usually only have to pass it into a cgul_stream_reader
instance so the instance can call this function.
cgul_stream_reader
instance will automatically detect the BOM and set the correct decoder.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
CGUL_EXPORT void cgul_stream_reader__decode_utf16le | ( | cgul_exception_t * | cex, |
unsigned char * | block_encoded, | ||
size_t | size_encoded, | ||
size_t * | index_encoded, | ||
cgul_wchar_t * | block_decoded, | ||
size_t | size_decoded, | ||
size_t * | count_decoded | ||
) |
This function implements the cgul_stream_reader__decode_t
callback and can be used to configure a cgul_stream_reader
instance to adapt a cgul_block_reader
to return a UTF-16LE character stream. Clients do not typically call this function directly. Instead, they usually only have to pass it into a cgul_stream_reader
instance so the instance can call this function.
cgul_stream_reader
instance will automatically detect the BOM and set the correct decoder.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
CGUL_EXPORT void cgul_stream_reader__decode_utf32be | ( | cgul_exception_t * | cex, |
unsigned char * | block_encoded, | ||
size_t | size_encoded, | ||
size_t * | index_encoded, | ||
cgul_wchar_t * | block_decoded, | ||
size_t | size_decoded, | ||
size_t * | count_decoded | ||
) |
This function implements the cgul_stream_reader__decode_t
callback and can be used to configure a cgul_stream_reader
instance to adapt a cgul_block_reader
to return a UTF-32BE character stream. Clients do not typically call this function directly. Instead, they usually only have to pass it into a cgul_stream_reader
instance so the instance can call this function.
cgul_stream_reader
instance will automatically detect the BOM and set the correct decoder.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
CGUL_EXPORT void cgul_stream_reader__decode_utf32le | ( | cgul_exception_t * | cex, |
unsigned char * | block_encoded, | ||
size_t | size_encoded, | ||
size_t * | index_encoded, | ||
cgul_wchar_t * | block_decoded, | ||
size_t | size_decoded, | ||
size_t * | count_decoded | ||
) |
This function implements the cgul_stream_reader__decode_t
callback and can be used to configure a cgul_stream_reader
instance to adapt a cgul_block_reader
to return a UTF-32LE character stream. Clients do not typically call this function directly. Instead, they usually only have to pass it into a cgul_stream_reader
instance so the instance can call this function.
cgul_stream_reader
instance will automatically detect the BOM and set the correct decoder.
[in,out] | cex | c-style exception |
[in] | block_encoded | block of encoded bytes |
[in] | size_encoded | total size of block_encoded in bytes |
[in,out] | index_encoded | index into block_encoded |
[out] | block_decoded | block of decoded wide characters |
[in] | size_decoded | total size of block_decoded in chars |
[out] | count_decoded | count of decoded characters |
CGUL_EXPORT cgul_stream_reader_t cgul_stream_reader__new | ( | cgul_exception_t * | cex | ) |
Create a new cgul_stream_reader
instance. After this method returns, cgul_stream_reader__add_block_reader()
should be called and cgul_stream_reader__set_decoder()
may be called before calling cgul_stream_reader__read()
. The client is responsible for calling cgul_stream_reader__delete()
on the pointer returned. If an error occurs, NULL
is returned, and an exception is thrown.
[in] | cex | c-style exception |
Returns: a new cgul_stream_reader
instance.
Referenced by cgul_stream_reader_cxx::cgul_stream_reader_cxx().
CGUL_EXPORT cgul_stream_reader_t cgul_stream_reader__new_from_block_reader | ( | cgul_exception_t * | cex, |
cgul_block_reader_t | br | ||
) |
Create a new cgul_stream_reader
instance. After this method returns, cgul_stream_reader__set_decoder()
may be called before calling cgul_stream_reader__read()
. This class does not take ownership of the block reader br
so the client is responsible for deleting it when this class is no longer using it. The client is responsible for calling cgul_stream_reader__delete()
on the pointer returned. If an error occurs, NULL
is returned, and an exception is thrown.
[in] | cex | c-style exception |
[in] | br | block reader to add |
Returns: a new cgul_stream_reader
instance.
Referenced by cgul_stream_reader_cxx::cgul_stream_reader_cxx().
CGUL_EXPORT void cgul_stream_reader__delete | ( | cgul_stream_reader_t | sr | ) |
This method deletes the instance sr
freeing all internally allocated resources. Clients must not attempt to use sr
after this method returns.
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::set_obj(), and cgul_stream_reader_cxx::~cgul_stream_reader_cxx().
CGUL_EXPORT void cgul_stream_reader__reset | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Reset the cgul_stream_reader
instance sr
so it can be used with a new set of block readers. This clears the end of file (EOF), resets the line and column numbers to 1
, clears the queue of block readers, and clears the decoder reverting back to automatic detection of UTF-8, UTF-16, and UTF-32 input.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::reset().
CGUL_EXPORT int cgul_stream_reader__read | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
cgul_wchar_t * | next_char | ||
) |
Read the next encoded character from the stream and return the result as *next_char
provided next_char
is not NULL
. This method advances the character stream. If successful, 1
is returned. If the end of file is reached, 0
is returned and *next_char
is set to CGUL_WCHAR__NUL
. If an error occurs, 0
is returned, *next_char
is set to CGUL_WCHAR__NUL
, and an exception is thrown.
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[out] | next_char | next character |
Referenced by cgul_stream_reader_cxx::read().
CGUL_EXPORT int cgul_stream_reader__eof | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return whether the end of file (EOF) has been reached. Calling cgul_stream_reader__reset()
will clear the EOF flag. Putting a character back will also clear the EOF flag.
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::eof().
CGUL_EXPORT int cgul_stream_reader__get_line_and_column_tracking | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return whether line and column number tracking is enabled. By default, line and column number tracking is enabled.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_line_and_column_tracking().
CGUL_EXPORT void cgul_stream_reader__set_line_and_column_tracking | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
int | is_tracking_enabled | ||
) |
Set whether line and column number tracking is enabled. By default, line and column number tracking is enabled. Disabling line and column number tracking results in about a 10% performance increase.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[in] | is_tracking_enabled | whether line and column number tracking is enabled |
Referenced by cgul_stream_reader_cxx::set_line_and_column_tracking().
CGUL_EXPORT size_t cgul_stream_reader__get_line_number_current | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the line number for the current character (i.e., the character that will be returned by the next call to cgul_stream_reader__read()
). The line number is 1-based (i.e., the first line is assigned the number 1).
If line and column number tracking is disabled, 0
is returned. (See cgul_stream_reader__set_line_and_column_tracking().)
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_line_number_current().
CGUL_EXPORT size_t cgul_stream_reader__get_column_number_current | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the column number for the current character (i.e., the character that will be returned by the next call to cgul_stream_reader__read()
). The column number is 1-based (i.e., the first column is assigned the number 1).
If line and column number tracking is disabled, 0
is returned. (See cgul_stream_reader__set_line_and_column_tracking().)
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
0
Referenced by cgul_stream_reader_cxx::get_column_number_current().
CGUL_EXPORT size_t cgul_stream_reader__get_line_number_previous | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the line number for the previous character (i.e., the character that was returned by the previous call to cgul_stream_reader__read()
). The line number is 1-based (i.e., the first line is assigned the number 1). If 0
is returned, it means the first character has not yet been read.
If line and column number tracking is disabled, 0
is returned. (See cgul_stream_reader__set_line_and_column_tracking().)
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_line_number_previous().
CGUL_EXPORT size_t cgul_stream_reader__get_column_number_previous | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the column number for the previous character (i.e., the character that was returned by the previous call to cgul_stream_reader__read()
) or 0
if no character has been read yet. The column number is 1-based (i.e., the first column is assigned the number 1).
If line and column number tracking is disabled, 0
is returned. (See cgul_stream_reader__set_line_and_column_tracking().)
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
0
Referenced by cgul_stream_reader_cxx::get_column_number_previous().
CGUL_EXPORT int cgul_stream_reader__get_normalize_eol | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return whether end-of-line (EOL) character sequences are normalized. If normalization is enabled, "\r" and "\r\n" sequences are normalized to "\n". All other characters pass through without replacement. By default, EOL normalization is enabled.
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_normalize_eol().
CGUL_EXPORT void cgul_stream_reader__set_normalize_eol | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
int | normalize | ||
) |
Set whether end-of-line (EOL) character sequences are normalized. If normalization is enabled, "\r" and "\r\n" sequences are normalized to "\n". All other characters pass through without replacement. By default, EOL normalization is enabled.
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[in] | normalize | whether to normalize EOLs |
Referenced by cgul_stream_reader_cxx::set_normalize_eol().
CGUL_EXPORT void cgul_stream_reader__add_block_reader | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
cgul_block_reader_t | br | ||
) |
Add the block reader br
to the queue of block readers. The block reader must end on an encoded character boundary (i.e., encoded characters must not span across block readers). Characters are streamed from the front block reader on the queue. When end of file is reached for the front block reader, it is removed from the queue, and characters are then streamed from the next block reader in the queue. Once a block reader is added to the queue, it should not be modified externally. If an error occurs, an exception is thrown.
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[in] | br | block reader to add |
Referenced by cgul_stream_reader_cxx::add_block_reader().
CGUL_EXPORT size_t cgul_stream_reader__get_block_size | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the size of the block used to read from the block reader. The default block size is 16K.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_block_size().
CGUL_EXPORT int cgul_stream_reader__set_block_size | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
size_t | block_size | ||
) |
Set the block size block_size
used to read from the block reader. The default block size is 16K. If a block reader has been added, the block size can only be increased. If the block size is not large enough to decode any character in the input stream, the decoder will throw an exception when cgul_stream_reader__read()
, cgul_stream_reader__peek()
, or cgul_stream_reader__accept()
(or related methods) is called. If successful, 1
is returned; otherwise, 0
is returned. If an error occurs, 0
is returned, and an exception is thrown.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[in] | block_size | block size |
Referenced by cgul_stream_reader_cxx::set_block_size().
CGUL_EXPORT cgul_stream_reader__decode_t cgul_stream_reader__get_decoder | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the decoder used to decode characters from the block reader. If a decoder has not been set, NULL
is returned. If a decoder is not explicitly set, cgul_stream_reader
will attempt to determine the correct Unicode decoder by looking for the Unicode Byte-Order Mark (BOM). If the stream is not Unicode, an ASCII decoder is provided or the user can write their own. If no decoder is explicitly set and none can be determined from a BOM, UTF-8 is assumed.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_decoder().
CGUL_EXPORT void cgul_stream_reader__set_decoder | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
cgul_stream_reader__decode_t | decoder | ||
) |
Set the decoder decoder
used to decode characters from the block reader. If a decoder is not explicitly set, cgul_stream_reader
will attempt to determine the correct Unicode decoder by looking for the Unicode Byte-Order Mark (BOM). If the stream is not Unicode, an ASCII decoder is provided or the user can write their own. If no decoder is explicitly set and none can be determined from a BOM, UTF-8 is assumed. For automatic detection to work, the underlying block must have a size of at least 4 bytes. The default block size is 16K.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[in] | decoder | decoder |
Referenced by cgul_stream_reader_cxx::set_decoder().
CGUL_EXPORT const char* cgul_stream_reader__get_presentation_name | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr | ||
) |
Return the presentation name for the stream reader. If a presentation name has been explicitly set using cgul_stream_reader__set_presentation_name()
, it is returned. If no presentation name has been explicitly set and only one block reader has been added, the presentation name for the block reader is returned (and should not be cached by the client). If no presentation name has been explicitly set and more than one block reader has been added, "MULTIPART"
is returned. If no block reader has been set, NULL
is returned, and an exception is thrown.
[in] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
Referenced by cgul_stream_reader_cxx::get_presentation_name().
CGUL_EXPORT void cgul_stream_reader__set_presentation_name | ( | cgul_exception_t * | cex, |
cgul_stream_reader_t | sr, | ||
const char * | presentation_name | ||
) |
Set the presentation name for the stream reader. If an error occurs, an exception is thrown.
[in,out] | cex | c-style exception |
[in] | sr | cgul_stream_reader instance |
[in] | presentation_name | presentation name |
Referenced by cgul_stream_reader_cxx::set_presentation_name().