cgul_crlf_file.h File Reference

Read DOS, Mac, and Unix text files. More...

#include "cgul_common.h"
#include "cgul_exception.h"
Include dependency graph for cgul_crlf_file.h:
This graph shows which files directly or indirectly include this file:

Typedefs

typedef struct cgul_crlf_file * cgul_crlf_file_t
 

Functions

CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new (cgul_exception_t *cex)
 
CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new_from_fname (cgul_exception_t *cex, const char *fname, int is_block_buffered)
 
CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new_from_file (cgul_exception_t *cex, FILE *f, int is_block_buffered)
 
CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new_from_memory (cgul_exception_t *cex, const char *buf, size_t buf_size)
 
CGUL_EXPORT void cgul_crlf_file__delete (cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT void cgul_crlf_file__open_fname (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, const char *fname, int is_block_buffered)
 
CGUL_EXPORT void cgul_crlf_file__open_file (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, FILE *f, int is_block_buffered)
 
CGUL_EXPORT void cgul_crlf_file__open_memory (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, const char *buf, size_t buf_size)
 
CGUL_EXPORT void cgul_crlf_file__close (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT const char * cgul_crlf_file__get_fname (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT const void * cgul_crlf_file__get_file (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT int cgul_crlf_file__get_strip_utf8_bom (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT void cgul_crlf_file__set_strip_utf8_bom (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, int strip_utf8_bom)
 
void cgul_crlf_file__get_lines (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, char ***lines, unsigned long int *line_count)
 
CGUL_EXPORT const char * cgul_crlf_file__get_line (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT unsigned long cgul_crlf_file__get_line_count (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT unsigned long cgul_crlf_file__get_line_offset (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT void cgul_crlf_file__fseek (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, unsigned long offset, int whence)
 
CGUL_EXPORT void cgul_crlf_file__rewind (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT unsigned long cgul_crlf_file__get_buffer_size (cgul_exception_t *cex, cgul_crlf_file_t crlf_file)
 
CGUL_EXPORT void cgul_crlf_file__set_buffer_size (cgul_exception_t *cex, cgul_crlf_file_t crlf_file, unsigned long bsize)
 

Detailed Description

This class reads lines of text from DOS, Mac, or Unix text files, optionally stripping a leading UTF-8 byte order mark if present.

Author
Paul Serice

Typedef Documentation

§ cgul_crlf_file_t

typedef struct cgul_crlf_file* cgul_crlf_file_t

Opaque pointer to a cgul_crlf_file instance.

Function Documentation

§ cgul_crlf_file__new()

CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new ( cgul_exception_t *  cex)

This method creates a new cgul_crlf_file instance. After this method returns, cgul_crlf_file__open_fname(), cgul_crlf_file__open_file(), or cgul_crlf_file__open_memory() should be called before calling any other method. The client is responsible for calling cgul_crlf_file__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out] cex: c-style exception
Returns
new cgul_crlf_file instance

Referenced by cgul_crlf_file_cxx::cgul_crlf_file_cxx().

§ cgul_crlf_file__new_from_fname()

CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new_from_fname ( cgul_exception_t *  cex,
const char *  fname,
int  is_block_buffered 
)

This method creates a new cgul_crlf_file instance and calls cgul_crlf_file__open_fname() passing it fname. The file will be closed when this instance is deleted. The client is responsible for calling cgul_crlf_file__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

If is_block_buffered is true, this method results in a very fast cgul_crlf_file instance, but the underlying file should be a regular file or any other file that is capable of block buffering. If the file is not capable of block buffering, is_block_buffered should be false which results in a much slower cgul_crlf_file instance, but one that should be able to read from any type of file. As a general rule, if you are reading from a regular file or are working in batch mode, is_block_buffered should be true to get the best performance. If you are reading from a line-oriented or character-oriented device like a terminal or from a named pipe across which small messages are being passed, is_block_buffered should be false.

Parameters
[in,out] cex: c-style exception
[in] fname: file name
[in] is_block_buffered: whether the file is block buffered
Returns
new cgul_crlf_file instance

Referenced by cgul_crlf_file_cxx::cgul_crlf_file_cxx().

§ cgul_crlf_file__new_from_file()

CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new_from_file ( cgul_exception_t *  cex,
FILE *  f,
int  is_block_buffered 
)

This method creates a new cgul_crlf_file instance and calls cgul_crlf_file__open_file() passing it f. The class does not take ownership of f. Thus, the client is still responsible for calling fclose() on it. The client is also responsible for calling cgul_crlf_file__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

If is_block_buffered is true, this method results in a very fast cgul_crlf_file instance, but the underlying file should be a regular file or any other file that is capable of block buffering. If the file is not capable of block buffering, is_block_buffered should be false which results in a much slower cgul_crlf_file instance, but one that should be able to read from any type of file. As a general rule, if you are reading from a regular file or are working in batch mode, is_block_buffered should be true to get the best performance. If you are reading from a line-oriented or character-oriented device like a terminal or from a named pipe across which small messages are being passed, is_block_buffered should be false.

Parameters
[in,out] cex: c-style exception
[in] f: file
[in] is_block_buffered: whether the file is block buffered
Returns
new cgul_crlf_file instance

Referenced by cgul_crlf_file_cxx::cgul_crlf_file_cxx().

§ cgul_crlf_file__new_from_memory()

CGUL_EXPORT cgul_crlf_file_t cgul_crlf_file__new_from_memory ( cgul_exception_t *  cex,
const char *  buf,
size_t  buf_size 
)

This method creates a new cgul_crlf_file instance and calls cgul_crlf_file__open_memory() passing it buf and buf_size. This class does not take ownership of buf so the client is still responsible for freeing buf if necessary. The client is responsible for calling cgul_crlf_file__delete() on the object returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out] cex: c-style exception
[in] buf: memory buffer
[in] buf_size: size of buf in bytes
Returns
new cgul_crlf_file instance

Referenced by cgul_crlf_file_cxx::cgul_crlf_file_cxx().

§ cgul_crlf_file__delete()

CGUL_EXPORT void cgul_crlf_file__delete ( cgul_crlf_file_t  crlf_file)

This method deletes the crlf_file instance freeing all internally allocated resources. This does not include closing the underlying file if it was obtained from the client by a call to cgul_crlf_file__new_from_file() or cgul_crlf_file__open_file(). The client must not use crlf_file after calling this method.

Parameters
[in] crlf_file: crlf file

Referenced by cgul_crlf_file_cxx::set_obj(), and cgul_crlf_file_cxx::~cgul_crlf_file_cxx().

§ cgul_crlf_file__open_fname()

CGUL_EXPORT void cgul_crlf_file__open_fname ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
const char *  fname,
int  is_block_buffered 
)

Open the file with name fname and use it when getting lines. If a data source is already open, it will be closed before this method attempts to open the new data source. The new file will be closed when this instance is deleted. If an error occurs, an exception is thrown.

If is_block_buffered is true, this method results in a very fast cgul_crlf_file instance, but the underlying file should be a regular file or any other file that is capable of block buffering. If the file is not capable of block buffering, is_block_buffered should be false which results in a much slower cgul_crlf_file instance, but one that should be able to read from any type of file. As a general rule, if you are reading from a regular file or are working in batch mode, is_block_buffered should be true to get the best performance. If you are reading from a line-oriented or character-oriented device like a terminal or from a named pipe across which small messages are being passed, is_block_buffered should be false.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[in] fname: file name
[in] is_block_buffered: whether the file is block buffered

Referenced by cgul_crlf_file_cxx::open_fname().

§ cgul_crlf_file__open_file()

CGUL_EXPORT void cgul_crlf_file__open_file ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
FILE *  f,
int  is_block_buffered 
)

Use the file f when getting lines. If a data source is already open, it is closed before attempting to open the new data source. This class does not take ownership of f. Thus, the client is still responsible for calling fclose() on f. If an error occurs, an exception is thrown.

If is_block_buffered is true, this method results in a very fast cgul_crlf_file instance, but the underlying file should be a regular file or any other file that is capable of block buffering. If the file is not capable of block buffering, is_block_buffered should be false which results in a much slower cgul_crlf_file instance, but one that should be able to read from any type of file. As a general rule, if you are reading from a regular file or are working in batch mode, is_block_buffered should be true to get the best performance. If you are reading from a line-oriented or character-oriented device like a terminal or from a named pipe across which small messages are being passed, is_block_buffered should be false.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[in] f: file
[in] is_block_buffered: whether the file is block buffered

Referenced by cgul_crlf_file_cxx::open_file().

§ cgul_crlf_file__open_memory()

CGUL_EXPORT void cgul_crlf_file__open_memory ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
const char *  buf,
size_t  buf_size 
)

Use the memory buffer buf when getting lines. If a data source is already open, it will be closed before this method attempts to open the new data source. This class does not take ownership of buf. Thus, the client is still responsible for freeing buf if necessary. If an error occurs, an exception is thrown.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[in] buf: memory buffer
[in] buf_size: size of buf in bytes

Referenced by cgul_crlf_file_cxx::open_memory().

§ cgul_crlf_file__close()

CGUL_EXPORT void cgul_crlf_file__close ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

Close the file. After this method returns, cgul_crlf_file__open_fname(), cgul_crlf_file__open_file(), or cgul_crlf_file__open_memory() should be called before calling any other method.

Parameters
[in] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance

Referenced by cgul_crlf_file_cxx::close().

§ cgul_crlf_file__get_fname()

CGUL_EXPORT const char* cgul_crlf_file__get_fname ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

Return the name of the currently opened file. If a FILE* was opened instead of a file name, "FILE" will be used. If memory was opened instead of a file name, "MEMORY" will be used. The client must not attempt to free the pointer returned. This method throws an exception only if a file is not currently open.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
name of the currently opened file

Referenced by cgul_crlf_file_cxx::get_file_name().

§ cgul_crlf_file__get_file()

CGUL_EXPORT const void* cgul_crlf_file__get_file ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

Return the currently opened file. If a file name or FILE* was opened, a FILE* is returned. If memory was opened, a char* is returned. The client must not do anything that would invalidate the pointer or corrupt the associated data stream. This method throws an exception only if a file is not currently open.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
currently opened file or memory

Referenced by cgul_crlf_file_cxx::get_file().

§ cgul_crlf_file__get_strip_utf8_bom()

CGUL_EXPORT int cgul_crlf_file__get_strip_utf8_bom ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

This method returns whether the leading UTF-8 byte-order mark (BOM) should be removed from the first line if it is present.

Parameters
[in] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
whether to strip a leading UTF-8 byte-order mark

Referenced by cgul_crlf_file_cxx::get_strip_utf8_bom().

§ cgul_crlf_file__set_strip_utf8_bom()

CGUL_EXPORT void cgul_crlf_file__set_strip_utf8_bom ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
int  strip_utf8_bom 
)

By default, this class detects the leading UTF-8 byte-order mark (BOM) and strips it from the first line returned by cgul_crlf_file__get_line() if it is present. It then clears its internal flag so that BOMs internal to the text file will be returned. This is generally what you want because the leading BOM is not significant but the internal BOMs are.

You can alter the way this class handles the leading BOM by calling this method with strip_utf8_bom set to 0. This will cause the leading BOM to be returned as part of the first line. This can be useful, for example, if you just want to convert the text file and are not interested in its contents.

It should be noted that most operating systems do not save UTF-8 text files with a leading BOM because UTF-8 is a character stream and, as such, does not have byte-order problems; however, Microsoft Windows adds the BOM to its UTF-8 text files presumably to help distinguish UTF-8 text files from text files with different encodings.

Parameters
[in] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[in] strip_utf8_bom: whether to strip leading UTF-8 byte-order mark

Referenced by cgul_crlf_file_cxx::set_strip_utf8_bom().

§ cgul_crlf_file__get_lines()

void cgul_crlf_file__get_lines ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
char ***  lines,
unsigned long int *  line_count 
)

Return the array of lines from the underlying text file in *lines and the number of lines in *line_count. Each line is stored in the array as a C-style string. The client is responsible for calling free() on each line. If an error occurs, an exception is thrown.

To use this function, do something like the following:

    char** lines = NULL;
    unsigned long int line_count = 0;
    ...
    cgul_crlf_file__get_lines(cex, crlf, &lines, &line_count);
    if (*cex) {
        goto out;
    }
Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[out] lines: ordered list of lines from the file
[out] line_count: number of lines
Returns
list of lines from the file (delivered through *lines, with its length in *line_count; the function itself is declared void)

Referenced by cgul_crlf_file_cxx::get_lines().

§ cgul_crlf_file__get_line()

CGUL_EXPORT const char* cgul_crlf_file__get_line ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

This method returns the next line of text from the underlying DOS, Mac, or Unix text file. The caller must not call free() on the pointer returned as it points into a larger block owned by this class. If EOF is reached, NULL is returned. If an error occurs, NULL is returned, and an exception is thrown.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
next line of text

Referenced by cgul_crlf_file_cxx::get_line().

§ cgul_crlf_file__get_line_count()

CGUL_EXPORT unsigned long cgul_crlf_file__get_line_count ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

Get the line count for the last line returned by cgul_crlf_file__get_line(). The line count is one-based. No attempt is made to prevent the return value from overflowing, so the caller is responsible for verifying the return value.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
line count

Referenced by cgul_crlf_file_cxx::get_line_count().

§ cgul_crlf_file__get_line_offset()

CGUL_EXPORT unsigned long cgul_crlf_file__get_line_offset ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

Get the line offset for the last line returned by cgul_crlf_file__get_line(). The offset is zero-based. If the underlying file is binary and random access, you can use the return value to directly seek to the line as follows:

    cgul_crlf_file__fseek(cex, crlf_file, offset, SEEK_SET);

Because the prototype for fseek() requires a long for the offset parameter, no attempt is made to prevent the return value from overflowing. So, the caller is responsible for verifying the return value.

Note that the offset returned is the number of bytes from the start of the file to the current line. This is not necessarily the same as the number of characters which depends on how the file is encoded.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
file offset

Referenced by cgul_crlf_file_cxx::get_line_offset().

§ cgul_crlf_file__fseek()

CGUL_EXPORT void cgul_crlf_file__fseek ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
unsigned long  offset,
int  whence 
)

This method calls cgul_libc__fseek() on the underlying FILE* object and resets the underlying cgul_crlf object so that reading new lines can continue at offset which is relative to whence. If an error occurs, an exception is thrown.

You can call cgul_crlf_file__get_line_offset() immediately after calling cgul_crlf_file__get_line() to determine the offset for the beginning of the last line returned.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[in] offset: offset
[in] whence: position to which offset is relative

Referenced by cgul_crlf_file_cxx::fseek().

§ cgul_crlf_file__rewind()

CGUL_EXPORT void cgul_crlf_file__rewind ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

This method calls cgul_libc__rewind() on the underlying FILE* object and resets the underlying cgul_crlf object so that reading new lines can continue at the beginning of the file. This method throws an exception if neither cgul_crlf_file__open_fname() nor cgul_crlf_file__open_file() has been called. This method also throws an exception if the underlying file is not seekable.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance

Referenced by cgul_crlf_file_cxx::rewind().

§ cgul_crlf_file__get_buffer_size()

CGUL_EXPORT unsigned long cgul_crlf_file__get_buffer_size ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file 
)

This method returns the size of the buffer used to read blocks out of the underlying file.

Parameters
[in] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
Returns
buffer size

Referenced by cgul_crlf_file_cxx::get_buffer_size().

§ cgul_crlf_file__set_buffer_size()

CGUL_EXPORT void cgul_crlf_file__set_buffer_size ( cgul_exception_t *  cex,
cgul_crlf_file_t  crlf_file,
unsigned long  bsize 
)

This method sets the size of the buffer used to read blocks out of the underlying file. By default, the buffer size is 16K which is efficient for processing large files en masse but may not be efficient when randomly accessing small parts of the same file. If an error occurs allocating the new buffer, an exception is thrown, and the original buffer will continue to be used.

Parameters
[in,out] cex: c-style exception
[in] crlf_file: cgul_crlf_file instance
[in] bsize: new buffer size

Referenced by cgul_crlf_file_cxx::set_buffer_size().