Hubbub
Data Structures | Macros | Typedefs | Enumerations | Functions | Variables
tokeniser.c File Reference
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <parserutils/charset/utf8.h>
#include "utils/parserutilserror.h"
#include "utils/utils.h"
#include "hubbub/errors.h"
#include "tokeniser/entities.h"
#include "tokeniser/tokeniser.h"

Go to the source code of this file.

Data Structures

struct  hubbub_tokeniser_context
 Context for tokeniser. More...
 
struct  hubbub_tokeniser
 Tokeniser data structure. More...
 

Macros

#define state(x)   case x:
 
#define START_BUF(str, cptr, length)
 Various macros for manipulating buffers. More...
 
#define COLLECT(str, cptr, length)
 
#define COLLECT_MS(str, cptr, length)
 
#define DOCTYPE   "DOCTYPE"
 
#define DOCTYPE_LEN   (SLEN(DOCTYPE) - 1)
 
#define PUBLIC   "PUBLIC"
 
#define PUBLIC_LEN   (SLEN(PUBLIC) - 1)
 
#define SYSTEM   "SYSTEM"
 
#define SYSTEM_LEN   (SLEN(SYSTEM) - 1)
 
#define CDATA   "[CDATA["
 
#define CDATA_LEN   (SLEN(CDATA) - 1)
 

Typedefs

typedef enum hubbub_tokeniser_state hubbub_tokeniser_state
 Tokeniser states. More...
 
typedef struct hubbub_tokeniser_context hubbub_tokeniser_context
 Context for tokeniser. More...
 

Enumerations

enum  hubbub_tokeniser_state {
  STATE_DATA, STATE_CHARACTER_REFERENCE_DATA, STATE_TAG_OPEN, STATE_CLOSE_TAG_OPEN,
  STATE_TAG_NAME, STATE_BEFORE_ATTRIBUTE_NAME, STATE_ATTRIBUTE_NAME, STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BEFORE_ATTRIBUTE_VALUE, STATE_ATTRIBUTE_VALUE_DQ, STATE_ATTRIBUTE_VALUE_SQ, STATE_ATTRIBUTE_VALUE_UQ,
  STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE, STATE_AFTER_ATTRIBUTE_VALUE_Q, STATE_SELF_CLOSING_START_TAG, STATE_BOGUS_COMMENT,
  STATE_MARKUP_DECLARATION_OPEN, STATE_MATCH_COMMENT, STATE_COMMENT_START, STATE_COMMENT_START_DASH,
  STATE_COMMENT, STATE_COMMENT_END_DASH, STATE_COMMENT_END, STATE_MATCH_DOCTYPE,
  STATE_DOCTYPE, STATE_BEFORE_DOCTYPE_NAME, STATE_DOCTYPE_NAME, STATE_AFTER_DOCTYPE_NAME,
  STATE_MATCH_PUBLIC, STATE_BEFORE_DOCTYPE_PUBLIC, STATE_DOCTYPE_PUBLIC_DQ, STATE_DOCTYPE_PUBLIC_SQ,
  STATE_AFTER_DOCTYPE_PUBLIC, STATE_MATCH_SYSTEM, STATE_BEFORE_DOCTYPE_SYSTEM, STATE_DOCTYPE_SYSTEM_DQ,
  STATE_DOCTYPE_SYSTEM_SQ, STATE_AFTER_DOCTYPE_SYSTEM, STATE_BOGUS_DOCTYPE, STATE_MATCH_CDATA,
  STATE_CDATA_BLOCK, STATE_NUMBERED_ENTITY, STATE_NAMED_ENTITY
}
 Tokeniser states. More...
 

Functions

static hubbub_error hubbub_tokeniser_handle_data (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_character_reference_data (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_tag_open (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_close_tag_open (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_tag_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_before_attribute_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_attribute_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_after_attribute_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_before_attribute_value (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_attribute_value_dq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_attribute_value_sq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_attribute_value_uq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_character_reference_in_attribute_value (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_after_attribute_value_q (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_self_closing_start_tag (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_bogus_comment (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_markup_declaration_open (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_match_comment (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_comment (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_match_doctype (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_doctype (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_before_doctype_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_doctype_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_after_doctype_name (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_match_public (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_before_doctype_public (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_doctype_public_dq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_doctype_public_sq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_after_doctype_public (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_match_system (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_before_doctype_system (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_doctype_system_dq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_doctype_system_sq (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_after_doctype_system (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_bogus_doctype (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_match_cdata (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_cdata_block (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_consume_character_reference (hubbub_tokeniser *tokeniser, size_t off)
 
static hubbub_error hubbub_tokeniser_handle_numbered_entity (hubbub_tokeniser *tokeniser)
 
static hubbub_error hubbub_tokeniser_handle_named_entity (hubbub_tokeniser *tokeniser)
 
static hubbub_error emit_character_token (hubbub_tokeniser *tokeniser, const hubbub_string *chars)
 Emit a character token. More...
 
static hubbub_error emit_current_chars (hubbub_tokeniser *tokeniser)
 Emit the current pending characters being stored in the tokeniser context. More...
 
static hubbub_error emit_current_tag (hubbub_tokeniser *tokeniser)
 Emit the current tag token being stored in the tokeniser context. More...
 
static hubbub_error emit_current_comment (hubbub_tokeniser *tokeniser)
 Emit the current comment token being stored in the tokeniser context. More...
 
static hubbub_error emit_current_doctype (hubbub_tokeniser *tokeniser, bool force_quirks)
 Emit the current doctype token being stored in the tokeniser context. More...
 
static hubbub_error hubbub_tokeniser_emit_token (hubbub_tokeniser *tokeniser, hubbub_token *token)
 Emit a token, performing sanity checks if necessary. More...
 
hubbub_error hubbub_tokeniser_create (parserutils_inputstream *input, hubbub_tokeniser **tokeniser)
 Create a hubbub tokeniser. More...
 
hubbub_error hubbub_tokeniser_destroy (hubbub_tokeniser *tokeniser)
 Destroy a hubbub tokeniser. More...
 
hubbub_error hubbub_tokeniser_setopt (hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params)
 Configure a hubbub tokeniser. More...
 
hubbub_error hubbub_tokeniser_insert_chunk (hubbub_tokeniser *tokeniser, const uint8_t *data, size_t len)
 Insert a chunk of data into the input stream. More...
 
hubbub_error hubbub_tokeniser_run (hubbub_tokeniser *tokeniser)
 Process remaining data in the input stream. More...
 

Variables

static const uint32_t cp1252Table [32]
 Table of mappings between Windows-1252 codepoints 128-159 and UCS4. More...
 
static const uint8_t u_fffd [3] = { '\xEF', '\xBF', '\xBD' }
 UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. More...
 
static const hubbub_string u_fffd_str = { u_fffd, sizeof(u_fffd) }
 
static const uint8_t lf = '\n'
 String for when we want to emit newlines. More...
 
static const hubbub_string lf_str = { &lf, 1 }
 

Macro Definition Documentation

#define CDATA   "[CDATA["

Definition at line 2740 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_cdata().

#define CDATA_LEN   (SLEN(CDATA) - 1)

Definition at line 2741 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_cdata().

#define COLLECT (   str,
  cptr,
  length 
)
Value:
do { \
	parserutils_error perror; \
	assert(str.len != 0); \
	perror = parserutils_buffer_append(tokeniser->buffer, \
			(uint8_t *) (cptr), (length)); \
	if (perror != PARSERUTILS_OK) \
		return hubbub_error_from_parserutils_error(perror); \
	(str).len += (length); \
} while (0)

Symbols linked from this expansion:
len: size_t len (Definition: initial.c:23)
hubbub_error_from_parserutils_error(): static hubbub_error hubbub_error_from_parserutils_error(parserutils_error error). Convert a ParserUtils error into a Hubbub error.

Definition at line 637 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_attribute_name(), hubbub_tokeniser_handle_attribute_value_uq(), hubbub_tokeniser_handle_doctype_name(), and hubbub_tokeniser_handle_tag_name().

#define COLLECT_MS (   str,
  cptr,
  length 
)
Value:
do { \
	parserutils_error perror; \
	perror = parserutils_buffer_append(tokeniser->buffer, \
			(uint8_t *) (cptr), (length)); \
	if (perror != PARSERUTILS_OK) \
		return hubbub_error_from_parserutils_error(perror); \
	(str).len += (length); \
} while (0)

Symbols linked from this expansion:
len: size_t len (Definition: initial.c:23)
hubbub_error_from_parserutils_error(): static hubbub_error hubbub_error_from_parserutils_error(parserutils_error error). Convert a ParserUtils error into a Hubbub error.

Definition at line 648 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_attribute_value_dq(), hubbub_tokeniser_handle_attribute_value_sq(), hubbub_tokeniser_handle_character_reference_in_attribute_value(), hubbub_tokeniser_handle_doctype_public_dq(), hubbub_tokeniser_handle_doctype_public_sq(), hubbub_tokeniser_handle_doctype_system_dq(), and hubbub_tokeniser_handle_doctype_system_sq().

#define DOCTYPE   "DOCTYPE"

Definition at line 1985 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_doctype().

#define DOCTYPE_LEN   (SLEN(DOCTYPE) - 1)

Definition at line 1986 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_doctype().

#define PUBLIC   "PUBLIC"

Definition at line 2211 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_public().

#define PUBLIC_LEN   (SLEN(PUBLIC) - 1)

Definition at line 2212 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_public().

#define START_BUF (   str,
  cptr,
  length 
)
Value:
do { \
	parserutils_error perror; \
	perror = parserutils_buffer_append(tokeniser->buffer, \
			(uint8_t *) (cptr), (length)); \
	if (perror != PARSERUTILS_OK) \
		return hubbub_error_from_parserutils_error(perror); \
	(str).len = (length); \
} while (0)

Symbols linked from this expansion:
len: size_t len (Definition: initial.c:23)
hubbub_error_from_parserutils_error(): static hubbub_error hubbub_error_from_parserutils_error(parserutils_error error). Convert a ParserUtils error into a Hubbub error.

Various macros for manipulating buffers.

Todo:

make some of these inline functions (type-safety)

document them properly here

Definition at line 627 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_after_attribute_name(), hubbub_tokeniser_handle_before_attribute_name(), hubbub_tokeniser_handle_before_attribute_value(), hubbub_tokeniser_handle_before_doctype_name(), hubbub_tokeniser_handle_close_tag_open(), and hubbub_tokeniser_handle_tag_open().

#define state (   x)    case x:

Referenced by hubbub_tokeniser_run().

#define SYSTEM   "SYSTEM"

Definition at line 2466 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_system().

#define SYSTEM_LEN   (SLEN(SYSTEM) - 1)

Definition at line 2467 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_match_system().

Typedef Documentation

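typedef struct hubbub_tokeniser_context hubbub_tokeniser_context

Context for tokeniser.

typedef enum hubbub_tokeniser_state hubbub_tokeniser_state

Tokeniser states.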

Enumeration Type Documentation

enum hubbub_tokeniser_state

Tokeniser states.

Enumerator
STATE_DATA 
STATE_CHARACTER_REFERENCE_DATA 
STATE_TAG_OPEN 
STATE_CLOSE_TAG_OPEN 
STATE_TAG_NAME 
STATE_BEFORE_ATTRIBUTE_NAME 
STATE_ATTRIBUTE_NAME 
STATE_AFTER_ATTRIBUTE_NAME 
STATE_BEFORE_ATTRIBUTE_VALUE 
STATE_ATTRIBUTE_VALUE_DQ 
STATE_ATTRIBUTE_VALUE_SQ 
STATE_ATTRIBUTE_VALUE_UQ 
STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE 
STATE_AFTER_ATTRIBUTE_VALUE_Q 
STATE_SELF_CLOSING_START_TAG 
STATE_BOGUS_COMMENT 
STATE_MARKUP_DECLARATION_OPEN 
STATE_MATCH_COMMENT 
STATE_COMMENT_START 
STATE_COMMENT_START_DASH 
STATE_COMMENT 
STATE_COMMENT_END_DASH 
STATE_COMMENT_END 
STATE_MATCH_DOCTYPE 
STATE_DOCTYPE 
STATE_BEFORE_DOCTYPE_NAME 
STATE_DOCTYPE_NAME 
STATE_AFTER_DOCTYPE_NAME 
STATE_MATCH_PUBLIC 
STATE_BEFORE_DOCTYPE_PUBLIC 
STATE_DOCTYPE_PUBLIC_DQ 
STATE_DOCTYPE_PUBLIC_SQ 
STATE_AFTER_DOCTYPE_PUBLIC 
STATE_MATCH_SYSTEM 
STATE_BEFORE_DOCTYPE_SYSTEM 
STATE_DOCTYPE_SYSTEM_DQ 
STATE_DOCTYPE_SYSTEM_SQ 
STATE_AFTER_DOCTYPE_SYSTEM 
STATE_BOGUS_DOCTYPE 
STATE_MATCH_CDATA 
STATE_CDATA_BLOCK 
STATE_NUMBERED_ENTITY 
STATE_NAMED_ENTITY 

Definition at line 50 of file tokeniser.c.

Function Documentation

hubbub_error emit_character_token ( hubbub_tokeniser *  tokeniser,
const hubbub_string *  chars 
)
inline static

Emit a character token.

Parameters
tokeniser  Tokeniser instance
chars      Pointer to hubbub_string to emit
Returns
true

Definition at line 3154 of file tokeniser.c.

References hubbub_token::character, hubbub_token::data, HUBBUB_TOKEN_CHARACTER, hubbub_tokeniser_emit_token(), and hubbub_token::type.

Referenced by hubbub_tokeniser_handle_cdata_block(), and hubbub_tokeniser_handle_data().

hubbub_error emit_current_chars ( hubbub_tokeniser *  tokeniser)
inline static
hubbub_error emit_current_comment ( hubbub_tokeniser *  tokeniser)
inline static

Emit the current comment token being stored in the tokeniser context.

Parameters
tokeniser  Tokeniser instance
Returns
true

Definition at line 3297 of file tokeniser.c.

References hubbub_tokeniser::buffer, hubbub_token::comment, hubbub_token::data, HUBBUB_TOKEN_COMMENT, hubbub_tokeniser_emit_token(), hubbub_string::len, hubbub_string::ptr, and hubbub_token::type.

Referenced by hubbub_tokeniser_handle_bogus_comment(), and hubbub_tokeniser_handle_comment().

hubbub_error emit_current_doctype ( hubbub_tokeniser tokeniser,
bool  force_quirks 
)
inlinestatic
hubbub_error emit_current_tag ( hubbub_tokeniser tokeniser)
inlinestatic
hubbub_error hubbub_tokeniser_consume_character_reference ( hubbub_tokeniser tokeniser,
size_t  off 
)
static
hubbub_error hubbub_tokeniser_create ( parserutils_inputstream *  input,
hubbub_tokeniser **  tokeniser 
)
hubbub_error hubbub_tokeniser_destroy ( hubbub_tokeniser *  tokeniser)

Destroy a hubbub tokeniser.

Parameters
tokeniser  The tokeniser instance to destroy
Returns
HUBBUB_OK on success, appropriate error otherwise

Definition at line 340 of file tokeniser.c.

References hubbub_tag::attributes, hubbub_tokeniser::buffer, hubbub_tokeniser::context, hubbub_tokeniser_context::current_tag, HUBBUB_BADPARM, HUBBUB_OK, and hubbub_tokeniser::insert_buf.

Referenced by hubbub_parser_create(), and hubbub_parser_destroy().

hubbub_error hubbub_tokeniser_emit_token ( hubbub_tokeniser tokeniser,
hubbub_token token 
)
static
hubbub_error hubbub_tokeniser_handle_after_attribute_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_after_attribute_value_q ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_after_doctype_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_after_doctype_public ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_after_doctype_system ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_attribute_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_attribute_value_dq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_attribute_value_sq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_attribute_value_uq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_before_attribute_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_before_attribute_value ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_before_doctype_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_before_doctype_public ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_before_doctype_system ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_bogus_comment ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_bogus_doctype ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_cdata_block ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_character_reference_data ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_character_reference_in_attribute_value ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_close_tag_open ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_comment ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_data ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_doctype ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_doctype_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_doctype_public_dq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_doctype_public_sq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_doctype_system_dq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_doctype_system_sq ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_markup_declaration_open ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_match_cdata ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_match_comment ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_match_doctype ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_match_public ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_match_system ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_named_entity ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_numbered_entity ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_self_closing_start_tag ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_tag_name ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_handle_tag_open ( hubbub_tokeniser tokeniser)
static
hubbub_error hubbub_tokeniser_insert_chunk ( hubbub_tokeniser *  tokeniser,
const uint8_t *  data,
size_t  len 
)

Insert a chunk of data into the input stream.

Inserts the given data into the input stream, ready for parsing, but does not cause any additional processing of the input.

Parameters
tokeniser  Tokeniser instance
data       Data to insert (UTF-8 encoded)
len        Length, in bytes, of data
Returns
HUBBUB_OK on success, appropriate error otherwise

Definition at line 415 of file tokeniser.c.

References HUBBUB_BADPARM, hubbub_error_from_parserutils_error(), HUBBUB_OK, and hubbub_tokeniser::insert_buf.

Referenced by hubbub_parser_insert_chunk().

hubbub_error hubbub_tokeniser_run ( hubbub_tokeniser *  tokeniser)

Process remaining data in the input stream.

Parameters
tokeniser  The tokeniser instance to invoke
Returns
HUBBUB_OK on success, appropriate error otherwise

Definition at line 436 of file tokeniser.c.

References HUBBUB_BADPARM, HUBBUB_NEEDDATA, HUBBUB_OK, HUBBUB_PAUSED, hubbub_tokeniser_handle_after_attribute_name(), hubbub_tokeniser_handle_after_attribute_value_q(), hubbub_tokeniser_handle_after_doctype_name(), hubbub_tokeniser_handle_after_doctype_public(), hubbub_tokeniser_handle_after_doctype_system(), hubbub_tokeniser_handle_attribute_name(), hubbub_tokeniser_handle_attribute_value_dq(), hubbub_tokeniser_handle_attribute_value_sq(), hubbub_tokeniser_handle_attribute_value_uq(), hubbub_tokeniser_handle_before_attribute_name(), hubbub_tokeniser_handle_before_attribute_value(), hubbub_tokeniser_handle_before_doctype_name(), hubbub_tokeniser_handle_before_doctype_public(), hubbub_tokeniser_handle_before_doctype_system(), hubbub_tokeniser_handle_bogus_comment(), hubbub_tokeniser_handle_bogus_doctype(), hubbub_tokeniser_handle_cdata_block(), hubbub_tokeniser_handle_character_reference_data(), hubbub_tokeniser_handle_character_reference_in_attribute_value(), hubbub_tokeniser_handle_close_tag_open(), hubbub_tokeniser_handle_comment(), hubbub_tokeniser_handle_data(), hubbub_tokeniser_handle_doctype(), hubbub_tokeniser_handle_doctype_name(), hubbub_tokeniser_handle_doctype_public_dq(), hubbub_tokeniser_handle_doctype_public_sq(), hubbub_tokeniser_handle_doctype_system_dq(), hubbub_tokeniser_handle_doctype_system_sq(), hubbub_tokeniser_handle_markup_declaration_open(), hubbub_tokeniser_handle_match_cdata(), hubbub_tokeniser_handle_match_comment(), hubbub_tokeniser_handle_match_doctype(), hubbub_tokeniser_handle_match_public(), hubbub_tokeniser_handle_match_system(), hubbub_tokeniser_handle_named_entity(), hubbub_tokeniser_handle_numbered_entity(), hubbub_tokeniser_handle_self_closing_start_tag(), hubbub_tokeniser_handle_tag_name(), hubbub_tokeniser_handle_tag_open(), hubbub_tokeniser::paused, hubbub_tokeniser::state, state, STATE_AFTER_ATTRIBUTE_NAME, STATE_AFTER_ATTRIBUTE_VALUE_Q, STATE_AFTER_DOCTYPE_NAME, STATE_AFTER_DOCTYPE_PUBLIC, STATE_AFTER_DOCTYPE_SYSTEM, 
STATE_ATTRIBUTE_NAME, STATE_ATTRIBUTE_VALUE_DQ, STATE_ATTRIBUTE_VALUE_SQ, STATE_ATTRIBUTE_VALUE_UQ, STATE_BEFORE_ATTRIBUTE_NAME, STATE_BEFORE_ATTRIBUTE_VALUE, STATE_BEFORE_DOCTYPE_NAME, STATE_BEFORE_DOCTYPE_PUBLIC, STATE_BEFORE_DOCTYPE_SYSTEM, STATE_BOGUS_COMMENT, STATE_BOGUS_DOCTYPE, STATE_CDATA_BLOCK, STATE_CHARACTER_REFERENCE_DATA, STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE, STATE_CLOSE_TAG_OPEN, STATE_COMMENT, STATE_COMMENT_END, STATE_COMMENT_END_DASH, STATE_COMMENT_START, STATE_COMMENT_START_DASH, STATE_DATA, STATE_DOCTYPE, STATE_DOCTYPE_NAME, STATE_DOCTYPE_PUBLIC_DQ, STATE_DOCTYPE_PUBLIC_SQ, STATE_DOCTYPE_SYSTEM_DQ, STATE_DOCTYPE_SYSTEM_SQ, STATE_MARKUP_DECLARATION_OPEN, STATE_MATCH_CDATA, STATE_MATCH_COMMENT, STATE_MATCH_DOCTYPE, STATE_MATCH_PUBLIC, STATE_MATCH_SYSTEM, STATE_NAMED_ENTITY, STATE_NUMBERED_ENTITY, STATE_SELF_CLOSING_START_TAG, STATE_TAG_NAME, and STATE_TAG_OPEN.

Referenced by hubbub_parser_completed(), hubbub_parser_parse_chunk(), and hubbub_tokeniser_setopt().

hubbub_error hubbub_tokeniser_setopt ( hubbub_tokeniser tokeniser,
hubbub_tokeniser_opttype  type,
hubbub_tokeniser_optparams params 
)

Variable Documentation

const uint32_t cp1252Table[32]
static
Initial value:
= {
0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178
}

Table of mappings between Windows-1252 codepoints 128-159 and UCS4.

Definition at line 26 of file tokeniser.c.

Referenced by hubbub_tokeniser_handle_numbered_entity().

const uint8_t lf = '\n'
static
const hubbub_string lf_str = { &lf, 1 }
static

Definition at line 44 of file tokeniser.c.

const uint8_t u_fffd[3] = { '\xEF', '\xBF', '\xBD' }
static
const hubbub_string u_fffd_str = { u_fffd, sizeof(u_fffd) }
static

Definition at line 37 of file tokeniser.c.