X-Git-Url: http://andersk.mit.edu/gitweb/libyaml.git/blobdiff_plain/f2b59d4db01af719e98115dae18f6d9b33cc2327..1893dabd78f6707d07882049997b7c2879bf02b9:/src/scanner.c diff --git a/src/scanner.c b/src/scanner.c index 2a555d0..86e2050 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -475,124 +475,103 @@ * BLOCK-END */ -#if HAVE_CONFIG_H -#include -#endif - -#include - -#include +#include "yaml_private.h" /* * Ensure that the buffer contains the required number of characters. * Return 1 on success, 0 on failure (reader error or memory error). */ -#define UPDATE(parser,length) \ - (parser->unread >= (length) \ - ? 1 \ +#define CACHE(parser,length) \ + (parser->unread >= (length) \ + ? 1 \ : yaml_parser_update_buffer(parser, (length))) /* - * Check the octet at the specified position. - */ - -#define CHECK_AT(parser,octet,offset) \ - (parser->buffer[offset] == (yaml_char_t)(octet)) - -/* - * Check the current octet in the buffer. - */ - -#define CHECK(parser,octet) CHECK_AT(parser,(octet),0) - -/* - * Check if the character at the specified position is NUL. - */ - -#define IS_Z_AT(parser,offset) CHECK_AT(parser,'\0',(offset)) - -#define IS_Z(parser) IS_Z_AT(parser,0) - -/* - * Check if the character at the specified position is space. - */ - -#define IS_SPACE_AT(parser,offset) CHECK_AT(parser,' ',(offset)) - -#define IS_SPACE(parser) IS_SPACE_AT(parser,0) - -/* - * Check if the character at the specified position is tab. - */ - -#define IS_TAB_AT(parser,offset) CHECK_AT(parser,'\t',(offset)) - -#define IS_TAB(parser) IS_TAB_AT(parser,0) - -/* - * Check if the character at the specified position is blank (space or tab). - */ - -#define IS_BLANK_AT(parser,offset) \ - (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset))) - -#define IS_BLANK(parser) IS_BLANK_AT(parser,0) - -/* - * Check if the character at the specified position is a line break. - */ - -#define IS_BREAK_AT(parser,offset) \ - (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \ - || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ - || (CHECK_AT(parser,'\xC2',(offset)) \ - && CHECK_AT(parser,'\x85',(offset+1))) /* NEL (#x85) */ \ - || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset+1)) \ - && CHECK_AT(parser,'\xA8',(offset+2))) /* LS (#x2028) */ \ - || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset+1)) \ - && CHECK_AT(parser,'\xA9',(offset+2)))) /* LS (#x2029) */ - -#define IS_BREAK(parser) IS_BREAK_AT(parser,0) - -/* - * Check if the character is a line break or NUL. + * Advance the buffer pointer. */ -#define IS_BREAKZ_AT(parser,offset) \ - (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset))) - -#define IS_BREAKZ(parser) IS_BREAKZ_AT(parser,0) +#define SKIP(parser) \ + (parser->mark.index ++, \ + parser->mark.column ++, \ + parser->unread --, \ + parser->buffer.pointer += WIDTH(parser->buffer)) + +#define SKIP_LINE(parser) \ + (IS_CRLF(parser->buffer) ? \ + (parser->mark.index += 2, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread -= 2, \ + parser->buffer.pointer += 2) : \ + IS_BREAK(parser->buffer) ? \ + (parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread --, \ + parser->buffer.pointer += WIDTH(parser->buffer)) : 0) /* - * Check if the character is a line break, space, or NUL. + * Copy a character to a string buffer and advance pointers. */ -#define IS_SPACEZ_AT(parser,offset) \ - (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) - -#define IS_SPACEZ(parser) IS_SPACEZ_AT(parser,0) +#define READ(parser,string) \ + (STRING_EXTEND(parser,string) ? \ + (COPY(string,parser->buffer), \ + parser->mark.index ++, \ + parser->mark.column ++, \ + parser->unread --, \ + 1) : 0) /* - * Check if the character is a line break, space, tab, or NUL. + * Copy a line break character to a string buffer and advance pointers. */ -#define IS_BLANKZ_AT(parser,offset) \ - (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) - -#define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0) +#define READ_LINE(parser,string) \ + (STRING_EXTEND(parser,string) ? \ + (((CHECK_AT(parser->buffer,'\r',0) \ + && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \ + (*((string).pointer++) = (yaml_char_t) '\n', \ + parser->buffer.pointer += 2, \ + parser->mark.index += 2, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread -= 2) : \ + (CHECK_AT(parser->buffer,'\r',0) \ + || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \ + (*((string).pointer++) = (yaml_char_t) '\n', \ + parser->buffer.pointer ++, \ + parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread --) : \ + (CHECK_AT(parser->buffer,'\xC2',0) \ + && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \ + (*((string).pointer++) = (yaml_char_t) '\n', \ + parser->buffer.pointer += 2, \ + parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread --) : \ + (CHECK_AT(parser->buffer,'\xE2',0) && \ + CHECK_AT(parser->buffer,'\x80',1) && \ + (CHECK_AT(parser->buffer,'\xA8',2) || \ + CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ + (*((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread --) : 0), \ + 1) : 0) /* * Public API declarations. */ -YAML_DECLARE(yaml_token_t *) -yaml_parser_get_token(yaml_parser_t *parser); - -YAML_DECLARE(yaml_token_t *) -yaml_parser_peek_token(yaml_parser_t *parser); +YAML_DECLARE(int) +yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); /* * Error handling. @@ -602,14 +581,11 @@ static int yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, yaml_mark_t context_mark, const char *problem); -static yaml_mark_t -yaml_parser_get_mark(yaml_parser_t *parser); - /* * High-level token API. */ -static int +YAML_DECLARE(int) yaml_parser_fetch_more_tokens(yaml_parser_t *parser); static int @@ -628,12 +604,19 @@ yaml_parser_save_simple_key(yaml_parser_t *parser); static int yaml_parser_remove_simple_key(yaml_parser_t *parser); +static int +yaml_parser_increase_flow_level(yaml_parser_t *parser); + +static int +yaml_parser_decrease_flow_level(yaml_parser_t *parser); + /* * Indentation treatment. */ static int -yaml_parser_roll_indent(yaml_parser_t *parser, int column); +yaml_parser_roll_indent(yaml_parser_t *parser, int column, + int number, yaml_token_type_t type, yaml_mark_t mark); static int yaml_parser_unroll_indent(yaml_parser_t *parser, int column); @@ -651,32 +634,14 @@ yaml_parser_fetch_stream_end(yaml_parser_t *parser); static int yaml_parser_fetch_directive(yaml_parser_t *parser); -static int -yaml_parser_fetch_document_start(yaml_parser_t *parser); - -static int -yaml_parser_fetch_document_end(yaml_parser_t *parser); - static int yaml_parser_fetch_document_indicator(yaml_parser_t *parser, yaml_token_type_t type); -static int -yaml_parser_fetch_flow_sequence_start(yaml_parser_t *parser); - -static int -yaml_parser_fetch_flow_mapping_start(yaml_parser_t *parser); - static int yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, yaml_token_type_t type); -static int -yaml_parser_fetch_flow_sequence_end(yaml_parser_t *parser); - -static int -yaml_parser_fetch_flow_mapping_end(yaml_parser_t *parser); - static int yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, yaml_token_type_t type); @@ -694,10 +659,7 @@ static int yaml_parser_fetch_value(yaml_parser_t *parser); static int -yaml_parser_fetch_alias(yaml_parser_t *parser); - -static int -yaml_parser_fetch_anchor(yaml_parser_t *parser); +yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type); static int yaml_parser_fetch_tag(yaml_parser_t *parser); @@ -718,31 +680,31 @@ yaml_parser_fetch_plain_scalar(yaml_parser_t *parser); static int yaml_parser_scan_to_next_token(yaml_parser_t *parser); -static yaml_token_t * -yaml_parser_scan_directive(yaml_parser_t *parser); +static int +yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token); static int yaml_parser_scan_directive_name(yaml_parser_t *parser, yaml_mark_t start_mark, yaml_char_t **name); static int -yaml_parser_scan_yaml_directive_value(yaml_parser_t *parser, +yaml_parser_scan_version_directive_value(yaml_parser_t *parser, yaml_mark_t start_mark, int *major, int *minor); static int -yaml_parser_scan_yaml_directive_number(yaml_parser_t *parser, +yaml_parser_scan_version_directive_number(yaml_parser_t *parser, yaml_mark_t start_mark, int *number); static int yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, - yaml_char_t **handle, yaml_char_t **prefix); + yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix); -static yaml_token_t * -yaml_parser_scan_anchor(yaml_parser_t *parser, +static int +yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, yaml_token_type_t type); -static yaml_token_t * -yaml_parser_scan_tag(yaml_parser_t *parser); +static int +yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token); static int yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, @@ -750,69 +712,66 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, static int yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, - yaml_mark_t start_mark, yaml_char_t **url); + yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri); + +static int +yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_string_t *string); -static yaml_token_t * -yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal); +static int +yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, + int literal); static int -yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser, - yaml_mark_t start_mark, int *chomping, int *increment); +yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, + int *indent, yaml_string_t *breaks, + yaml_mark_t start_mark, yaml_mark_t *end_mark); -static yaml_token_t * -yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single); +static int +yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, + int single); -static yaml_token_t * -yaml_parser_scan_plain_scalar(yaml_parser_t *parser); +static int +yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token); /* - * Get the next token and remove it from the tokens queue. + * Get the next token. */ -YAML_DECLARE(yaml_token_t *) -yaml_parser_get_token(yaml_parser_t *parser) +YAML_DECLARE(int) +yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) { - yaml_token_t *token; - assert(parser); /* Non-NULL parser object is expected. */ - assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ - - /* Ensure that the tokens queue contains enough tokens. */ - - if (!yaml_parser_fetch_more_tokens(parser)) return NULL; - - /* Fetch the next token from the queue. */ - - token = parser->tokens[parser->tokens_head]; - - /* Move the queue head. */ - - parser->tokens[parser->tokens_head++] = NULL; - if (parser->tokens_head == parser->tokens_size) - parser->tokens_head = 0; + assert(token); /* Non-NULL token object is expected. */ - parser->tokens_parsed++; + /* Erase the token object. */ - return token; -} + memset(token, 0, sizeof(yaml_token_t)); -/* - * Get the next token, but don't remove it from the queue. - */ + /* No tokens after STREAM-END or error. */ -YAML_DECLARE(yaml_token_t *) -yaml_parser_peek_token(yaml_parser_t *parser) -{ - assert(parser); /* Non-NULL parser object is expected. */ - assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ + if (parser->stream_end_produced || parser->error) { + return 1; + } /* Ensure that the tokens queue contains enough tokens. */ - if (!yaml_parser_fetch_more_tokens(parser)) return NULL; + if (!parser->token_available) { + if (!yaml_parser_fetch_more_tokens(parser)) + return 0; + } /* Fetch the next token from the queue. */ + + *token = DEQUEUE(parser, parser->tokens); + parser->token_available = 0; + parser->tokens_parsed ++; - return parser->tokens[parser->tokens_head]; + if (token->type == YAML_STREAM_END_TOKEN) { + parser->stream_end_produced = 1; + } + + return 1; } /* @@ -827,32 +786,20 @@ yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, parser->context = context; parser->context_mark = context_mark; parser->problem = problem; - parser->problem_mark = yaml_parser_get_mark(parser); -} + parser->problem_mark = parser->mark; -/* - * Get the mark for the current buffer position. - */ - -static yaml_mark_t -yaml_parser_get_mark(yaml_parser_t *parser) -{ - yaml_mark_t mark = { parser->index, parser->line, parser->column }; - - return mark; + return 0; } - /* * Ensure that the tokens queue contains at least one token which can be * returned to the Parser. */ -static int +YAML_DECLARE(int) yaml_parser_fetch_more_tokens(yaml_parser_t *parser) { int need_more_tokens; - int k; /* While we need more tokens to fetch, do it. */ @@ -864,7 +811,7 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) need_more_tokens = 0; - if (parser->tokens_head == parser->tokens_tail) + if (parser->tokens.head == parser->tokens.tail) { /* Queue is empty. */ @@ -872,12 +819,17 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) } else { + yaml_simple_key_t *simple_key; + /* Check if any potential simple key may occupy the head position. */ - for (k = 0; k <= parser->flow_level; k++) { - yaml_simple_key_t *simple_key = parser->simple_keys[k]; - if (simple_key - && (simple_key->token_number == parser->tokens_parsed)) { + if (!yaml_parser_stale_simple_keys(parser)) + return 0; + + for (simple_key = parser->simple_keys.start; + simple_key != parser->simple_keys.top; simple_key++) { + if (simple_key->possible + && simple_key->token_number == parser->tokens_parsed) { need_more_tokens = 1; break; } @@ -895,6 +847,8 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) return 0; } + parser->token_available = 1; + return 1; } @@ -907,7 +861,7 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) { /* Ensure that the buffer is initialized. */ - if (!UPDATE(parser, 1)) + if (!CACHE(parser, 1)) return 0; /* Check if we just started scanning. Fetch STREAM-START then. */ @@ -920,9 +874,14 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) if (!yaml_parser_scan_to_next_token(parser)) return 0; + /* Remove obsolete potential simple keys. */ + + if (!yaml_parser_stale_simple_keys(parser)) + return 0; + /* Check the indentation level against the current column. */ - if (!yaml_parser_unroll_indent(parser, parser->column)) + if (!yaml_parser_unroll_indent(parser, parser->mark.column)) return 0; /* @@ -930,110 +889,118 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * of the longest indicators ('--- ' and '... '). */ - if (!UPDATE(parser, 4)) + if (!CACHE(parser, 4)) return 0; /* Is it the end of the stream? */ - if (IS_Z(parser)) + if (IS_Z(parser->buffer)) return yaml_parser_fetch_stream_end(parser); /* Is it a directive? */ - if (parser->column == 0 && CHECK(parser, '%')) + if (parser->mark.column == 0 && CHECK(parser->buffer, '%')) return yaml_parser_fetch_directive(parser); /* Is it the document start indicator? */ - if (parser->column == 0 - && CHECK_AT(parser, '-', 0) - && CHECK_AT(parser, '-', 1) - && CHECK_AT(parser, '-', 2) - && IS_BLANKZ_AT(parser, 3)) - return yaml_parser_fetch_document_start(parser); + if (parser->mark.column == 0 + && CHECK_AT(parser->buffer, '-', 0) + && CHECK_AT(parser->buffer, '-', 1) + && CHECK_AT(parser->buffer, '-', 2) + && IS_BLANKZ_AT(parser->buffer, 3)) + return yaml_parser_fetch_document_indicator(parser, + YAML_DOCUMENT_START_TOKEN); /* Is it the document end indicator? */ - if (parser->column == 0 - && CHECK_AT(parser, '.', 0) - && CHECK_AT(parser, '.', 1) - && CHECK_AT(parser, '.', 2) - && IS_BLANKZ_AT(parser, 3)) - return yaml_parser_fetch_document_start(parser); + if (parser->mark.column == 0 + && CHECK_AT(parser->buffer, '.', 0) + && CHECK_AT(parser->buffer, '.', 1) + && CHECK_AT(parser->buffer, '.', 2) + && IS_BLANKZ_AT(parser->buffer, 3)) + return yaml_parser_fetch_document_indicator(parser, + YAML_DOCUMENT_END_TOKEN); /* Is it the flow sequence start indicator? */ - if (CHECK(parser, '[')) - return yaml_parser_fetch_flow_sequence_start(parser); + if (CHECK(parser->buffer, '[')) + return yaml_parser_fetch_flow_collection_start(parser, + YAML_FLOW_SEQUENCE_START_TOKEN); /* Is it the flow mapping start indicator? */ - if (CHECK(parser, '{')) - return yaml_parser_fetch_flow_mapping_start(parser); + if (CHECK(parser->buffer, '{')) + return yaml_parser_fetch_flow_collection_start(parser, + YAML_FLOW_MAPPING_START_TOKEN); /* Is it the flow sequence end indicator? */ - if (CHECK(parser, ']')) - return yaml_parser_fetch_flow_sequence_end(parser); + if (CHECK(parser->buffer, ']')) + return yaml_parser_fetch_flow_collection_end(parser, + YAML_FLOW_SEQUENCE_END_TOKEN); /* Is it the flow mapping end indicator? */ - if (CHECK(parser, '}')) - return yaml_parser_fetch_flow_mapping_end(parser); + if (CHECK(parser->buffer, '}')) + return yaml_parser_fetch_flow_collection_end(parser, + YAML_FLOW_MAPPING_END_TOKEN); /* Is it the flow entry indicator? */ - if (CHECK(parser, ',')) + if (CHECK(parser->buffer, ',')) return yaml_parser_fetch_flow_entry(parser); /* Is it the block entry indicator? */ - if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1)) + if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1)) return yaml_parser_fetch_block_entry(parser); /* Is it the key indicator? */ - if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1))) + if (CHECK(parser->buffer, '?') + && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) return yaml_parser_fetch_key(parser); /* Is it the value indicator? */ - if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1))) + if (CHECK(parser->buffer, ':') + && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) return yaml_parser_fetch_value(parser); /* Is it an alias? */ - if (CHECK(parser, '*')) - return yaml_parser_fetch_alias(parser); + if (CHECK(parser->buffer, '*')) + return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN); /* Is it an anchor? */ - if (CHECK(parser, '&')) - return yaml_parser_fetch_anchor(parser); + if (CHECK(parser->buffer, '&')) + return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN); /* Is it a tag? */ - if (CHECK(parser, '!')) + if (CHECK(parser->buffer, '!')) return yaml_parser_fetch_tag(parser); /* Is it a literal scalar? */ - if (CHECK(parser, '|') && !parser->flow_level) + if (CHECK(parser->buffer, '|') && !parser->flow_level) return yaml_parser_fetch_block_scalar(parser, 1); /* Is it a folded scalar? */ - if (CHECK(parser, '>') && !parser->flow_level) + if (CHECK(parser->buffer, '>') && !parser->flow_level) return yaml_parser_fetch_block_scalar(parser, 0); /* Is it a single-quoted scalar? */ - if (CHECK(parser, '\'')) + if (CHECK(parser->buffer, '\'')) return yaml_parser_fetch_flow_scalar(parser, 1); /* Is it a double-quoted scalar? */ - if (CHECK(parser, '"')) + if (CHECK(parser->buffer, '"')) return yaml_parser_fetch_flow_scalar(parser, 0); /* @@ -1045,7 +1012,8 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * '#', '&', '*', '!', '|', '>', '\'', '\"', * '%', '@', '`'. * - * In the block context, it may also start with the characters + * In the block context (and, for the '-' indicator, in the flow context + * too), it may also start with the characters * * '-', '?', ':' * @@ -1054,23 +1022,2549 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * The last rule is more restrictive than the specification requires. */ - if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?') - || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[') - || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}') - || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*') - || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>') - || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%') - || CHECK(parser, '@') || CHECK(parser, '`')) || + if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-') + || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':') + || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[') + || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') + || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#') + || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*') + || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|') + || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'') + || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%') + || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) || + (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) || (!parser->flow_level && - (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) && - IS_BLANKZ_AT(parser, 1))) + (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')) + && !IS_BLANKZ_AT(parser->buffer, 1))) return yaml_parser_fetch_plain_scalar(parser); /* * If we don't determine the token type so far, it is an error. */ - return yaml_parser_set_scanner_error(parser, "while scanning for the next token", - yaml_parser_get_mark(parser), "found character that cannot start any token"); + return yaml_parser_set_scanner_error(parser, + "while scanning for the next token", parser->mark, + "found character that cannot start any token"); +} + +/* + * Check the list of potential simple keys and remove the positions that + * cannot contain simple keys anymore. + */ + +static int +yaml_parser_stale_simple_keys(yaml_parser_t *parser) +{ + yaml_simple_key_t *simple_key; + + /* Check for a potential simple key for each flow level. */ + + for (simple_key = parser->simple_keys.start; + simple_key != parser->simple_keys.top; simple_key ++) + { + /* + * The specification requires that a simple key + * + * - is limited to a single line, + * - is shorter than 1024 characters. + */ + + if (simple_key->possible + && (simple_key->mark.line < parser->mark.line + || simple_key->mark.index+1024 < parser->mark.index)) { + + /* Check if the potential simple key to be removed is required. */ + + if (simple_key->required) { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key->mark, + "could not find expected ':'"); + } + + simple_key->possible = 0; + } + } + + return 1; +} + +/* + * Check if a simple key may start at the current position and add it if + * needed. + */ + +static int +yaml_parser_save_simple_key(yaml_parser_t *parser) +{ + /* + * A simple key is required at the current position if the scanner is in + * the block context and the current column coincides with the indentation + * level. + */ + + int required = (!parser->flow_level + && parser->indent == (int)parser->mark.column); + + /* + * A simple key is required only when it is the first token in the current + * line. Therefore it is always allowed. But we add a check anyway. + */ + + assert(parser->simple_key_allowed || !required); /* Impossible. */ + + /* + * If the current position may start a simple key, save it. + */ + + if (parser->simple_key_allowed) + { + yaml_simple_key_t simple_key; + simple_key.possible = 1; + simple_key.required = required; + simple_key.token_number = + parser->tokens_parsed + parser->tokens.tail - parser->tokens.head; + simple_key.mark = parser->mark; + + if (!yaml_parser_remove_simple_key(parser)) return 0; + + *(parser->simple_keys.top-1) = simple_key; + } + + return 1; +} + +/* + * Remove a potential simple key at the current flow level. + */ + +static int +yaml_parser_remove_simple_key(yaml_parser_t *parser) +{ + yaml_simple_key_t *simple_key = parser->simple_keys.top-1; + + if (simple_key->possible) + { + /* If the key is required, it is an error. */ + + if (simple_key->required) { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key->mark, + "could not find expected ':'"); + } + } + + /* Remove the key from the stack. */ + + simple_key->possible = 0; + + return 1; +} + +/* + * Increase the flow level and resize the simple key list if needed. + */ + +static int +yaml_parser_increase_flow_level(yaml_parser_t *parser) +{ + yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } }; + + /* Reset the simple key on the next level. */ + + if (!PUSH(parser, parser->simple_keys, empty_simple_key)) + return 0; + + /* Increase the flow level. */ + + parser->flow_level++; + + return 1; +} + +/* + * Decrease the flow level. + */ + +static int +yaml_parser_decrease_flow_level(yaml_parser_t *parser) +{ + yaml_simple_key_t dummy_key; /* Used to eliminate a compiler warning. */ + + if (parser->flow_level) { + parser->flow_level --; + dummy_key = POP(parser, parser->simple_keys); + } + + return 1; +} + +/* + * Push the current indentation level to the stack and set the new level + * the current column is greater than the indentation level. In this case, + * append or insert the specified token into the token queue. + * + */ + +static int +yaml_parser_roll_indent(yaml_parser_t *parser, int column, + int number, yaml_token_type_t type, yaml_mark_t mark) +{ + yaml_token_t token; + + /* In the flow context, do nothing. */ + + if (parser->flow_level) + return 1; + + if (parser->indent < column) + { + /* + * Push the current indentation level to the stack and set the new + * indentation level. + */ + + if (!PUSH(parser, parser->indents, parser->indent)) + return 0; + + parser->indent = column; + + /* Create a token and insert it into the queue. */ + + TOKEN_INIT(token, type, mark, mark); + + if (number == -1) { + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + } + else { + if (!QUEUE_INSERT(parser, + parser->tokens, number - parser->tokens_parsed, token)) + return 0; + } + } + + return 1; +} + +/* + * Pop indentation levels from the indents stack until the current level + * becomes less or equal to the column. For each intendation level, append + * the BLOCK-END token. + */ + + +static int +yaml_parser_unroll_indent(yaml_parser_t *parser, int column) +{ + yaml_token_t token; + + /* In the flow context, do nothing. */ + + if (parser->flow_level) + return 1; + + /* Loop through the intendation levels in the stack. */ + + while (parser->indent > column) + { + /* Create a token and append it to the queue. */ + + TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + /* Pop the indentation level. */ + + parser->indent = POP(parser, parser->indents); + } + + return 1; +} + +/* + * Initialize the scanner and produce the STREAM-START token. + */ + +static int +yaml_parser_fetch_stream_start(yaml_parser_t *parser) +{ + yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } }; + yaml_token_t token; + + /* Set the initial indentation. */ + + parser->indent = -1; + + /* Initialize the simple key stack. */ + + if (!PUSH(parser, parser->simple_keys, simple_key)) + return 0; + + /* A simple key is allowed at the beginning of the stream. */ + + parser->simple_key_allowed = 1; + + /* We have started. */ + + parser->stream_start_produced = 1; + + /* Create the STREAM-START token and append it to the queue. */ + + STREAM_START_TOKEN_INIT(token, parser->encoding, + parser->mark, parser->mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the STREAM-END token and shut down the scanner. + */ + +static int +yaml_parser_fetch_stream_end(yaml_parser_t *parser) +{ + yaml_token_t token; + + /* Force new line. */ + + if (parser->mark.column != 0) { + parser->mark.column = 0; + parser->mark.line ++; + } + + /* Reset the indentation level. */ + + if (!yaml_parser_unroll_indent(parser, -1)) + return 0; + + /* Reset simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + parser->simple_key_allowed = 0; + + /* Create the STREAM-END token and append it to the queue. */ + + STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. + */ + +static int +yaml_parser_fetch_directive(yaml_parser_t *parser) +{ + yaml_token_t token; + + /* Reset the indentation level. */ + + if (!yaml_parser_unroll_indent(parser, -1)) + return 0; + + /* Reset simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + parser->simple_key_allowed = 0; + + /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */ + + if (!yaml_parser_scan_directive(parser, &token)) + return 0; + + /* Append the token to the queue. */ + + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); + return 0; + } + + return 1; +} + +/* + * Produce the DOCUMENT-START or DOCUMENT-END token. + */ + +static int +yaml_parser_fetch_document_indicator(yaml_parser_t *parser, + yaml_token_type_t type) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + + /* Reset the indentation level. */ + + if (!yaml_parser_unroll_indent(parser, -1)) + return 0; + + /* Reset simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + parser->simple_key_allowed = 0; + + /* Consume the token. */ + + start_mark = parser->mark; + + SKIP(parser); + SKIP(parser); + SKIP(parser); + + end_mark = parser->mark; + + /* Create the DOCUMENT-START or DOCUMENT-END token. */ + + TOKEN_INIT(token, type, start_mark, end_mark); + + /* Append the token to the queue. */ + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + */ + +static int +yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, + yaml_token_type_t type) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + + /* The indicators '[' and '{' may start a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* Increase the flow level. */ + + if (!yaml_parser_increase_flow_level(parser)) + return 0; + + /* A simple key may follow the indicators '[' and '{'. */ + + parser->simple_key_allowed = 1; + + /* Consume the token. */ + + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; + + /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */ + + TOKEN_INIT(token, type, start_mark, end_mark); + + /* Append the token to the queue. */ + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. + */ + +static int +yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, + yaml_token_type_t type) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + + /* Reset any potential simple key on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Decrease the flow level. */ + + if (!yaml_parser_decrease_flow_level(parser)) + return 0; + + /* No simple keys after the indicators ']' and '}'. */ + + parser->simple_key_allowed = 0; + + /* Consume the token. */ + + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; + + /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */ + + TOKEN_INIT(token, type, start_mark, end_mark); + + /* Append the token to the queue. */ + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the FLOW-ENTRY token. + */ + +static int +yaml_parser_fetch_flow_entry(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + + /* Reset any potential simple keys on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Simple keys are allowed after ','. */ + + parser->simple_key_allowed = 1; + + /* Consume the token. */ + + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; + + /* Create the FLOW-ENTRY token and append it to the queue. */ + + TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the BLOCK-ENTRY token. + */ + +static int +yaml_parser_fetch_block_entry(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + + /* Check if the scanner is in the block context. */ + + if (!parser->flow_level) + { + /* Check if we are allowed to start a new entry. */ + + if (!parser->simple_key_allowed) { + return yaml_parser_set_scanner_error(parser, NULL, parser->mark, + "block sequence entries are not allowed in this context"); + } + + /* Add the BLOCK-SEQUENCE-START token if needed. */ + + if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, + YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark)) + return 0; + } + else + { + /* + * It is an error for the '-' indicator to occur in the flow context, + * but we let the Parser detect and report about it because the Parser + * is able to point to the context. + */ + } + + /* Reset any potential simple keys on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Simple keys are allowed after '-'. */ + + parser->simple_key_allowed = 1; + + /* Consume the token. */ + + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; + + /* Create the BLOCK-ENTRY token and append it to the queue. */ + + TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the KEY token. + */ + +static int +yaml_parser_fetch_key(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + + /* In the block context, additional checks are required. */ + + if (!parser->flow_level) + { + /* Check if we are allowed to start a new key (not nessesary simple). */ + + if (!parser->simple_key_allowed) { + return yaml_parser_set_scanner_error(parser, NULL, parser->mark, + "mapping keys are not allowed in this context"); + } + + /* Add the BLOCK-MAPPING-START token if needed. */ + + if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, + YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) + return 0; + } + + /* Reset any potential simple keys on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Simple keys are allowed after '?' in the block context. */ + + parser->simple_key_allowed = (!parser->flow_level); + + /* Consume the token. */ + + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; + + /* Create the KEY token and append it to the queue. */ + + TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the VALUE token. + */ + +static int +yaml_parser_fetch_value(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t token; + yaml_simple_key_t *simple_key = parser->simple_keys.top-1; + + /* Have we found a simple key? */ + + if (simple_key->possible) + { + + /* Create the KEY token and insert it into the queue. */ + + TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark); + + if (!QUEUE_INSERT(parser, parser->tokens, + simple_key->token_number - parser->tokens_parsed, token)) + return 0; + + /* In the block context, we may need to add the BLOCK-MAPPING-START token. */ + + if (!yaml_parser_roll_indent(parser, simple_key->mark.column, + simple_key->token_number, + YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark)) + return 0; + + /* Remove the simple key. */ + + simple_key->possible = 0; + + /* A simple key cannot follow another simple key. */ + + parser->simple_key_allowed = 0; + } + else + { + /* The ':' indicator follows a complex key. */ + + /* In the block context, extra checks are required. */ + + if (!parser->flow_level) + { + /* Check if we are allowed to start a complex value. */ + + if (!parser->simple_key_allowed) { + return yaml_parser_set_scanner_error(parser, NULL, parser->mark, + "mapping values are not allowed in this context"); + } + + /* Add the BLOCK-MAPPING-START token if needed. */ + + if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, + YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) + return 0; + } + + /* Simple keys after ':' are allowed in the block context. */ + + parser->simple_key_allowed = (!parser->flow_level); + } + + /* Consume the token. */ + + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; + + /* Create the VALUE token and append it to the queue. */ + + TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark); + + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + + return 1; +} + +/* + * Produce the ALIAS or ANCHOR token. + */ + +static int +yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) +{ + yaml_token_t token; + + /* An anchor or an alias could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow an anchor or an alias. */ + + parser->simple_key_allowed = 0; + + /* Create the ALIAS or ANCHOR token and append it to the queue. */ + + if (!yaml_parser_scan_anchor(parser, &token, type)) + return 0; + + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); + return 0; + } + return 1; +} + +/* + * Produce the TAG token. + */ + +static int +yaml_parser_fetch_tag(yaml_parser_t *parser) +{ + yaml_token_t token; + + /* A tag could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow a tag. */ + + parser->simple_key_allowed = 0; + + /* Create the TAG token and append it to the queue. */ + + if (!yaml_parser_scan_tag(parser, &token)) + return 0; + + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); + return 0; + } + + return 1; +} + +/* + * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. + */ + +static int +yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) +{ + yaml_token_t token; + + /* Remove any potential simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* A simple key may follow a block scalar. */ + + parser->simple_key_allowed = 1; + + /* Create the SCALAR token and append it to the queue. */ + + if (!yaml_parser_scan_block_scalar(parser, &token, literal)) + return 0; + + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); + return 0; + } + + return 1; +} + +/* + * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. + */ + +static int +yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) +{ + yaml_token_t token; + + /* A plain scalar could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow a flow scalar. */ + + parser->simple_key_allowed = 0; + + /* Create the SCALAR token and append it to the queue. */ + + if (!yaml_parser_scan_flow_scalar(parser, &token, single)) + return 0; + + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); + return 0; + } + + return 1; +} + +/* + * Produce the SCALAR(...,plain) token. + */ + +static int +yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) +{ + yaml_token_t token; + + /* A plain scalar could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow a flow scalar. */ + + parser->simple_key_allowed = 0; + + /* Create the SCALAR token and append it to the queue. */ + + if (!yaml_parser_scan_plain_scalar(parser, &token)) + return 0; + + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); + return 0; + } + + return 1; +} + +/* + * Eat whitespaces and comments until the next token is found. + */ + +static int +yaml_parser_scan_to_next_token(yaml_parser_t *parser) +{ + /* Until the next token is not found. */ + + while (1) + { + /* Allow the BOM mark to start a line. */ + + if (!CACHE(parser, 1)) return 0; + + if (parser->mark.column == 0 && IS_BOM(parser->buffer)) + SKIP(parser); + + /* + * Eat whitespaces. + * + * Tabs are allowed: + * + * - in the flow context; + * - in the block context, but not at the beginning of the line or + * after '-', '?', or ':' (complex value). + */ + + if (!CACHE(parser, 1)) return 0; + + while (CHECK(parser->buffer,' ') || + ((parser->flow_level || !parser->simple_key_allowed) && + CHECK(parser->buffer, '\t'))) { + SKIP(parser); + if (!CACHE(parser, 1)) return 0; + } + + /* Eat a comment until a line break. */ + + if (CHECK(parser->buffer, '#')) { + while (!IS_BREAKZ(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) return 0; + } + } + + /* If it is a line break, eat it. */ + + if (IS_BREAK(parser->buffer)) + { + if (!CACHE(parser, 2)) return 0; + SKIP_LINE(parser); + + /* In the block context, a new line may start a simple key. */ + + if (!parser->flow_level) { + parser->simple_key_allowed = 1; + } + } + else + { + /* We have found a token. */ + + break; + } + } + + return 1; +} + +/* + * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * %TAG !yaml! tag:yaml.org,2002: \n + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + */ + +int +yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) +{ + yaml_mark_t start_mark, end_mark; + yaml_char_t *name = NULL; + int major, minor; + yaml_char_t *handle = NULL, *prefix = NULL; + + /* Eat '%'. */ + + start_mark = parser->mark; + + SKIP(parser); + + /* Scan the directive name. */ + + if (!yaml_parser_scan_directive_name(parser, start_mark, &name)) + goto error; + + /* Is it a YAML directive? */ + + if (strcmp((char *)name, "YAML") == 0) + { + /* Scan the VERSION directive value. */ + + if (!yaml_parser_scan_version_directive_value(parser, start_mark, + &major, &minor)) + goto error; + + end_mark = parser->mark; + + /* Create a VERSION-DIRECTIVE token. */ + + VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor, + start_mark, end_mark); + } + + /* Is it a TAG directive? */ + + else if (strcmp((char *)name, "TAG") == 0) + { + /* Scan the TAG directive value. */ + + if (!yaml_parser_scan_tag_directive_value(parser, start_mark, + &handle, &prefix)) + goto error; + + end_mark = parser->mark; + + /* Create a TAG-DIRECTIVE token. */ + + TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix, + start_mark, end_mark); + } + + /* Unknown directive. */ + + else + { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found uknown directive name"); + goto error; + } + + /* Eat the rest of the line including any comments. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_BLANK(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) goto error; + } + + if (CHECK(parser->buffer, '#')) { + while (!IS_BREAKZ(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) goto error; + } + } + + /* Check if we are at the end of the line. */ + + if (!IS_BREAKZ(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "did not find expected comment or line break"); + goto error; + } + + /* Eat a line break. */ + + if (IS_BREAK(parser->buffer)) { + if (!CACHE(parser, 2)) goto error; + SKIP_LINE(parser); + } + + yaml_free(name); + + return 1; + +error: + yaml_free(prefix); + yaml_free(handle); + yaml_free(name); + return 0; +} + +/* + * Scan the directive name. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^^^^ + * %TAG !yaml! tag:yaml.org,2002: \n + * ^^^ + */ + +static int +yaml_parser_scan_directive_name(yaml_parser_t *parser, + yaml_mark_t start_mark, yaml_char_t **name) +{ + yaml_string_t string = NULL_STRING; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + + /* Consume the directive name. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_ALPHA(parser->buffer)) + { + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; + } + + /* Check if the name is empty. */ + + if (string.start == string.pointer) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "could not find expected directive name"); + goto error; + } + + /* Check for an blank character after the name. */ + + if (!IS_BLANKZ(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unexpected non-alphabetical character"); + goto error; + } + + *name = string.start; + + return 1; + +error: + STRING_DEL(parser, string); + return 0; +} + +/* + * Scan the value of VERSION-DIRECTIVE. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^^^^^^ + */ + +static int +yaml_parser_scan_version_directive_value(yaml_parser_t *parser, + yaml_mark_t start_mark, int *major, int *minor) +{ + /* Eat whitespaces. */ + + if (!CACHE(parser, 1)) return 0; + + while (IS_BLANK(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) return 0; + } + + /* Consume the major version number. */ + + if (!yaml_parser_scan_version_directive_number(parser, start_mark, major)) + return 0; + + /* Eat '.'. */ + + if (!CHECK(parser->buffer, '.')) { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected digit or '.' character"); + } + + SKIP(parser); + + /* Consume the minor version number. */ + + if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor)) + return 0; + + return 1; +} + +#define MAX_NUMBER_LENGTH 9 + +/* + * Scan the version number of VERSION-DIRECTIVE. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^ + * %YAML 1.1 # a comment \n + * ^ + */ + +static int +yaml_parser_scan_version_directive_number(yaml_parser_t *parser, + yaml_mark_t start_mark, int *number) +{ + int value = 0; + size_t length = 0; + + /* Repeat while the next character is digit. */ + + if (!CACHE(parser, 1)) return 0; + + while (IS_DIGIT(parser->buffer)) + { + /* Check if the number is too long. */ + + if (++length > MAX_NUMBER_LENGTH) { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "found extremely long version number"); + } + + value = value*10 + AS_DIGIT(parser->buffer); + + SKIP(parser); + + if (!CACHE(parser, 1)) return 0; + } + + /* Check if the number was present. */ + + if (!length) { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected version number"); + } + + *number = value; + + return 1; +} + +/* + * Scan the value of a TAG-DIRECTIVE token. + * + * Scope: + * %TAG !yaml! tag:yaml.org,2002: \n + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + */ + +static int +yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, + yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix) +{ + yaml_char_t *handle_value = NULL; + yaml_char_t *prefix_value = NULL; + + /* Eat whitespaces. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_BLANK(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) goto error; + } + + /* Scan a handle. */ + + if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value)) + goto error; + + /* Expect a whitespace. */ + + if (!CACHE(parser, 1)) goto error; + + if (!IS_BLANK(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace"); + goto error; + } + + /* Eat whitespaces. */ + + while (IS_BLANK(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) goto error; + } + + /* Scan a prefix. */ + + if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value)) + goto error; + + /* Expect a whitespace or line break. */ + + if (!CACHE(parser, 1)) goto error; + + if (!IS_BLANKZ(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace or line break"); + goto error; + } + + *handle = handle_value; + *prefix = prefix_value; + + return 1; + +error: + yaml_free(handle_value); + yaml_free(prefix_value); + return 0; +} + +static int +yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, + yaml_token_type_t type) +{ + int length = 0; + yaml_mark_t start_mark, end_mark; + yaml_string_t string = NULL_STRING; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + + /* Eat the indicator character. */ + + start_mark = parser->mark; + + SKIP(parser); + + /* Consume the value. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_ALPHA(parser->buffer)) { + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; + length ++; + } + + end_mark = parser->mark; + + /* + * Check if length of the anchor is greater than 0 and it is followed by + * a whitespace character or one of the indicators: + * + * '?', ':', ',', ']', '}', '%', '@', '`'. + */ + + if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?') + || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',') + || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}') + || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@') + || CHECK(parser->buffer, '`'))) { + yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ? + "while scanning an anchor" : "while scanning an alias", start_mark, + "did not find expected alphabetic or numeric character"); + goto error; + } + + /* Create a token. */ + + if (type == YAML_ANCHOR_TOKEN) { + ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark); + } + else { + ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark); + } + + return 1; + +error: + STRING_DEL(parser, string); + return 0; +} + +/* + * Scan a TAG token. + */ + +static int +yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) +{ + yaml_char_t *handle = NULL; + yaml_char_t *suffix = NULL; + yaml_mark_t start_mark, end_mark; + + start_mark = parser->mark; + + /* Check if the tag is in the canonical form. */ + + if (!CACHE(parser, 2)) goto error; + + if (CHECK_AT(parser->buffer, '<', 1)) + { + /* Set the handle to '' */ + + handle = yaml_malloc(1); + if (!handle) goto error; + handle[0] = '\0'; + + /* Eat '!<' */ + + SKIP(parser); + SKIP(parser); + + /* Consume the tag value. */ + + if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) + goto error; + + /* Check for '>' and eat it. */ + + if (!CHECK(parser->buffer, '>')) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find the expected '>'"); + goto error; + } + + SKIP(parser); + } + else + { + /* The tag has either the '!suffix' or the '!handle!suffix' form. */ + + /* First, try to scan a handle. */ + + if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle)) + goto error; + + /* Check if it is, indeed, handle. */ + + if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!') + { + /* Scan the suffix now. */ + + if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) + goto error; + } + else + { + /* It wasn't a handle after all. Scan the rest of the tag. */ + + if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix)) + goto error; + + /* Set the handle to '!'. */ + + yaml_free(handle); + handle = yaml_malloc(2); + if (!handle) goto error; + handle[0] = '!'; + handle[1] = '\0'; + + /* + * A special case: the '!' tag. Set the handle to '' and the + * suffix to '!'. + */ + + if (suffix[0] == '\0') { + yaml_char_t *tmp = handle; + handle = suffix; + suffix = tmp; + } + } + } + + /* Check the character which ends the tag. */ + + if (!CACHE(parser, 1)) goto error; + + if (!IS_BLANKZ(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find expected whitespace or line break"); + goto error; + } + + end_mark = parser->mark; + + /* Create a token. */ + + TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark); + + return 1; + +error: + yaml_free(handle); + yaml_free(suffix); + return 0; +} + +/* + * Scan a tag handle. + */ + +static int +yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_char_t **handle) +{ + yaml_string_t string = NULL_STRING; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + + /* Check the initial '!' character. */ + + if (!CACHE(parser, 1)) goto error; + + if (!CHECK(parser->buffer, '!')) { + yaml_parser_set_scanner_error(parser, directive ? + "while scanning a tag directive" : "while scanning a tag", + start_mark, "did not find expected '!'"); + goto error; + } + + /* Copy the '!' character. */ + + if (!READ(parser, string)) goto error; + + /* Copy all subsequent alphabetical and numerical characters. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_ALPHA(parser->buffer)) + { + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; + } + + /* Check if the trailing character is '!' and copy it. */ + + if (CHECK(parser->buffer, '!')) + { + if (!READ(parser, string)) goto error; + } + else + { + /* + * It's either the '!' tag or not really a tag handle. If it's a %TAG + * directive, it's an error. If it's a tag token, it must be a part of + * URI. + */ + + if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) { + yaml_parser_set_scanner_error(parser, "while parsing a tag directive", + start_mark, "did not find expected '!'"); + goto error; + } + } + + *handle = string.start; + + return 1; + +error: + STRING_DEL(parser, string); + return 0; +} + +/* + * Scan a tag. + */ + +static int +yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, + yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri) +{ + size_t length = head ? strlen((char *)head) : 0; + yaml_string_t string = NULL_STRING; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + + /* Resize the string to include the head. */ + + while (string.end - string.start <= (int)length) { + if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + } + + /* + * Copy the head if needed. + * + * Note that we don't copy the leading '!' character. + */ + + if (length > 1) { + memcpy(string.start, head+1, length-1); + string.pointer += length-1; + } + + /* Scan the tag. */ + + if (!CACHE(parser, 1)) goto error; + + /* + * The set of characters that may appear in URI is as follows: + * + * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + * '%'. + */ + + while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';') + || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?') + || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@') + || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=') + || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$') + || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.') + || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~') + || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'') + || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')') + || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']') + || CHECK(parser->buffer, '%')) + { + /* Check if it is a URI-escape sequence. */ + + if (CHECK(parser->buffer, '%')) { + if (!yaml_parser_scan_uri_escapes(parser, + directive, start_mark, &string)) goto error; + } + else { + if (!READ(parser, string)) goto error; + } + + length ++; + if (!CACHE(parser, 1)) goto error; + } + + /* Check if the tag is non-empty. */ + + if (!length) { + if (!STRING_EXTEND(parser, string)) + goto error; + + yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "did not find expected tag URI"); + goto error; + } + + *uri = string.start; + + return 1; + +error: + STRING_DEL(parser, string); + return 0; +} + +/* + * Decode an URI-escape sequence corresponding to a single UTF-8 character. + */ + +static int +yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_string_t *string) +{ + int width = 0; + + /* Decode the required number of characters. */ + + do { + + unsigned char octet = 0; + + /* Check for a URI-escaped octet. */ + + if (!CACHE(parser, 3)) return 0; + + if (!(CHECK(parser->buffer, '%') + && IS_HEX_AT(parser->buffer, 1) + && IS_HEX_AT(parser->buffer, 2))) { + return yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "did not find URI escaped octet"); + } + + /* Get the octet. */ + + octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2); + + /* If it is the leading octet, determine the length of the UTF-8 sequence. */ + + if (!width) + { + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + if (!width) { + return yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "found an incorrect leading UTF-8 octet"); + } + } + else + { + /* Check if the trailing octet is correct. */ + + if ((octet & 0xC0) != 0x80) { + return yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "found an incorrect trailing UTF-8 octet"); + } + } + + /* Copy the octet and move the pointers. */ + + *(string->pointer++) = octet; + SKIP(parser); + SKIP(parser); + SKIP(parser); + + } while (--width); + + return 1; +} + +/* + * Scan a block scalar. + */ + +static int +yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, + int literal) +{ + yaml_mark_t start_mark; + yaml_mark_t end_mark; + yaml_string_t string = NULL_STRING; + yaml_string_t leading_break = NULL_STRING; + yaml_string_t trailing_breaks = NULL_STRING; + int chomping = 0; + int increment = 0; + int indent = 0; + int leading_blank = 0; + int trailing_blank = 0; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; + + /* Eat the indicator '|' or '>'. */ + + start_mark = parser->mark; + + SKIP(parser); + + /* Scan the additional block scalar indicators. */ + + if (!CACHE(parser, 1)) goto error; + + /* Check for a chomping indicator. */ + + if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) + { + /* Set the chomping method and eat the indicator. */ + + chomping = CHECK(parser->buffer, '+') ? +1 : -1; + + SKIP(parser); + + /* Check for an indentation indicator. */ + + if (!CACHE(parser, 1)) goto error; + + if (IS_DIGIT(parser->buffer)) + { + /* Check that the intendation is greater than 0. */ + + if (CHECK(parser->buffer, '0')) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an intendation indicator equal to 0"); + goto error; + } + + /* Get the intendation level and eat the indicator. */ + + increment = AS_DIGIT(parser->buffer); + + SKIP(parser); + } + } + + /* Do the same as above, but in the opposite order. */ + + else if (IS_DIGIT(parser->buffer)) + { + if (CHECK(parser->buffer, '0')) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an intendation indicator equal to 0"); + goto error; + } + + increment = AS_DIGIT(parser->buffer); + + SKIP(parser); + + if (!CACHE(parser, 1)) goto error; + + if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) { + chomping = CHECK(parser->buffer, '+') ? +1 : -1; + + SKIP(parser); + } + } + + /* Eat whitespaces and comments to the end of the line. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_BLANK(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) goto error; + } + + if (CHECK(parser->buffer, '#')) { + while (!IS_BREAKZ(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) goto error; + } + } + + /* Check if we are at the end of the line. */ + + if (!IS_BREAKZ(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "did not find expected comment or line break"); + goto error; + } + + /* Eat a line break. */ + + if (IS_BREAK(parser->buffer)) { + if (!CACHE(parser, 2)) goto error; + SKIP_LINE(parser); + } + + end_mark = parser->mark; + + /* Set the intendation level if it was specified. */ + + if (increment) { + indent = parser->indent >= 0 ? parser->indent+increment : increment; + } + + /* Scan the leading line breaks and determine the indentation level if needed. */ + + if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, + start_mark, &end_mark)) goto error; + + /* Scan the block scalar content. */ + + if (!CACHE(parser, 1)) goto error; + + while ((int)parser->mark.column == indent && !IS_Z(parser->buffer)) + { + /* + * We are at the beginning of a non-empty line. + */ + + /* Is it a trailing whitespace? */ + + trailing_blank = IS_BLANK(parser->buffer); + + /* Check if we need to fold the leading line break. */ + + if (!literal && (*leading_break.start == '\n') + && !leading_blank && !trailing_blank) + { + /* Do we need to join the lines by space? */ + + if (*trailing_breaks.start == '\0') { + if (!STRING_EXTEND(parser, string)) goto error; + *(string.pointer ++) = ' '; + } + + CLEAR(parser, leading_break); + } + else { + if (!JOIN(parser, string, leading_break)) goto error; + CLEAR(parser, leading_break); + } + + /* Append the remaining line breaks. */ + + if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, trailing_breaks); + + /* Is it a leading whitespace? */ + + leading_blank = IS_BLANK(parser->buffer); + + /* Consume the current line. */ + + while (!IS_BREAKZ(parser->buffer)) { + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; + } + + /* Consume the line break. */ + + if (!CACHE(parser, 2)) goto error; + + if (!READ_LINE(parser, leading_break)) goto error; + + /* Eat the following intendation spaces and line breaks. */ + + if (!yaml_parser_scan_block_scalar_breaks(parser, + &indent, &trailing_breaks, start_mark, &end_mark)) goto error; + } + + /* Chomp the tail. */ + + if (chomping != -1) { + if (!JOIN(parser, string, leading_break)) goto error; + } + if (chomping == 1) { + if (!JOIN(parser, string, trailing_breaks)) goto error; + } + + /* Create a token. */ + + SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, + literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE, + start_mark, end_mark); + + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + + return 1; + +error: + STRING_DEL(parser, string); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + + return 0; +} + +/* + * Scan intendation spaces and line breaks for a block scalar. Determine the + * intendation level if needed. + */ + +static int +yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, + int *indent, yaml_string_t *breaks, + yaml_mark_t start_mark, yaml_mark_t *end_mark) +{ + int max_indent = 0; + + *end_mark = parser->mark; + + /* Eat the intendation spaces and line breaks. */ + + while (1) + { + /* Eat the intendation spaces. */ + + if (!CACHE(parser, 1)) return 0; + + while ((!*indent || (int)parser->mark.column < *indent) + && IS_SPACE(parser->buffer)) { + SKIP(parser); + if (!CACHE(parser, 1)) return 0; + } + + if ((int)parser->mark.column > max_indent) + max_indent = (int)parser->mark.column; + + /* Check for a tab character messing the intendation. */ + + if ((!*indent || (int)parser->mark.column < *indent) + && IS_TAB(parser->buffer)) { + return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found a tab character where an intendation space is expected"); + } + + /* Have we found a non-empty line? */ + + if (!IS_BREAK(parser->buffer)) break; + + /* Consume the line break. */ + + if (!CACHE(parser, 2)) return 0; + if (!READ_LINE(parser, *breaks)) return 0; + *end_mark = parser->mark; + } + + /* Determine the indentation level if needed. */ + + if (!*indent) { + *indent = max_indent; + if (*indent < parser->indent + 1) + *indent = parser->indent + 1; + if (*indent < 1) + *indent = 1; + } + + return 1; +} + +/* + * Scan a quoted scalar. + */ + +static int +yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, + int single) +{ + yaml_mark_t start_mark; + yaml_mark_t end_mark; + yaml_string_t string = NULL_STRING; + yaml_string_t leading_break = NULL_STRING; + yaml_string_t trailing_breaks = NULL_STRING; + yaml_string_t whitespaces = NULL_STRING; + int leading_blanks; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; + + /* Eat the left quote. */ + + start_mark = parser->mark; + + SKIP(parser); + + /* Consume the content of the quoted scalar. */ + + while (1) + { + /* Check that there are no document indicators at the beginning of the line. */ + + if (!CACHE(parser, 4)) goto error; + + if (parser->mark.column == 0 && + ((CHECK_AT(parser->buffer, '-', 0) && + CHECK_AT(parser->buffer, '-', 1) && + CHECK_AT(parser->buffer, '-', 2)) || + (CHECK_AT(parser->buffer, '.', 0) && + CHECK_AT(parser->buffer, '.', 1) && + CHECK_AT(parser->buffer, '.', 2))) && + IS_BLANKZ_AT(parser->buffer, 3)) + { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected document indicator"); + goto error; + } + + /* Check for EOF. */ + + if (IS_Z(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected end of stream"); + goto error; + } + + /* Consume non-blank characters. */ + + if (!CACHE(parser, 2)) goto error; + + leading_blanks = 0; + + while (!IS_BLANKZ(parser->buffer)) + { + /* Check for an escaped single quote. */ + + if (single && CHECK_AT(parser->buffer, '\'', 0) + && CHECK_AT(parser->buffer, '\'', 1)) + { + if (!STRING_EXTEND(parser, string)) goto error; + *(string.pointer++) = '\''; + SKIP(parser); + SKIP(parser); + } + + /* Check for the right quote. */ + + else if (CHECK(parser->buffer, single ? '\'' : '"')) + { + break; + } + + /* Check for an escaped line break. */ + + else if (!single && CHECK(parser->buffer, '\\') + && IS_BREAK_AT(parser->buffer, 1)) + { + if (!CACHE(parser, 3)) goto error; + SKIP(parser); + SKIP_LINE(parser); + leading_blanks = 1; + break; + } + + /* Check for an escape sequence. */ + + else if (!single && CHECK(parser->buffer, '\\')) + { + size_t code_length = 0; + + if (!STRING_EXTEND(parser, string)) goto error; + + /* Check the escape character. */ + + switch (parser->buffer.pointer[1]) + { + case '0': + *(string.pointer++) = '\0'; + break; + + case 'a': + *(string.pointer++) = '\x07'; + break; + + case 'b': + *(string.pointer++) = '\x08'; + break; + + case 't': + case '\t': + *(string.pointer++) = '\x09'; + break; + + case 'n': + *(string.pointer++) = '\x0A'; + break; + + case 'v': + *(string.pointer++) = '\x0B'; + break; + + case 'f': + *(string.pointer++) = '\x0C'; + break; + + case 'r': + *(string.pointer++) = '\x0D'; + break; + + case 'e': + *(string.pointer++) = '\x1B'; + break; + + case ' ': + *(string.pointer++) = '\x20'; + break; + + case '"': + *(string.pointer++) = '"'; + break; + + case '\'': + *(string.pointer++) = '\''; + break; + + case '\\': + *(string.pointer++) = '\\'; + break; + + case 'N': /* NEL (#x85) */ + *(string.pointer++) = '\xC2'; + *(string.pointer++) = '\x85'; + break; + + case '_': /* #xA0 */ + *(string.pointer++) = '\xC2'; + *(string.pointer++) = '\xA0'; + break; + + case 'L': /* LS (#x2028) */ + *(string.pointer++) = '\xE2'; + *(string.pointer++) = '\x80'; + *(string.pointer++) = '\xA8'; + break; + + case 'P': /* PS (#x2029) */ + *(string.pointer++) = '\xE2'; + *(string.pointer++) = '\x80'; + *(string.pointer++) = '\xA9'; + break; + + case 'x': + code_length = 2; + break; + + case 'u': + code_length = 4; + break; + + case 'U': + code_length = 8; + break; + + default: + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found unknown escape character"); + goto error; + } + + SKIP(parser); + SKIP(parser); + + /* Consume an arbitrary escape code. */ + + if (code_length) + { + unsigned int value = 0; + size_t k; + + /* Scan the character value. */ + + if (!CACHE(parser, code_length)) goto error; + + for (k = 0; k < code_length; k ++) { + if (!IS_HEX_AT(parser->buffer, k)) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "did not find expected hexdecimal number"); + goto error; + } + value = (value << 4) + AS_HEX_AT(parser->buffer, k); + } + + /* Check the value and write the character. */ + + if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found invalid Unicode character escape code"); + goto error; + } + + if (value <= 0x7F) { + *(string.pointer++) = value; + } + else if (value <= 0x7FF) { + *(string.pointer++) = 0xC0 + (value >> 6); + *(string.pointer++) = 0x80 + (value & 0x3F); + } + else if (value <= 0xFFFF) { + *(string.pointer++) = 0xE0 + (value >> 12); + *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); + *(string.pointer++) = 0x80 + (value & 0x3F); + } + else { + *(string.pointer++) = 0xF0 + (value >> 18); + *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F); + *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); + *(string.pointer++) = 0x80 + (value & 0x3F); + } + + /* Advance the pointer. */ + + for (k = 0; k < code_length; k ++) { + SKIP(parser); + } + } + } + + else + { + /* It is a non-escaped non-blank character. */ + + if (!READ(parser, string)) goto error; + } + + if (!CACHE(parser, 2)) goto error; + } + + /* Check if we are at the end of the scalar. */ + + if (CHECK(parser->buffer, single ? '\'' : '"')) + break; + + /* Consume blank characters. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) + { + if (IS_BLANK(parser->buffer)) + { + /* Consume a space or a tab character. */ + + if (!leading_blanks) { + if (!READ(parser, whitespaces)) goto error; + } + else { + SKIP(parser); + } + } + else + { + if (!CACHE(parser, 2)) goto error; + + /* Check if it is a first line break. */ + + if (!leading_blanks) + { + CLEAR(parser, whitespaces); + if (!READ_LINE(parser, leading_break)) goto error; + leading_blanks = 1; + } + else + { + if (!READ_LINE(parser, trailing_breaks)) goto error; + } + } + if (!CACHE(parser, 1)) goto error; + } + + /* Join the whitespaces or fold line breaks. */ + + if (leading_blanks) + { + /* Do we need to fold line breaks? */ + + if (leading_break.start[0] == '\n') { + if (trailing_breaks.start[0] == '\0') { + if (!STRING_EXTEND(parser, string)) goto error; + *(string.pointer++) = ' '; + } + else { + if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, trailing_breaks); + } + CLEAR(parser, leading_break); + } + else { + if (!JOIN(parser, string, leading_break)) goto error; + if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, leading_break); + CLEAR(parser, trailing_breaks); + } + } + else + { + if (!JOIN(parser, string, whitespaces)) goto error; + CLEAR(parser, whitespaces); + } + } + + /* Eat the right quote. */ + + SKIP(parser); + + end_mark = parser->mark; + + /* Create a token. */ + + SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, + single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE, + start_mark, end_mark); + + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); + + return 1; + +error: + STRING_DEL(parser, string); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); + + return 0; +} + +/* + * Scan a plain scalar. + */ + +static int +yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) +{ + yaml_mark_t start_mark; + yaml_mark_t end_mark; + yaml_string_t string = NULL_STRING; + yaml_string_t leading_break = NULL_STRING; + yaml_string_t trailing_breaks = NULL_STRING; + yaml_string_t whitespaces = NULL_STRING; + int leading_blanks = 0; + int indent = parser->indent+1; + + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; + + start_mark = end_mark = parser->mark; + + /* Consume the content of the plain scalar. */ + + while (1) + { + /* Check for a document indicator. */ + + if (!CACHE(parser, 4)) goto error; + + if (parser->mark.column == 0 && + ((CHECK_AT(parser->buffer, '-', 0) && + CHECK_AT(parser->buffer, '-', 1) && + CHECK_AT(parser->buffer, '-', 2)) || + (CHECK_AT(parser->buffer, '.', 0) && + CHECK_AT(parser->buffer, '.', 1) && + CHECK_AT(parser->buffer, '.', 2))) && + IS_BLANKZ_AT(parser->buffer, 3)) break; + + /* Check for a comment. */ + + if (CHECK(parser->buffer, '#')) + break; + + /* Consume non-blank characters. */ + + while (!IS_BLANKZ(parser->buffer)) + { + /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */ + + if (parser->flow_level + && CHECK(parser->buffer, ':') + && !IS_BLANKZ_AT(parser->buffer, 1)) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found unexpected ':'"); + goto error; + } + + /* Check for indicators that may end a plain scalar. */ + + if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1)) + || (parser->flow_level && + (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':') + || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[') + || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') + || CHECK(parser->buffer, '}')))) + break; + + /* Check if we need to join whitespaces and breaks. */ + + if (leading_blanks || whitespaces.start != whitespaces.pointer) + { + if (leading_blanks) + { + /* Do we need to fold line breaks? */ + + if (leading_break.start[0] == '\n') { + if (trailing_breaks.start[0] == '\0') { + if (!STRING_EXTEND(parser, string)) goto error; + *(string.pointer++) = ' '; + } + else { + if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, trailing_breaks); + } + CLEAR(parser, leading_break); + } + else { + if (!JOIN(parser, string, leading_break)) goto error; + if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, leading_break); + CLEAR(parser, trailing_breaks); + } + + leading_blanks = 0; + } + else + { + if (!JOIN(parser, string, whitespaces)) goto error; + CLEAR(parser, whitespaces); + } + } + + /* Copy the character. */ + + if (!READ(parser, string)) goto error; + + end_mark = parser->mark; + + if (!CACHE(parser, 2)) goto error; + } + + /* Is it the end? */ + + if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))) + break; + + /* Consume blank characters. */ + + if (!CACHE(parser, 1)) goto error; + + while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) + { + if (IS_BLANK(parser->buffer)) + { + /* Check for tab character that abuse intendation. */ + + if (leading_blanks && (int)parser->mark.column < indent + && IS_TAB(parser->buffer)) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found a tab character that violate intendation"); + goto error; + } + + /* Consume a space or a tab character. */ + + if (!leading_blanks) { + if (!READ(parser, whitespaces)) goto error; + } + else { + SKIP(parser); + } + } + else + { + if (!CACHE(parser, 2)) goto error; + + /* Check if it is a first line break. */ + + if (!leading_blanks) + { + CLEAR(parser, whitespaces); + if (!READ_LINE(parser, leading_break)) goto error; + leading_blanks = 1; + } + else + { + if (!READ_LINE(parser, trailing_breaks)) goto error; + } + } + if (!CACHE(parser, 1)) goto error; + } + + /* Check intendation level. */ + + if (!parser->flow_level && (int)parser->mark.column < indent) + break; + } + + /* Create a token. */ + + SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, + YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); + + /* Note that we change the 'simple_key_allowed' flag. */ + + if (leading_blanks) { + parser->simple_key_allowed = 1; + } + + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); + + return 1; + +error: + STRING_DEL(parser, string); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); + + return 0; }