]> andersk Git - libyaml.git/blame - src/scanner.c
Scanner: implement everything except token scanners.
[libyaml.git] / src / scanner.c
CommitLineData
03be97ab
KS
1
2/*
3 * Introduction
4 * ************
5 *
6 * The following notes assume that you are familiar with the YAML specification
7 * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8 * some cases we are less restrictive than it requires.
9 *
10 * The process of transforming a YAML stream into a sequence of events is
11 * divided into two steps: Scanning and Parsing.
12 *
13 * The Scanner transforms the input stream into a sequence of tokens, while the
14 * Parser transforms the sequence of tokens produced by the Scanner into a
15 * sequence of parsing events.
16 *
17 * The Scanner is rather clever and complicated. The Parser, on the contrary,
18 * is a straightforward implementation of a recursive-descent parser (or,
19 * LL(1) parser, as it is usually called).
20 *
21 * Actually there are two issues of Scanning that might be called "clever", the
22 * rest is quite straightforward. The issues are "block collection start" and
23 * "simple keys". Both issues are explained below in details.
24 *
25 * Here the Scanning step is explained and implemented. We start with the list
26 * of all the tokens produced by the Scanner together with short descriptions.
27 *
28 * Now, tokens:
29 *
30 * STREAM-START(encoding) # The stream start.
31 * STREAM-END # The stream end.
32 * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33 * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34 * DOCUMENT-START # '---'
35 * DOCUMENT-END # '...'
36 * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37 * BLOCK-MAPPING-START # sequence or a block mapping.
38 * BLOCK-END # Indentation decrease.
39 * FLOW-SEQUENCE-START # '['
40 * FLOW-SEQUENCE-END # ']'
41 * FLOW-MAPPING-START # '{'
42 * FLOW-MAPPING-END # '}'
43 * BLOCK-ENTRY # '-'
44 * FLOW-ENTRY # ','
45 * KEY # '?' or nothing (simple keys).
46 * VALUE # ':'
47 * ALIAS(anchor) # '*anchor'
48 * ANCHOR(anchor) # '&anchor'
49 * TAG(handle,suffix) # '!handle!suffix'
50 * SCALAR(value,style) # A scalar.
51 *
52 * The following two tokens are "virtual" tokens denoting the beginning and the
53 * end of the stream:
54 *
55 * STREAM-START(encoding)
56 * STREAM-END
57 *
58 * We pass the information about the input stream encoding with the
59 * STREAM-START token.
60 *
61 * The next two tokens are responsible for directives:
62 *
63 * VERSION-DIRECTIVE(major,minor)
64 * TAG-DIRECTIVE(handle,prefix)
65 *
66 * Example:
67 *
68 * %YAML 1.1
69 * %TAG ! !foo
70 * %TAG !yaml! tag:yaml.org,2002:
71 * ---
72 *
73 * The corresponding sequence of tokens:
74 *
75 * STREAM-START(utf-8)
76 * VERSION-DIRECTIVE(1,1)
77 * TAG-DIRECTIVE("!","!foo")
78 * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79 * DOCUMENT-START
80 * STREAM-END
81 *
82 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83 * line.
84 *
85 * The document start and end indicators are represented by:
86 *
87 * DOCUMENT-START
88 * DOCUMENT-END
89 *
90 * Note that if a YAML stream contains an implicit document (without '---'
91 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92 * produced.
93 *
94 * In the following examples, we present whole documents together with the
95 * produced tokens.
96 *
97 * 1. An implicit document:
98 *
99 * 'a scalar'
100 *
101 * Tokens:
102 *
103 * STREAM-START(utf-8)
104 * SCALAR("a scalar",single-quoted)
105 * STREAM-END
106 *
107 * 2. An explicit document:
108 *
109 * ---
110 * 'a scalar'
111 * ...
112 *
113 * Tokens:
114 *
115 * STREAM-START(utf-8)
116 * DOCUMENT-START
117 * SCALAR("a scalar",single-quoted)
118 * DOCUMENT-END
119 * STREAM-END
120 *
121 * 3. Several documents in a stream:
122 *
123 * 'a scalar'
124 * ---
125 * 'another scalar'
126 * ---
127 * 'yet another scalar'
128 *
129 * Tokens:
130 *
131 * STREAM-START(utf-8)
132 * SCALAR("a scalar",single-quoted)
133 * DOCUMENT-START
134 * SCALAR("another scalar",single-quoted)
135 * DOCUMENT-START
136 * SCALAR("yet another scalar",single-quoted)
137 * STREAM-END
138 *
139 * We have already introduced the SCALAR token above. The following tokens are
140 * used to describe aliases, anchors, tag, and scalars:
141 *
142 * ALIAS(anchor)
143 * ANCHOR(anchor)
144 * TAG(handle,suffix)
145 * SCALAR(value,style)
146 *
147 * The following series of examples illustrate the usage of these tokens:
148 *
149 * 1. A recursive sequence:
150 *
151 * &A [ *A ]
152 *
153 * Tokens:
154 *
155 * STREAM-START(utf-8)
156 * ANCHOR("A")
157 * FLOW-SEQUENCE-START
158 * ALIAS("A")
159 * FLOW-SEQUENCE-END
160 * STREAM-END
161 *
162 * 2. A tagged scalar:
163 *
164 * !!float "3.14" # A good approximation.
165 *
166 * Tokens:
167 *
168 * STREAM-START(utf-8)
169 * TAG("!!","float")
170 * SCALAR("3.14",double-quoted)
171 * STREAM-END
172 *
173 * 3. Various scalar styles:
174 *
175 * --- # Implicit empty plain scalars do not produce tokens.
176 * --- a plain scalar
177 * --- 'a single-quoted scalar'
178 * --- "a double-quoted scalar"
179 * --- |-
180 * a literal scalar
181 * --- >-
182 * a folded
183 * scalar
184 *
185 * Tokens:
186 *
187 * STREAM-START(utf-8)
188 * DOCUMENT-START
189 * DOCUMENT-START
190 * SCALAR("a plain scalar",plain)
191 * DOCUMENT-START
192 * SCALAR("a single-quoted scalar",single-quoted)
193 * DOCUMENT-START
194 * SCALAR("a double-quoted scalar",double-quoted)
195 * DOCUMENT-START
196 * SCALAR("a literal scalar",literal)
197 * DOCUMENT-START
198 * SCALAR("a folded scalar",folded)
199 * STREAM-END
200 *
201 * Now it's time to review collection-related tokens. We will start with
202 * flow collections:
203 *
204 * FLOW-SEQUENCE-START
205 * FLOW-SEQUENCE-END
206 * FLOW-MAPPING-START
207 * FLOW-MAPPING-END
208 * FLOW-ENTRY
209 * KEY
210 * VALUE
211 *
212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214 * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215 * indicators '?' and ':', which are used for denoting mapping keys and values,
216 * are represented by the KEY and VALUE tokens.
217 *
218 * The following examples show flow collections:
219 *
220 * 1. A flow sequence:
221 *
222 * [item 1, item 2, item 3]
223 *
224 * Tokens:
225 *
226 * STREAM-START(utf-8)
227 * FLOW-SEQUENCE-START
228 * SCALAR("item 1",plain)
229 * FLOW-ENTRY
230 * SCALAR("item 2",plain)
231 * FLOW-ENTRY
232 * SCALAR("item 3",plain)
233 * FLOW-SEQUENCE-END
234 * STREAM-END
235 *
236 * 2. A flow mapping:
237 *
238 * {
239 * a simple key: a value, # Note that the KEY token is produced.
240 * ? a complex key: another value,
241 * }
242 *
243 * Tokens:
244 *
245 * STREAM-START(utf-8)
246 * FLOW-MAPPING-START
247 * KEY
248 * SCALAR("a simple key",plain)
249 * VALUE
250 * SCALAR("a value",plain)
251 * FLOW-ENTRY
252 * KEY
253 * SCALAR("a complex key",plain)
254 * VALUE
255 * SCALAR("another value",plain)
256 * FLOW-ENTRY
257 * FLOW-MAPPING-END
258 * STREAM-END
259 *
260 * A simple key is a key which is not denoted by the '?' indicator. Note that
261 * the Scanner still produces the KEY token whenever it encounters a simple key.
262 *
263 * For scanning block collections, the following tokens are used (note that we
264 * repeat KEY and VALUE here):
265 *
266 * BLOCK-SEQUENCE-START
267 * BLOCK-MAPPING-START
268 * BLOCK-END
269 * BLOCK-ENTRY
270 * KEY
271 * VALUE
272 *
273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274 * increase that precedes a block collection (cf. the INDENT token in Python).
275 * The token BLOCK-END denotes indentation decrease that ends a block collection
276 * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277 * that make detection of these tokens more complex.
278 *
279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280 * '-', '?', and ':' correspondingly.
281 *
282 * The following examples show how the tokens BLOCK-SEQUENCE-START,
283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284 *
285 * 1. Block sequences:
286 *
287 * - item 1
288 * - item 2
289 * -
290 * - item 3.1
291 * - item 3.2
292 * -
293 * key 1: value 1
294 * key 2: value 2
295 *
296 * Tokens:
297 *
298 * STREAM-START(utf-8)
299 * BLOCK-SEQUENCE-START
300 * BLOCK-ENTRY
301 * SCALAR("item 1",plain)
302 * BLOCK-ENTRY
303 * SCALAR("item 2",plain)
304 * BLOCK-ENTRY
305 * BLOCK-SEQUENCE-START
306 * BLOCK-ENTRY
307 * SCALAR("item 3.1",plain)
308 * BLOCK-ENTRY
309 * SCALAR("item 3.2",plain)
310 * BLOCK-END
311 * BLOCK-ENTRY
312 * BLOCK-MAPPING-START
313 * KEY
314 * SCALAR("key 1",plain)
315 * VALUE
316 * SCALAR("value 1",plain)
317 * KEY
318 * SCALAR("key 2",plain)
319 * VALUE
320 * SCALAR("value 2",plain)
321 * BLOCK-END
322 * BLOCK-END
323 * STREAM-END
324 *
325 * 2. Block mappings:
326 *
327 * a simple key: a value # The KEY token is produced here.
328 * ? a complex key
329 * : another value
330 * a mapping:
331 * key 1: value 1
332 * key 2: value 2
333 * a sequence:
334 * - item 1
335 * - item 2
336 *
337 * Tokens:
338 *
339 * STREAM-START(utf-8)
340 * BLOCK-MAPPING-START
341 * KEY
342 * SCALAR("a simple key",plain)
343 * VALUE
344 * SCALAR("a value",plain)
345 * KEY
346 * SCALAR("a complex key",plain)
347 * VALUE
348 * SCALAR("another value",plain)
349 * KEY
350 * SCALAR("a mapping",plain)
351 * BLOCK-MAPPING-START
352 * KEY
353 * SCALAR("key 1",plain)
354 * VALUE
355 * SCALAR("value 1",plain)
356 * KEY
357 * SCALAR("key 2",plain)
358 * VALUE
359 * SCALAR("value 2",plain)
360 * BLOCK-END
361 * KEY
362 * SCALAR("a sequence",plain)
363 * VALUE
364 * BLOCK-SEQUENCE-START
365 * BLOCK-ENTRY
366 * SCALAR("item 1",plain)
367 * BLOCK-ENTRY
368 * SCALAR("item 2",plain)
369 * BLOCK-END
370 * BLOCK-END
371 * STREAM-END
372 *
373 * YAML does not always require a new block collection to start on a new
374 * line. If the current line contains only '-', '?', and ':' indicators, a new
375 * block collection may start at the current line. The following examples
376 * illustrate this case:
377 *
378 * 1. Collections in a sequence:
379 *
380 * - - item 1
381 * - item 2
382 * - key 1: value 1
383 * key 2: value 2
384 * - ? complex key
385 * : complex value
386 *
387 * Tokens:
388 *
389 * STREAM-START(utf-8)
390 * BLOCK-SEQUENCE-START
391 * BLOCK-ENTRY
392 * BLOCK-SEQUENCE-START
393 * BLOCK-ENTRY
394 * SCALAR("item 1",plain)
395 * BLOCK-ENTRY
396 * SCALAR("item 2",plain)
397 * BLOCK-END
398 * BLOCK-ENTRY
399 * BLOCK-MAPPING-START
400 * KEY
401 * SCALAR("key 1",plain)
402 * VALUE
403 * SCALAR("value 1",plain)
404 * KEY
405 * SCALAR("key 2",plain)
406 * VALUE
407 * SCALAR("value 2",plain)
408 * BLOCK-END
409 * BLOCK-ENTRY
410 * BLOCK-MAPPING-START
411 * KEY
412 * SCALAR("complex key",plain)
413 * VALUE
414 * SCALAR("complex value",plain)
415 * BLOCK-END
416 * BLOCK-END
417 * STREAM-END
418 *
419 * 2. Collections in a mapping:
420 *
421 * ? a sequence
422 * : - item 1
423 * - item 2
424 * ? a mapping
425 * : key 1: value 1
426 * key 2: value 2
427 *
428 * Tokens:
429 *
430 * STREAM-START(utf-8)
431 * BLOCK-MAPPING-START
432 * KEY
433 * SCALAR("a sequence",plain)
434 * VALUE
435 * BLOCK-SEQUENCE-START
436 * BLOCK-ENTRY
437 * SCALAR("item 1",plain)
438 * BLOCK-ENTRY
439 * SCALAR("item 2",plain)
440 * BLOCK-END
441 * KEY
442 * SCALAR("a mapping",plain)
443 * VALUE
444 * BLOCK-MAPPING-START
445 * KEY
446 * SCALAR("key 1",plain)
447 * VALUE
448 * SCALAR("value 1",plain)
449 * KEY
450 * SCALAR("key 2",plain)
451 * VALUE
452 * SCALAR("value 2",plain)
453 * BLOCK-END
454 * BLOCK-END
455 * STREAM-END
456 *
457 * YAML also permits non-indented sequences if they are included into a block
458 * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
459 *
460 * key:
461 * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
462 * - item 2
463 *
464 * Tokens:
465 *
466 * STREAM-START(utf-8)
467 * BLOCK-MAPPING-START
468 * KEY
469 * SCALAR("key",plain)
470 * VALUE
471 * BLOCK-ENTRY
472 * SCALAR("item 1",plain)
473 * BLOCK-ENTRY
474 * SCALAR("item 2",plain)
475 * BLOCK-END
476 */
477
478#if HAVE_CONFIG_H
479#include <config.h>
480#endif
481
482#include <yaml/yaml.h>
483
484#include <assert.h>
485
f2b59d4d
KS
/*
 * Buffer inspection macros.
 *
 * Every macro below takes a pointer to the parser object and reads its
 * `pointer` field (the current position in the input buffer); some also use
 * the `unread`, `index`, `line` and `column` fields.  The macros evaluate
 * their arguments more than once, so the arguments must be free of side
 * effects.  All parameters are parenthesized in the expansions so that
 * arbitrary expressions (e.g. `&parser_object` or `flag ? 1 : 0`) may be
 * passed safely.
 */

/*
 * Ensure that the buffer contains the required number of characters.
 * Return 1 on success, 0 on failure (reader error or memory error).
 */

#define UPDATE(parser,length)                                           \
    ((parser)->unread >= (length)                                       \
        ? 1                                                             \
        : yaml_parser_update_buffer((parser), (length)))

/*
 * Check the octet at the specified position.
 */

#define CHECK_AT(parser,octet,offset)                                   \
    ((parser)->pointer[(offset)] == (yaml_char_t)(octet))

/*
 * Check the current octet in the buffer.
 */

#define CHECK(parser,octet) CHECK_AT(parser,(octet),0)

/*
 * Check if the character at the specified position is NUL.
 */

#define IS_Z_AT(parser,offset)  CHECK_AT(parser,'\0',(offset))

#define IS_Z(parser)    IS_Z_AT(parser,0)

/*
 * Check if the character at the specified position is space.
 */

#define IS_SPACE_AT(parser,offset)  CHECK_AT(parser,' ',(offset))

#define IS_SPACE(parser)    IS_SPACE_AT(parser,0)

/*
 * Check if the character at the specified position is tab.
 */

#define IS_TAB_AT(parser,offset)    CHECK_AT(parser,'\t',(offset))

#define IS_TAB(parser)  IS_TAB_AT(parser,0)

/*
 * Check if the character at the specified position is blank (space or tab).
 */

#define IS_BLANK_AT(parser,offset)                                      \
    (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset)))

#define IS_BLANK(parser)    IS_BLANK_AT(parser,0)

/*
 * Check if the character at the specified position is a line break.
 * The multi-octet breaks are matched by their UTF-8 encodings.
 */

#define IS_BREAK_AT(parser,offset)                                      \
    (CHECK_AT(parser,'\r',(offset))                 /* CR (#xD) */      \
     || CHECK_AT(parser,'\n',(offset))              /* LF (#xA) */      \
     || (CHECK_AT(parser,'\xC2',(offset))                               \
         && CHECK_AT(parser,'\x85',(offset)+1))     /* NEL (#x85) */    \
     || (CHECK_AT(parser,'\xE2',(offset))                               \
         && CHECK_AT(parser,'\x80',(offset)+1)                          \
         && CHECK_AT(parser,'\xA8',(offset)+2))     /* LS (#x2028) */   \
     || (CHECK_AT(parser,'\xE2',(offset))                               \
         && CHECK_AT(parser,'\x80',(offset)+1)                          \
         && CHECK_AT(parser,'\xA9',(offset)+2)))    /* PS (#x2029) */

#define IS_BREAK(parser)    IS_BREAK_AT(parser,0)

/*
 * Check if the characters at the specified position are CR followed by LF.
 */

#define IS_CRLF_AT(parser,offset)                                       \
    (CHECK_AT(parser,'\r',(offset)) && CHECK_AT(parser,'\n',(offset)+1))

#define IS_CRLF(parser) IS_CRLF_AT(parser,0)

/*
 * Check if the character is a line break or NUL.
 */

#define IS_BREAKZ_AT(parser,offset)                                     \
    (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset)))

#define IS_BREAKZ(parser)   IS_BREAKZ_AT(parser,0)

/*
 * Check if the character is a line break, space, or NUL.
 */

#define IS_SPACEZ_AT(parser,offset)                                     \
    (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))

#define IS_SPACEZ(parser)   IS_SPACEZ_AT(parser,0)

/*
 * Check if the character is a line break, space, tab, or NUL.
 */

#define IS_BLANKZ_AT(parser,offset)                                     \
    (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))

#define IS_BLANKZ(parser)   IS_BLANKZ_AT(parser,0)

/*
 * Determine the width (in octets) of the UTF-8 character that starts at the
 * specified position, judging by its leading octet.  The result is 0 if the
 * octet is not a valid leading octet of a 1- to 4-octet sequence.
 */

#define WIDTH_AT(parser,offset)                                         \
    (((parser)->pointer[(offset)] & 0x80) == 0x00 ? 1 :                 \
     ((parser)->pointer[(offset)] & 0xE0) == 0xC0 ? 2 :                 \
     ((parser)->pointer[(offset)] & 0xF0) == 0xE0 ? 3 :                 \
     ((parser)->pointer[(offset)] & 0xF8) == 0xF0 ? 4 : 0)

#define WIDTH(parser)   WIDTH_AT(parser,0)

/*
 * Advance the buffer pointer by one character.
 *
 * The character index is always incremented.  If the current character is a
 * line break, the line counter is incremented and the column is reset to 0;
 * the CR of a CR LF pair is counted as an ordinary column so that the pair
 * yields a single new line (on its LF).  The position is examined before the
 * pointer is moved, so the pointer update must stay last.
 */

#define FORWARD(parser)                                                 \
    ((parser)->index ++,                                                \
     ((IS_BREAK(parser) && !IS_CRLF(parser)) ?                          \
      ((parser)->line ++, (parser)->column = 0) :                       \
      ((parser)->column ++)),                                           \
     (parser)->unread --,                                               \
     (parser)->pointer += WIDTH(parser))
615
03be97ab
KS
616/*
617 * Public API declarations.
618 */
619
620YAML_DECLARE(yaml_token_t *)
621yaml_parser_get_token(yaml_parser_t *parser);
622
623YAML_DECLARE(yaml_token_t *)
624yaml_parser_peek_token(yaml_parser_t *parser);
625
f2b59d4d
KS
626/*
627 * Error handling.
628 */
629
630static int
631yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
632 yaml_mark_t context_mark, const char *problem);
633
634static yaml_mark_t
635yaml_parser_get_mark(yaml_parser_t *parser);
636
03be97ab
KS
637/*
638 * High-level token API.
639 */
640
641static int
642yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
643
644static int
645yaml_parser_fetch_next_token(yaml_parser_t *parser);
646
647/*
648 * Potential simple keys.
649 */
650
651static int
652yaml_parser_stale_simple_keys(yaml_parser_t *parser);
653
654static int
655yaml_parser_save_simple_key(yaml_parser_t *parser);
656
657static int
658yaml_parser_remove_simple_key(yaml_parser_t *parser);
659
eb9cceb5
KS
660static int
661yaml_parser_increase_flow_level(yaml_parser_t *parser);
662
663static int
664yaml_parser_decrease_flow_level(yaml_parser_t *parser);
665
666/*
667 * Token manipulation.
668 */
669
670static int
671yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token);
672
673static int
674yaml_parser_insert_token(yaml_parser_t *parser,
675 int number, yaml_token_t *token);
676
03be97ab
KS
677/*
678 * Indentation treatment.
679 */
680
681static int
eb9cceb5
KS
682yaml_parser_roll_indent(yaml_parser_t *parser, int column,
683 int number, yaml_token_type_t type, yaml_mark_t mark);
03be97ab
KS
684
685static int
f2b59d4d 686yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
03be97ab
KS
687
688/*
689 * Token fetchers.
690 */
691
692static int
693yaml_parser_fetch_stream_start(yaml_parser_t *parser);
694
695static int
696yaml_parser_fetch_stream_end(yaml_parser_t *parser);
697
698static int
699yaml_parser_fetch_directive(yaml_parser_t *parser);
700
03be97ab
KS
701static int
702yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
703 yaml_token_type_t type);
704
03be97ab
KS
705static int
706yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
707 yaml_token_type_t type);
708
03be97ab
KS
709static int
710yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
711 yaml_token_type_t type);
712
713static int
714yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
715
716static int
717yaml_parser_fetch_block_entry(yaml_parser_t *parser);
718
719static int
720yaml_parser_fetch_key(yaml_parser_t *parser);
721
722static int
723yaml_parser_fetch_value(yaml_parser_t *parser);
724
725static int
eb9cceb5 726yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
03be97ab
KS
727
728static int
729yaml_parser_fetch_tag(yaml_parser_t *parser);
730
03be97ab
KS
731static int
732yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
733
03be97ab
KS
734static int
735yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
736
737static int
738yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
739
740/*
741 * Token scanners.
742 */
743
744static int
745yaml_parser_scan_to_next_token(yaml_parser_t *parser);
746
747static yaml_token_t *
748yaml_parser_scan_directive(yaml_parser_t *parser);
749
750static int
751yaml_parser_scan_directive_name(yaml_parser_t *parser,
752 yaml_mark_t start_mark, yaml_char_t **name);
753
754static int
755yaml_parser_scan_yaml_directive_value(yaml_parser_t *parser,
756 yaml_mark_t start_mark, int *major, int *minor);
757
758static int
759yaml_parser_scan_yaml_directive_number(yaml_parser_t *parser,
760 yaml_mark_t start_mark, int *number);
761
762static int
763yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
764 yaml_char_t **handle, yaml_char_t **prefix);
765
766static yaml_token_t *
767yaml_parser_scan_anchor(yaml_parser_t *parser,
768 yaml_token_type_t type);
769
770static yaml_token_t *
771yaml_parser_scan_tag(yaml_parser_t *parser);
772
773static int
774yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
775 yaml_mark_t start_mark, yaml_char_t **handle);
776
777static int
778yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
779 yaml_mark_t start_mark, yaml_char_t **url);
780
781static yaml_token_t *
782yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal);
783
784static int
785yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser,
786 yaml_mark_t start_mark, int *chomping, int *increment);
787
788static yaml_token_t *
789yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
790
791static yaml_token_t *
792yaml_parser_scan_plain_scalar(yaml_parser_t *parser);
793
f2b59d4d
KS
794/*
795 * Get the next token and remove it from the tokens queue.
796 */
797
798YAML_DECLARE(yaml_token_t *)
799yaml_parser_get_token(yaml_parser_t *parser)
800{
801 yaml_token_t *token;
802
803 assert(parser); /* Non-NULL parser object is expected. */
804 assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
805
806 /* Ensure that the tokens queue contains enough tokens. */
807
808 if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
809
810 /* Fetch the next token from the queue. */
811
812 token = parser->tokens[parser->tokens_head];
813
814 /* Move the queue head. */
815
816 parser->tokens[parser->tokens_head++] = NULL;
817 if (parser->tokens_head == parser->tokens_size)
818 parser->tokens_head = 0;
819
820 parser->tokens_parsed++;
821
822 return token;
823}
824
825/*
826 * Get the next token, but don't remove it from the queue.
827 */
828
829YAML_DECLARE(yaml_token_t *)
830yaml_parser_peek_token(yaml_parser_t *parser)
831{
832 assert(parser); /* Non-NULL parser object is expected. */
833 assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
834
835 /* Ensure that the tokens queue contains enough tokens. */
836
837 if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
838
839 /* Fetch the next token from the queue. */
840
841 return parser->tokens[parser->tokens_head];
842}
843
844/*
845 * Set the scanner error and return 0.
846 */
847
848static int
849yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
850 yaml_mark_t context_mark, const char *problem)
851{
852 parser->error = YAML_SCANNER_ERROR;
853 parser->context = context;
854 parser->context_mark = context_mark;
855 parser->problem = problem;
856 parser->problem_mark = yaml_parser_get_mark(parser);
857}
858
859/*
860 * Get the mark for the current buffer position.
861 */
862
863static yaml_mark_t
864yaml_parser_get_mark(yaml_parser_t *parser)
865{
866 yaml_mark_t mark = { parser->index, parser->line, parser->column };
867
868 return mark;
869}
870
871
872/*
873 * Ensure that the tokens queue contains at least one token which can be
874 * returned to the Parser.
875 */
876
877static int
878yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
879{
880 int need_more_tokens;
881 int k;
882
883 /* While we need more tokens to fetch, do it. */
884
885 while (1)
886 {
887 /*
888 * Check if we really need to fetch more tokens.
889 */
890
891 need_more_tokens = 0;
892
893 if (parser->tokens_head == parser->tokens_tail)
894 {
895 /* Queue is empty. */
896
897 need_more_tokens = 1;
898 }
899 else
900 {
901 /* Check if any potential simple key may occupy the head position. */
902
903 for (k = 0; k <= parser->flow_level; k++) {
904 yaml_simple_key_t *simple_key = parser->simple_keys[k];
905 if (simple_key
906 && (simple_key->token_number == parser->tokens_parsed)) {
907 need_more_tokens = 1;
908 break;
909 }
910 }
911 }
912
913 /* We are finished. */
914
915 if (!need_more_tokens)
916 break;
917
918 /* Fetch the next token. */
919
920 if (!yaml_parser_fetch_next_token(parser))
921 return 0;
922 }
923
924 return 1;
925}
926
927/*
928 * The dispatcher for token fetchers.
929 */
930
931static int
932yaml_parser_fetch_next_token(yaml_parser_t *parser)
933{
934 /* Ensure that the buffer is initialized. */
935
936 if (!UPDATE(parser, 1))
937 return 0;
938
939 /* Check if we just started scanning. Fetch STREAM-START then. */
940
941 if (!parser->stream_start_produced)
942 return yaml_parser_fetch_stream_start(parser);
943
944 /* Eat whitespaces and comments until we reach the next token. */
945
946 if (!yaml_parser_scan_to_next_token(parser))
947 return 0;
948
949 /* Check the indentation level against the current column. */
950
951 if (!yaml_parser_unroll_indent(parser, parser->column))
952 return 0;
953
954 /*
955 * Ensure that the buffer contains at least 4 characters. 4 is the length
956 * of the longest indicators ('--- ' and '... ').
957 */
958
959 if (!UPDATE(parser, 4))
960 return 0;
961
962 /* Is it the end of the stream? */
963
964 if (IS_Z(parser))
965 return yaml_parser_fetch_stream_end(parser);
966
967 /* Is it a directive? */
968
969 if (parser->column == 0 && CHECK(parser, '%'))
970 return yaml_parser_fetch_directive(parser);
971
972 /* Is it the document start indicator? */
973
974 if (parser->column == 0
975 && CHECK_AT(parser, '-', 0)
976 && CHECK_AT(parser, '-', 1)
977 && CHECK_AT(parser, '-', 2)
978 && IS_BLANKZ_AT(parser, 3))
eb9cceb5
KS
979 return yaml_parser_fetch_document_indicator(parser,
980 YAML_DOCUMENT_START_TOKEN);
f2b59d4d
KS
981
982 /* Is it the document end indicator? */
983
984 if (parser->column == 0
985 && CHECK_AT(parser, '.', 0)
986 && CHECK_AT(parser, '.', 1)
987 && CHECK_AT(parser, '.', 2)
988 && IS_BLANKZ_AT(parser, 3))
eb9cceb5
KS
989 return yaml_parser_fetch_document_indicator(parser,
990 YAML_DOCUMENT_END_TOKEN);
f2b59d4d
KS
991
992 /* Is it the flow sequence start indicator? */
993
994 if (CHECK(parser, '['))
eb9cceb5
KS
995 return yaml_parser_fetch_flow_collection_start(parser,
996 YAML_FLOW_SEQUENCE_START_TOKEN);
f2b59d4d
KS
997
998 /* Is it the flow mapping start indicator? */
999
1000 if (CHECK(parser, '{'))
eb9cceb5
KS
1001 return yaml_parser_fetch_flow_collection_start(parser,
1002 YAML_FLOW_MAPPING_START_TOKEN);
f2b59d4d
KS
1003
1004 /* Is it the flow sequence end indicator? */
1005
1006 if (CHECK(parser, ']'))
eb9cceb5
KS
1007 return yaml_parser_fetch_flow_collection_end(parser,
1008 YAML_FLOW_SEQUENCE_END_TOKEN);
f2b59d4d
KS
1009
1010 /* Is it the flow mapping end indicator? */
1011
1012 if (CHECK(parser, '}'))
eb9cceb5
KS
1013 return yaml_parser_fetch_flow_collection_end(parser,
1014 YAML_FLOW_MAPPING_END_TOKEN);
f2b59d4d
KS
1015
1016 /* Is it the flow entry indicator? */
1017
1018 if (CHECK(parser, ','))
1019 return yaml_parser_fetch_flow_entry(parser);
1020
1021 /* Is it the block entry indicator? */
1022
1023 if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
1024 return yaml_parser_fetch_block_entry(parser);
1025
1026 /* Is it the key indicator? */
1027
1028 if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
1029 return yaml_parser_fetch_key(parser);
1030
1031 /* Is it the value indicator? */
1032
1033 if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
1034 return yaml_parser_fetch_value(parser);
1035
1036 /* Is it an alias? */
1037
1038 if (CHECK(parser, '*'))
eb9cceb5 1039 return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
f2b59d4d
KS
1040
1041 /* Is it an anchor? */
1042
1043 if (CHECK(parser, '&'))
eb9cceb5 1044 return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
f2b59d4d
KS
1045
1046 /* Is it a tag? */
1047
1048 if (CHECK(parser, '!'))
1049 return yaml_parser_fetch_tag(parser);
1050
1051 /* Is it a literal scalar? */
1052
1053 if (CHECK(parser, '|') && !parser->flow_level)
1054 return yaml_parser_fetch_block_scalar(parser, 1);
1055
1056 /* Is it a folded scalar? */
1057
1058 if (CHECK(parser, '>') && !parser->flow_level)
1059 return yaml_parser_fetch_block_scalar(parser, 0);
1060
1061 /* Is it a single-quoted scalar? */
1062
1063 if (CHECK(parser, '\''))
1064 return yaml_parser_fetch_flow_scalar(parser, 1);
1065
1066 /* Is it a double-quoted scalar? */
1067
1068 if (CHECK(parser, '"'))
1069 return yaml_parser_fetch_flow_scalar(parser, 0);
1070
1071 /*
1072 * Is it a plain scalar?
1073 *
1074 * A plain scalar may start with any non-blank characters except
1075 *
1076 * '-', '?', ':', ',', '[', ']', '{', '}',
1077 * '#', '&', '*', '!', '|', '>', '\'', '\"',
1078 * '%', '@', '`'.
1079 *
1080 * In the block context, it may also start with the characters
1081 *
1082 * '-', '?', ':'
1083 *
1084 * if it is followed by a non-space character.
1085 *
1086 * The last rule is more restrictive than the specification requires.
1087 */
1088
1089 if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
1090 || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
1091 || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
1092 || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
1093 || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
1094 || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
1095 || CHECK(parser, '@') || CHECK(parser, '`')) ||
1096 (!parser->flow_level &&
1097 (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
1098 IS_BLANKZ_AT(parser, 1)))
1099 return yaml_parser_fetch_plain_scalar(parser);
1100
1101 /*
1102 * If we don't determine the token type so far, it is an error.
1103 */
1104
1105 return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
1106 yaml_parser_get_mark(parser), "found character that cannot start any token");
1107}
1108
eb9cceb5
KS
1109/*
1110 * Check the list of potential simple keys and remove the positions that
1111 * cannot contain simple keys anymore.
1112 */
1113
1114static int
1115yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1116{
1117 int level;
1118
1119 /* Check for a potential simple key for each flow level. */
1120
1121 for (level = 0; level <= parser->flow_level; level++)
1122 {
1123 yaml_simple_key_t *simple_key = parser->simple_keys[level];
1124
1125 /*
1126 * The specification requires that a simple key
1127 *
1128 * - is limited to a single line,
1129 * - is shorter than 1024 characters.
1130 */
1131
1132 if (simple_key && (simple_key->line < parser->line ||
1133 simple_key->index < parser->index+1024)) {
1134
1135 /* Check if the potential simple key to be removed is required. */
1136
1137 if (simple_key->required) {
1138 return yaml_parser_set_scanner_error(parser,
1139 "while scanning a simple key", simple_key->mark,
1140 "could not found expected ':'");
1141 }
1142
1143 yaml_free(simple_key);
1144 parser->simple_keys[level] = NULL;
1145 }
1146 }
1147
1148 return 1;
1149}
1150
1151/*
1152 * Check if a simple key may start at the current position and add it if
1153 * needed.
1154 */
1155
1156static int
1157yaml_parser_save_simple_key(yaml_parser_t *parser)
1158{
1159 /*
1160 * A simple key is required at the current position if the scanner is in
1161 * the block context and the current column coincides with the indentation
1162 * level.
1163 */
1164
1165 int required = (!parser->flow_level && parser->indent == parser->column);
1166
1167 /*
1168 * A simple key is required only when it is the first token in the current
1169 * line. Therefore it is always allowed. But we add a check anyway.
1170 */
1171
1172 assert(parser->simple_key_allowed || !required); /* Impossible. */
1173
1174 /*
1175 * If the current position may start a simple key, save it.
1176 */
1177
1178 if (parser->simple_key_allowed)
1179 {
1180 yaml_simple_key_t simple_key = { required,
1181 parser->tokens_parsed + parser->tokens_tail - parser->tokens_head,
1182 parser->index, parser->line, parser->column,
1183 yaml_parser_get_mark(parser) };
1184
1185 if (!yaml_parser_remove_simple_key(parser)) return 0;
1186
1187 parser->simple_keys[parser->flow_level] =
1188 yaml_malloc(sizeof(yaml_simple_key_t));
1189 if (!parser->simple_keys[parser->flow_level]) {
1190 parser->error = YAML_MEMORY_ERROR;
1191 return 0;
1192 }
1193
1194 *(parser->simple_keys[parser->flow_level]) = simple_key;
1195 }
1196
1197 return 1;
1198}
1199
1200/*
1201 * Remove a potential simple key at the current flow level.
1202 */
1203
1204static int
1205yaml_parser_remove_simple_key(yaml_parser_t *parser)
1206{
1207 yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
1208
1209 if (simple_key)
1210 {
1211 /* If the key is required, it is an error. */
1212
1213 if (simple_key->required) {
1214 return yaml_parser_set_scanner_error(parser,
1215 "while scanning a simple key", simple_key->mark,
1216 "could not found expected ':'");
1217 }
1218
1219 /* Remove the key from the list. */
1220
1221 yaml_free(simple_key);
1222 parser->simple_keys[parser->flow_level] = NULL;
1223 }
1224
1225 return 1;
1226}
1227
1228/*
1229 * Increase the flow level and resize the simple key list if needed.
1230 */
1231
1232static int
1233yaml_parser_increase_flow_level(yaml_parser_t *parser)
1234{
1235 /* Check if we need to resize the list. */
1236
1237 if (parser->flow_level == parser->simple_keys_size-1)
1238 {
1239 yaml_simple_key_t **new_simple_keys =
1240 yaml_realloc(parser->simple_keys,
1241 sizeof(yaml_simple_key_t *) * parser->simple_keys_size * 2);
1242
1243 if (!new_simple_keys) {
1244 parser->error = YAML_MEMORY_ERROR;
1245 return 0;
1246 }
1247
1248 memset(new_simple_keys+parser->simple_keys_size, 0,
1249 sizeof(yaml_simple_key_t *)*parser->simple_keys_size);
1250
1251 parser->simple_keys = new_simple_keys;
1252 parser->simple_keys_size *= 2;
1253 }
1254
1255 /* Increase the flow level and reset the simple key. */
1256
1257 parser->simple_keys[++parser->flow_level] = NULL;
1258
1259 return 1;
1260}
1261
1262/*
1263 * Decrease the flow level.
1264 */
1265
1266static int
1267yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1268{
1269 assert(parser->flow_level); /* Greater than 0. */
1270 assert(!parser->simple_keys[parser->flow_level]); /* Must be removed. */
1271
1272 parser->flow_level --;
1273
1274 return 1;
1275}
1276
1277/*
1278 * Add a token to the tail of the tokens queue.
1279 */
1280
1281static int
1282yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token)
1283{
1284 return yaml_parser_insert_token(parser, -1, token);
1285}
1286
1287/*
1288 * Insert the token into the tokens queue. The number parameter is the
1289 * ordinal number of the token. If the number is equal to -1, add the token
1290 * to the tail of the queue.
1291 */
1292
1293static int
1294yaml_parser_insert_token(yaml_parser_t *parser,
1295 int number, yaml_token_t *token)
1296{
1297 /* The index of the token in the queue. */
1298
1299 int index = (number == -1)
1300 ? parser->tokens_tail - parser->tokens_head
1301 : number - parser->tokens_parsed;
1302
1303 assert(index >= 0 && index <= (parser->tokens_tail-parser->tokens_head));
1304
1305 /* Check if we need to resize the queue. */
1306
1307 if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size)
1308 {
1309 yaml_token_t **new_tokens = yaml_realloc(parser->tokens,
1310 sizeof(yaml_token_t *) * parser->tokens_size * 2);
1311
1312 if (!new_tokens) {
1313 parser->error = YAML_MEMORY_ERROR;
1314 return 0;
1315 }
1316
1317 memset(new_tokens+parser->tokens_size, 0,
1318 sizeof(yaml_token_t *)*parser->tokens_size);
1319
1320 parser->tokens = new_tokens;
1321 parser->tokens_size *= 2;
1322 }
1323
1324 /* Check if we need to move the queue to the beginning of the buffer. */
1325
1326 if (parser->tokens_tail == parser->tokens_size)
1327 {
1328 if (parser->tokens_head < parser->tokens_tail) {
1329 memmove(parser->tokens, parser->tokens+parser->tokens_head,
1330 sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head));
1331 }
1332 parser->tokens_tail -= parser->tokens_head;
1333 parser->tokens_head = 0;
1334 }
1335
1336 /* Check if we need to free space within the queue. */
1337
1338 if (index < (parser->tokens_tail-parser->tokens_head)) {
1339 memmove(parser->tokens+parser->tokens_head+index+1,
1340 parser->tokens+parser->tokens_head+index,
1341 sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head-index));
1342 }
1343
1344 /* Insert the token. */
1345
1346 parser->tokens[parser->tokens_head+index] = token;
1347 parser->tokens_tail ++;
1348
1349 return 1;
1350}
1351
1352/*
1353 * Push the current indentation level to the stack and set the new level
1354 * the current column is greater than the indentation level. In this case,
1355 * append or insert the specified token into the token queue.
1356 *
1357 */
1358
1359static int
1360yaml_parser_roll_indent(yaml_parser_t *parser, int column,
1361 int number, yaml_token_type_t type, yaml_mark_t mark)
1362{
1363 yaml_token_t *token;
1364
1365 /* In the flow context, do nothing. */
1366
1367 if (parser->flow_level)
1368 return 1;
1369
1370 if (parser->indent < column)
1371 {
1372 /* Check if we need to expand the indents stack. */
1373
1374 if (parser->indents_length == parser->indents_size)
1375 {
1376 int *new_indents = yaml_realloc(parser->indents,
1377 sizeof(int) * parser->indents_size * 2);
1378
1379 if (!new_indents) {
1380 parser->error = YAML_MEMORY_ERROR;
1381 return 0;
1382 }
1383
1384 memset(new_indents+parser->indents_size, 0,
1385 sizeof(int)*parser->indents_size);
1386
1387 parser->indents = new_indents;
1388 parser->indents_size *= 2;
1389 }
1390
1391 /*
1392 * Push the current indentation level to the stack and set the new
1393 * indentation level.
1394 */
1395
1396 parser->indents[parser->indents_length++] = parser->indent;
1397 parser->indent = column;
1398
1399 /* Create a token. */
1400
1401 token = yaml_token_new(type, mark, mark);
1402 if (!token) {
1403 parser->error = YAML_MEMORY_ERROR;
1404 return 0;
1405 }
1406
1407 /* Insert the token into the queue. */
1408
1409 if (!yaml_parser_insert_token(parser, number, token)) {
1410 yaml_token_delete(token);
1411 return 0;
1412 }
1413 }
1414
1415 return 1;
1416}
1417
1418/*
1419 * Pop indentation levels from the indents stack until the current level
1420 * becomes less or equal to the column. For each intendation level, append
1421 * the BLOCK-END token.
1422 */
1423
1424
1425static int
1426yaml_parser_unroll_indent(yaml_parser_t *parser, int column)
1427{
1428 yaml_token_t *token;
1429
1430 /* In the flow context, do nothing. */
1431
1432 if (parser->flow_level)
1433 return 1;
1434
1435 /* Loop through the intendation levels in the stack. */
1436
1437 while (parser->indent > column)
1438 {
1439 yaml_mark_t mark = yaml_parser_get_mark(parser);
1440
1441 /* Create a token. */
1442
1443 token = yaml_token_new(YAML_BLOCK_END_TOKEN, mark, mark);
1444 if (!token) {
1445 parser->error = YAML_MEMORY_ERROR;
1446 return 0;
1447 }
1448
1449 /* Append the token to the queue. */
1450
1451 if (!yaml_parser_append_token(parser, token)) {
1452 yaml_token_delete(token);
1453 return 0;
1454 }
1455
1456 /* Pop the indentation level. */
1457
1458 assert(parser->indents_length); /* Non-empty stack expected. */
1459
1460 parser->indent = parser->indents[--parser->indents_length];
1461 }
1462
1463 return 1;
1464}
1465
1466/*
1467 * Initialize the scanner and produce the STREAM-START token.
1468 */
1469
1470static int
1471yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1472{
1473 yaml_mark_t mark = yaml_parser_get_mark(parser);
1474 yaml_token_t *token;
1475
1476 /* Set the initial indentation. */
1477
1478 parser->indent = -1;
1479
1480 /* A simple key is allowed at the beginning of the stream. */
1481
1482 parser->simple_key_allowed = 1;
1483
1484 /* We have started. */
1485
1486 parser->stream_start_produced = 1;
1487
1488 /* Create the STREAM-START token. */
1489
1490 token = yaml_stream_start_token_new(parser->encoding, mark, mark);
1491 if (!token) {
1492 parser->error = YAML_MEMORY_ERROR;
1493 return 0;
1494 }
1495
1496 /* Append the token to the queue. */
1497
1498 if (!yaml_parser_append_token(parser, token)) {
1499 yaml_token_delete(token);
1500 return 0;
1501 }
1502
1503 return 1;
1504}
1505
1506/*
1507 * Produce the STREAM-END token and shut down the scanner.
1508 */
1509
1510static int
1511yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1512{
1513 yaml_mark_t mark = yaml_parser_get_mark(parser);
1514 yaml_token_t *token;
1515
1516 /* Reset the indentation level. */
1517
1518 if (!yaml_parser_unroll_indent(parser, -1))
1519 return 0;
1520
1521 /* We have finished. */
1522
1523 parser->stream_end_produced = 1;
1524
1525 /* Create the STREAM-END token. */
1526
1527 token = yaml_stream_end_token_new(mark, mark);
1528 if (!token) {
1529 parser->error = YAML_MEMORY_ERROR;
1530 return 0;
1531 }
1532
1533 /* Append the token to the queue. */
1534
1535 if (!yaml_parser_append_token(parser, token)) {
1536 yaml_token_delete(token);
1537 return 0;
1538 }
1539
1540 return 1;
1541}
1542
1543/*
1544 * Produce the YAML-DIRECTIVE or TAG-DIRECTIVE token.
1545 */
1546
1547static int
1548yaml_parser_fetch_directive(yaml_parser_t *parser)
1549{
1550 yaml_token_t *token;
1551
1552 /* Reset the indentation level. */
1553
1554 if (!yaml_parser_unroll_indent(parser, -1))
1555 return 0;
1556
1557 /* Reset simple keys. */
1558
1559 if (!yaml_parser_remove_simple_key(parser))
1560 return 0;
1561
1562 parser->simple_key_allowed = 0;
1563
1564 /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1565
1566 token = yaml_parser_scan_directive(parser);
1567 if (!token) return 0;
1568
1569 /* Append the token to the queue. */
1570
1571 if (!yaml_parser_append_token(parser, token)) {
1572 yaml_token_delete(token);
1573 return 0;
1574 }
1575
1576 return 1;
1577}
1578
1579/*
1580 * Produce the DOCUMENT-START or DOCUMENT-END token.
1581 */
1582
1583static int
1584yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1585 yaml_token_type_t type)
1586{
1587 yaml_mark_t start_mark, end_mark;
1588 yaml_token_t *token;
1589
1590 /* Reset the indentation level. */
1591
1592 if (!yaml_parser_unroll_indent(parser, -1))
1593 return 0;
1594
1595 /* Reset simple keys. */
1596
1597 if (!yaml_parser_remove_simple_key(parser))
1598 return 0;
1599
1600 parser->simple_key_allowed = 0;
1601
1602 /* Consume the token. */
1603
1604 start_mark = yaml_parser_get_mark(parser);
1605
1606 FORWARD(parser);
1607 FORWARD(parser);
1608 FORWARD(parser);
1609
1610 end_mark = yaml_parser_get_mark(parser);
1611
1612 /* Create the DOCUMENT-START or DOCUMENT-END token. */
1613
1614 token = yaml_token_new(type, start_mark, end_mark);
1615 if (!token) {
1616 parser->error = YAML_MEMORY_ERROR;
1617 return 0;
1618 }
1619
1620 /* Append the token to the queue. */
1621
1622 if (!yaml_parser_append_token(parser, token)) {
1623 yaml_token_delete(token);
1624 return 0;
1625 }
1626
1627 return 1;
1628}
1629
1630/*
1631 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1632 */
1633
1634static int
1635yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1636 yaml_token_type_t type)
1637{
1638 yaml_mark_t start_mark, end_mark;
1639 yaml_token_t *token;
1640
1641 /* The indicators '[' and '{' may start a simple key. */
1642
1643 if (!yaml_parser_save_simple_key(parser))
1644 return 0;
1645
1646 /* Increase the flow level. */
1647
1648 if (!yaml_parser_increase_flow_level(parser))
1649 return 0;
1650
1651 /* A simple key may follow the indicators '[' and '{'. */
1652
1653 parser->simple_key_allowed = 1;
1654
1655 /* Consume the token. */
1656
1657 start_mark = yaml_parser_get_mark(parser);
1658 FORWARD(parser);
1659 end_mark = yaml_parser_get_mark(parser);
1660
1661 /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1662
1663 token = yaml_token_new(type, start_mark, end_mark);
1664 if (!token) {
1665 parser->error = YAML_MEMORY_ERROR;
1666 return 0;
1667 }
1668
1669 /* Append the token to the queue. */
1670
1671 if (!yaml_parser_append_token(parser, token)) {
1672 yaml_token_delete(token);
1673 return 0;
1674 }
1675
1676 return 1;
1677}
1678
1679/*
1680 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1681 */
1682
1683static int
1684yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1685 yaml_token_type_t type)
1686{
1687 yaml_mark_t start_mark, end_mark;
1688 yaml_token_t *token;
1689
1690 /* Reset any potential simple key on the current flow level. */
1691
1692 if (!yaml_parser_remove_simple_key(parser))
1693 return 0;
1694
1695 /* Decrease the flow level. */
1696
1697 if (!yaml_parser_decrease_flow_level(parser))
1698 return 0;
1699
1700 /* No simple keys after the indicators ']' and '}'. */
1701
1702 parser->simple_key_allowed = 0;
1703
1704 /* Consume the token. */
1705
1706 start_mark = yaml_parser_get_mark(parser);
1707 FORWARD(parser);
1708 end_mark = yaml_parser_get_mark(parser);
1709
1710 /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1711
1712 token = yaml_token_new(type, start_mark, end_mark);
1713 if (!token) {
1714 parser->error = YAML_MEMORY_ERROR;
1715 return 0;
1716 }
1717
1718 /* Append the token to the queue. */
1719
1720 if (!yaml_parser_append_token(parser, token)) {
1721 yaml_token_delete(token);
1722 return 0;
1723 }
1724
1725 return 1;
1726}
1727
1728/*
1729 * Produce the FLOW-ENTRY token.
1730 */
1731
1732static int
1733yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1734{
1735 yaml_mark_t start_mark, end_mark;
1736 yaml_token_t *token;
1737
1738 /* Reset any potential simple keys on the current flow level. */
1739
1740 if (!yaml_parser_remove_simple_key(parser))
1741 return 0;
1742
1743 /* Simple keys are allowed after ','. */
1744
1745 parser->simple_key_allowed = 1;
1746
1747 /* Consume the token. */
1748
1749 start_mark = yaml_parser_get_mark(parser);
1750 FORWARD(parser);
1751 end_mark = yaml_parser_get_mark(parser);
1752
1753 /* Create the FLOW-ENTRY token. */
1754
1755 token = yaml_token_new(YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1756 if (!token) {
1757 parser->error = YAML_MEMORY_ERROR;
1758 return 0;
1759 }
1760
1761 /* Append the token to the queue. */
1762
1763 if (!yaml_parser_append_token(parser, token)) {
1764 yaml_token_delete(token);
1765 return 0;
1766 }
1767
1768 return 1;
1769}
1770
1771/*
1772 * Produce the BLOCK-ENTRY token.
1773 */
1774
1775static int
1776yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1777{
1778 yaml_mark_t start_mark, end_mark;
1779 yaml_token_t *token;
1780
1781 /* Check if the scanner is in the block context. */
1782
1783 if (!parser->flow_level)
1784 {
1785 /* Check if we are allowed to start a new entry. */
1786
1787 if (!parser->simple_key_allowed) {
1788 return yaml_parser_set_scanner_error(parser, NULL,
1789 yaml_parser_get_mark(parser),
1790 "block sequence entries are not allowed in this context");
1791 }
1792
1793 /* Add the BLOCK-SEQUENCE-START token if needed. */
1794
1795 if (!yaml_parser_roll_indent(parser, parser->column, -1,
1796 YAML_BLOCK_SEQUENCE_START_TOKEN, yaml_parser_get_mark(parser)))
1797 return 0;
1798 }
1799 else
1800 {
1801 /*
1802 * It is an error for the '-' indicator to occur in the flow context,
1803 * but we let the Parser detect and report about it because the Parser
1804 * is able to point to the context.
1805 */
1806 }
1807
1808 /* Reset any potential simple keys on the current flow level. */
1809
1810 if (!yaml_parser_remove_simple_key(parser))
1811 return 0;
1812
1813 /* Simple keys are allowed after '-'. */
1814
1815 parser->simple_key_allowed = 1;
1816
1817 /* Consume the token. */
1818
1819 start_mark = yaml_parser_get_mark(parser);
1820 FORWARD(parser);
1821 end_mark = yaml_parser_get_mark(parser);
1822
1823 /* Create the BLOCK-ENTRY token. */
1824
1825 token = yaml_token_new(YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1826 if (!token) {
1827 parser->error = YAML_MEMORY_ERROR;
1828 return 0;
1829 }
1830
1831 /* Append the token to the queue. */
1832
1833 if (!yaml_parser_append_token(parser, token)) {
1834 yaml_token_delete(token);
1835 return 0;
1836 }
1837
1838 return 1;
1839}
1840
1841/*
1842 * Produce the KEY token.
1843 */
1844
1845static int
1846yaml_parser_fetch_key(yaml_parser_t *parser)
1847{
1848 yaml_mark_t start_mark, end_mark;
1849 yaml_token_t *token;
1850
1851 /* In the block context, additional checks are required. */
1852
1853 if (!parser->flow_level)
1854 {
1855 /* Check if we are allowed to start a new key (not nessesary simple). */
1856
1857 if (!parser->simple_key_allowed) {
1858 return yaml_parser_set_scanner_error(parser, NULL,
1859 yaml_parser_get_mark(parser),
1860 "mapping keys are not allowed in this context");
1861 }
1862
1863 /* Add the BLOCK-MAPPING-START token if needed. */
1864
1865 if (!yaml_parser_roll_indent(parser, parser->column, -1,
1866 YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
1867 return 0;
1868 }
1869
1870 /* Reset any potential simple keys on the current flow level. */
1871
1872 if (!yaml_parser_remove_simple_key(parser))
1873 return 0;
1874
1875 /* Simple keys are allowed after '?' in the block context. */
1876
1877 parser->simple_key_allowed = (!parser->flow_level);
1878
1879 /* Consume the token. */
1880
1881 start_mark = yaml_parser_get_mark(parser);
1882 FORWARD(parser);
1883 end_mark = yaml_parser_get_mark(parser);
1884
1885 /* Create the KEY token. */
1886
1887 token = yaml_token_new(YAML_KEY_TOKEN, start_mark, end_mark);
1888 if (!token) {
1889 parser->error = YAML_MEMORY_ERROR;
1890 return 0;
1891 }
1892
1893 /* Append the token to the queue. */
1894
1895 if (!yaml_parser_append_token(parser, token)) {
1896 yaml_token_delete(token);
1897 return 0;
1898 }
1899
1900 return 1;
1901}
1902
1903/*
1904 * Produce the VALUE token.
1905 */
1906
1907static int
1908yaml_parser_fetch_value(yaml_parser_t *parser)
1909{
1910 yaml_mark_t start_mark, end_mark;
1911 yaml_token_t *token;
1912
1913 /* Have we found a simple key? */
1914
1915 if (parser->simple_keys[parser->flow_level])
1916 {
1917 yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
1918
1919 /* Create the KEY token. */
1920
1921 token = yaml_token_new(YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1922 if (!token) {
1923 parser->error = YAML_MEMORY_ERROR;
1924 return 0;
1925 }
1926
1927 /* Insert the token into the queue. */
1928
1929 if (!yaml_parser_insert_token(parser, simple_key->token_number, token)) {
1930 yaml_token_delete(token);
1931 return 0;
1932 }
1933
1934 /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1935
1936 if (!yaml_parser_roll_indent(parser, parser->column,
1937 simple_key->token_number,
1938 YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1939 return 0;
1940
1941 /* Remove the simple key from the list. */
1942
1943 if (!yaml_parser_remove_simple_key(parser)) return 0;
1944
1945 /* A simple key cannot follow another simple key. */
1946
1947 parser->simple_key_allowed = 0;
1948 }
1949 else
1950 {
1951 /* The ':' indicator follows a complex key. */
1952
1953 /* In the block context, extra checks are required. */
1954
1955 if (!parser->flow_level)
1956 {
1957 /* Check if we are allowed to start a complex value. */
1958
1959 if (!parser->simple_key_allowed) {
1960 return yaml_parser_set_scanner_error(parser, NULL,
1961 yaml_parser_get_mark(parser),
1962 "mapping values are not allowed in this context");
1963 }
1964
1965 /* Add the BLOCK-MAPPING-START token if needed. */
1966
1967 if (!yaml_parser_roll_indent(parser, parser->column, -1,
1968 YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
1969 return 0;
1970 }
1971
1972 /* Remove a potential simple key from the list. */
1973
1974 if (!yaml_parser_remove_simple_key(parser)) return 0;
1975
1976 /* Simple keys after ':' are allowed in the block context. */
1977
1978 parser->simple_key_allowed = (!parser->flow_level);
1979 }
1980
1981 /* Consume the token. */
1982
1983 start_mark = yaml_parser_get_mark(parser);
1984 FORWARD(parser);
1985 end_mark = yaml_parser_get_mark(parser);
1986
1987 /* Create the VALUE token. */
1988
1989 token = yaml_token_new(YAML_VALUE_TOKEN, start_mark, end_mark);
1990 if (!token) {
1991 parser->error = YAML_MEMORY_ERROR;
1992 return 0;
1993 }
1994
1995 /* Append the token to the queue. */
1996
1997 if (!yaml_parser_append_token(parser, token)) {
1998 yaml_token_delete(token);
1999 return 0;
2000 }
2001
2002 return 1;
2003}
2004
2005/*
2006 * Produce the ALIAS or ANCHOR token.
2007 */
2008
2009static int
2010yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
2011{
2012 yaml_token_t *token;
2013
2014 /* An anchor or an alias could be a simple key. */
2015
2016 if (!yaml_parser_save_simple_key(parser))
2017 return 0;
2018
2019 /* A simple key cannot follow an anchor or an alias. */
2020
2021 parser->simple_key_allowed = 0;
2022
2023 /* Create the ALIAS or ANCHOR token. */
2024
2025 token = yaml_parser_scan_anchor(parser, type);
2026 if (!token) return 0;
2027
2028 /* Append the token to the queue. */
2029
2030 if (!yaml_parser_append_token(parser, token)) {
2031 yaml_token_delete(token);
2032 return 0;
2033 }
2034
2035 return 1;
2036}
2037
2038/*
2039 * Produce the TAG token.
2040 */
2041
2042static int
2043yaml_parser_fetch_tag(yaml_parser_t *parser)
2044{
2045 yaml_token_t *token;
2046
2047 /* A tag could be a simple key. */
2048
2049 if (!yaml_parser_save_simple_key(parser))
2050 return 0;
2051
2052 /* A simple key cannot follow a tag. */
2053
2054 parser->simple_key_allowed = 0;
2055
2056 /* Create the TAG token. */
2057
2058 token = yaml_parser_scan_tag(parser);
2059 if (!token) return 0;
2060
2061 /* Append the token to the queue. */
2062
2063 if (!yaml_parser_append_token(parser, token)) {
2064 yaml_token_delete(token);
2065 return 0;
2066 }
2067
2068 return 1;
2069}
2070
2071/*
2072 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
2073 */
2074
2075static int
2076yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
2077{
2078 yaml_token_t *token;
2079
2080 /* Remove any potential simple keys. */
2081
2082 if (!yaml_parser_remove_simple_key(parser))
2083 return 0;
2084
2085 /* A simple key may follow a block scalar. */
2086
2087 parser->simple_key_allowed = 1;
2088
2089 /* Create the SCALAR token. */
2090
2091 token = yaml_parser_scan_block_scalar(parser, literal);
2092 if (!token) return 0;
2093
2094 /* Append the token to the queue. */
2095
2096 if (!yaml_parser_append_token(parser, token)) {
2097 yaml_token_delete(token);
2098 return 0;
2099 }
2100
2101 return 1;
2102}
2103
2104/*
2105 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
2106 */
2107
2108static int
2109yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
2110{
2111 yaml_token_t *token;
2112
2113 /* A plain scalar could be a simple key. */
2114
2115 if (!yaml_parser_save_simple_key(parser))
2116 return 0;
2117
2118 /* A simple key cannot follow a flow scalar. */
2119
2120 parser->simple_key_allowed = 0;
2121
2122 /* Create the SCALAR token. */
2123
2124 token = yaml_parser_scan_flow_scalar(parser, single);
2125 if (!token) return 0;
2126
2127 /* Append the token to the queue. */
2128
2129 if (!yaml_parser_append_token(parser, token)) {
2130 yaml_token_delete(token);
2131 return 0;
2132 }
2133
2134 return 1;
2135}
2136
2137/*
2138 * Produce the SCALAR(...,plain) token.
2139 */
2140
2141static int
2142yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
2143{
2144 yaml_token_t *token;
2145
2146 /* A plain scalar could be a simple key. */
2147
2148 if (!yaml_parser_save_simple_key(parser))
2149 return 0;
2150
2151 /* A simple key cannot follow a flow scalar. */
2152
2153 parser->simple_key_allowed = 0;
2154
2155 /* Create the SCALAR token. */
2156
2157 token = yaml_parser_scan_plain_scalar(parser);
2158 if (!token) return 0;
2159
2160 /* Append the token to the queue. */
2161
2162 if (!yaml_parser_append_token(parser, token)) {
2163 yaml_token_delete(token);
2164 return 0;
2165 }
2166
2167 return 1;
2168}
2169
This page took 1.811664 seconds and 5 git commands to generate.