]> andersk Git - libyaml.git/blame - src/scanner.c
Implement the block scalar scanner.
[libyaml.git] / src / scanner.c
CommitLineData
03be97ab
KS
1
2/*
3 * Introduction
4 * ************
5 *
6 * The following notes assume that you are familiar with the YAML specification
7 * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8 * some cases we are less restrictive than it requires.
9 *
10 * The process of transforming a YAML stream into a sequence of events is
11 * divided into two steps: Scanning and Parsing.
12 *
13 * The Scanner transforms the input stream into a sequence of tokens, while the
14 * Parser transforms the sequence of tokens produced by the Scanner into a
15 * sequence of parsing events.
16 *
17 * The Scanner is rather clever and complicated. The Parser, on the contrary,
18 * is a straightforward implementation of a recursive-descent parser (or,
19 * LL(1) parser, as it is usually called).
20 *
21 * Actually there are two issues of Scanning that might be called "clever", the
22 * rest is quite straightforward. The issues are "block collection start" and
23 * "simple keys". Both issues are explained below in details.
24 *
25 * Here the Scanning step is explained and implemented. We start with the list
26 * of all the tokens produced by the Scanner together with short descriptions.
27 *
28 * Now, tokens:
29 *
30 * STREAM-START(encoding) # The stream start.
31 * STREAM-END # The stream end.
32 * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33 * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34 * DOCUMENT-START # '---'
35 * DOCUMENT-END # '...'
36 * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37 * BLOCK-MAPPING-START # sequence or a block mapping.
38 * BLOCK-END # Indentation decrease.
39 * FLOW-SEQUENCE-START # '['
40 * FLOW-SEQUENCE-END # ']'
41 * FLOW-MAPPING-START # '{'
42 * FLOW-MAPPING-END # '}'
43 * BLOCK-ENTRY # '-'
44 * FLOW-ENTRY # ','
45 * KEY # '?' or nothing (simple keys).
46 * VALUE # ':'
47 * ALIAS(anchor) # '*anchor'
48 * ANCHOR(anchor) # '&anchor'
49 * TAG(handle,suffix) # '!handle!suffix'
50 * SCALAR(value,style) # A scalar.
51 *
52 * The following two tokens are "virtual" tokens denoting the beginning and the
53 * end of the stream:
54 *
55 * STREAM-START(encoding)
56 * STREAM-END
57 *
58 * We pass the information about the input stream encoding with the
59 * STREAM-START token.
60 *
61 * The next two tokens are responsible for directives:
62 *
63 * VERSION-DIRECTIVE(major,minor)
64 * TAG-DIRECTIVE(handle,prefix)
65 *
66 * Example:
67 *
68 * %YAML 1.1
69 * %TAG ! !foo
70 * %TAG !yaml! tag:yaml.org,2002:
71 * ---
72 *
73 * The corresponding sequence of tokens:
74 *
75 * STREAM-START(utf-8)
76 * VERSION-DIRECTIVE(1,1)
77 * TAG-DIRECTIVE("!","!foo")
78 * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79 * DOCUMENT-START
80 * STREAM-END
81 *
82 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83 * line.
84 *
85 * The document start and end indicators are represented by:
86 *
87 * DOCUMENT-START
88 * DOCUMENT-END
89 *
90 * Note that if a YAML stream contains an implicit document (without '---'
91 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92 * produced.
93 *
94 * In the following examples, we present whole documents together with the
95 * produced tokens.
96 *
97 * 1. An implicit document:
98 *
99 * 'a scalar'
100 *
101 * Tokens:
102 *
103 * STREAM-START(utf-8)
104 * SCALAR("a scalar",single-quoted)
105 * STREAM-END
106 *
107 * 2. An explicit document:
108 *
109 * ---
110 * 'a scalar'
111 * ...
112 *
113 * Tokens:
114 *
115 * STREAM-START(utf-8)
116 * DOCUMENT-START
117 * SCALAR("a scalar",single-quoted)
118 * DOCUMENT-END
119 * STREAM-END
120 *
121 * 3. Several documents in a stream:
122 *
123 * 'a scalar'
124 * ---
125 * 'another scalar'
126 * ---
127 * 'yet another scalar'
128 *
129 * Tokens:
130 *
131 * STREAM-START(utf-8)
132 * SCALAR("a scalar",single-quoted)
133 * DOCUMENT-START
134 * SCALAR("another scalar",single-quoted)
135 * DOCUMENT-START
136 * SCALAR("yet another scalar",single-quoted)
137 * STREAM-END
138 *
139 * We have already introduced the SCALAR token above. The following tokens are
140 * used to describe aliases, anchors, tags, and scalars:
141 *
142 * ALIAS(anchor)
143 * ANCHOR(anchor)
144 * TAG(handle,suffix)
145 * SCALAR(value,style)
146 *
147 * The following series of examples illustrate the usage of these tokens:
148 *
149 * 1. A recursive sequence:
150 *
151 * &A [ *A ]
152 *
153 * Tokens:
154 *
155 * STREAM-START(utf-8)
156 * ANCHOR("A")
157 * FLOW-SEQUENCE-START
158 * ALIAS("A")
159 * FLOW-SEQUENCE-END
160 * STREAM-END
161 *
162 * 2. A tagged scalar:
163 *
164 * !!float "3.14" # A good approximation.
165 *
166 * Tokens:
167 *
168 * STREAM-START(utf-8)
169 * TAG("!!","float")
170 * SCALAR("3.14",double-quoted)
171 * STREAM-END
172 *
173 * 3. Various scalar styles:
174 *
175 * --- # Implicit empty plain scalars do not produce tokens.
176 * --- a plain scalar
177 * --- 'a single-quoted scalar'
178 * --- "a double-quoted scalar"
179 * --- |-
180 * a literal scalar
181 * --- >-
182 * a folded
183 * scalar
184 *
185 * Tokens:
186 *
187 * STREAM-START(utf-8)
188 * DOCUMENT-START
189 * DOCUMENT-START
190 * SCALAR("a plain scalar",plain)
191 * DOCUMENT-START
192 * SCALAR("a single-quoted scalar",single-quoted)
193 * DOCUMENT-START
194 * SCALAR("a double-quoted scalar",double-quoted)
195 * DOCUMENT-START
196 * SCALAR("a literal scalar",literal)
197 * DOCUMENT-START
198 * SCALAR("a folded scalar",folded)
199 * STREAM-END
200 *
201 * Now it's time to review collection-related tokens. We will start with
202 * flow collections:
203 *
204 * FLOW-SEQUENCE-START
205 * FLOW-SEQUENCE-END
206 * FLOW-MAPPING-START
207 * FLOW-MAPPING-END
208 * FLOW-ENTRY
209 * KEY
210 * VALUE
211 *
212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214 * respectively. FLOW-ENTRY represents the ',' indicator. Finally the
215 * indicators '?' and ':', which are used for denoting mapping keys and values,
216 * are represented by the KEY and VALUE tokens.
217 *
218 * The following examples show flow collections:
219 *
220 * 1. A flow sequence:
221 *
222 * [item 1, item 2, item 3]
223 *
224 * Tokens:
225 *
226 * STREAM-START(utf-8)
227 * FLOW-SEQUENCE-START
228 * SCALAR("item 1",plain)
229 * FLOW-ENTRY
230 * SCALAR("item 2",plain)
231 * FLOW-ENTRY
232 * SCALAR("item 3",plain)
233 * FLOW-SEQUENCE-END
234 * STREAM-END
235 *
236 * 2. A flow mapping:
237 *
238 * {
239 * a simple key: a value, # Note that the KEY token is produced.
240 * ? a complex key: another value,
241 * }
242 *
243 * Tokens:
244 *
245 * STREAM-START(utf-8)
246 * FLOW-MAPPING-START
247 * KEY
248 * SCALAR("a simple key",plain)
249 * VALUE
250 * SCALAR("a value",plain)
251 * FLOW-ENTRY
252 * KEY
253 * SCALAR("a complex key",plain)
254 * VALUE
255 * SCALAR("another value",plain)
256 * FLOW-ENTRY
257 * FLOW-MAPPING-END
258 * STREAM-END
259 *
260 * A simple key is a key which is not denoted by the '?' indicator. Note that
261 * the Scanner still produces the KEY token whenever it encounters a simple key.
262 *
263 * For scanning block collections, the following tokens are used (note that we
264 * repeat KEY and VALUE here):
265 *
266 * BLOCK-SEQUENCE-START
267 * BLOCK-MAPPING-START
268 * BLOCK-END
269 * BLOCK-ENTRY
270 * KEY
271 * VALUE
272 *
273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274 * increase that precedes a block collection (cf. the INDENT token in Python).
275 * The token BLOCK-END denotes indentation decrease that ends a block collection
276 * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277 * that make detection of these tokens more complex.
278 *
279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280 * '-', '?', and ':' correspondingly.
281 *
282 * The following examples show how the tokens BLOCK-SEQUENCE-START,
283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284 *
285 * 1. Block sequences:
286 *
287 * - item 1
288 * - item 2
289 * -
290 * - item 3.1
291 * - item 3.2
292 * -
293 * key 1: value 1
294 * key 2: value 2
295 *
296 * Tokens:
297 *
298 * STREAM-START(utf-8)
299 * BLOCK-SEQUENCE-START
300 * BLOCK-ENTRY
301 * SCALAR("item 1",plain)
302 * BLOCK-ENTRY
303 * SCALAR("item 2",plain)
304 * BLOCK-ENTRY
305 * BLOCK-SEQUENCE-START
306 * BLOCK-ENTRY
307 * SCALAR("item 3.1",plain)
308 * BLOCK-ENTRY
309 * SCALAR("item 3.2",plain)
310 * BLOCK-END
311 * BLOCK-ENTRY
312 * BLOCK-MAPPING-START
313 * KEY
314 * SCALAR("key 1",plain)
315 * VALUE
316 * SCALAR("value 1",plain)
317 * KEY
318 * SCALAR("key 2",plain)
319 * VALUE
320 * SCALAR("value 2",plain)
321 * BLOCK-END
322 * BLOCK-END
323 * STREAM-END
324 *
325 * 2. Block mappings:
326 *
327 * a simple key: a value # The KEY token is produced here.
328 * ? a complex key
329 * : another value
330 * a mapping:
331 * key 1: value 1
332 * key 2: value 2
333 * a sequence:
334 * - item 1
335 * - item 2
336 *
337 * Tokens:
338 *
339 * STREAM-START(utf-8)
340 * BLOCK-MAPPING-START
341 * KEY
342 * SCALAR("a simple key",plain)
343 * VALUE
344 * SCALAR("a value",plain)
345 * KEY
346 * SCALAR("a complex key",plain)
347 * VALUE
348 * SCALAR("another value",plain)
349 * KEY
350 * SCALAR("a mapping",plain)
351 * BLOCK-MAPPING-START
352 * KEY
353 * SCALAR("key 1",plain)
354 * VALUE
355 * SCALAR("value 1",plain)
356 * KEY
357 * SCALAR("key 2",plain)
358 * VALUE
359 * SCALAR("value 2",plain)
360 * BLOCK-END
361 * KEY
362 * SCALAR("a sequence",plain)
363 * VALUE
364 * BLOCK-SEQUENCE-START
365 * BLOCK-ENTRY
366 * SCALAR("item 1",plain)
367 * BLOCK-ENTRY
368 * SCALAR("item 2",plain)
369 * BLOCK-END
370 * BLOCK-END
371 * STREAM-END
372 *
373 * YAML does not always require to start a new block collection from a new
374 * line. If the current line contains only '-', '?', and ':' indicators, a new
375 * block collection may start at the current line. The following examples
376 * illustrate this case:
377 *
378 * 1. Collections in a sequence:
379 *
380 * - - item 1
381 * - item 2
382 * - key 1: value 1
383 * key 2: value 2
384 * - ? complex key
385 * : complex value
386 *
387 * Tokens:
388 *
389 * STREAM-START(utf-8)
390 * BLOCK-SEQUENCE-START
391 * BLOCK-ENTRY
392 * BLOCK-SEQUENCE-START
393 * BLOCK-ENTRY
394 * SCALAR("item 1",plain)
395 * BLOCK-ENTRY
396 * SCALAR("item 2",plain)
397 * BLOCK-END
398 * BLOCK-ENTRY
399 * BLOCK-MAPPING-START
400 * KEY
401 * SCALAR("key 1",plain)
402 * VALUE
403 * SCALAR("value 1",plain)
404 * KEY
405 * SCALAR("key 2",plain)
406 * VALUE
407 * SCALAR("value 2",plain)
408 * BLOCK-END
409 * BLOCK-ENTRY
410 * BLOCK-MAPPING-START
411 * KEY
412 * SCALAR("complex key",plain)
413 * VALUE
414 * SCALAR("complex value",plain)
415 * BLOCK-END
416 * BLOCK-END
417 * STREAM-END
418 *
419 * 2. Collections in a mapping:
420 *
421 * ? a sequence
422 * : - item 1
423 * - item 2
424 * ? a mapping
425 * : key 1: value 1
426 * key 2: value 2
427 *
428 * Tokens:
429 *
430 * STREAM-START(utf-8)
431 * BLOCK-MAPPING-START
432 * KEY
433 * SCALAR("a sequence",plain)
434 * VALUE
435 * BLOCK-SEQUENCE-START
436 * BLOCK-ENTRY
437 * SCALAR("item 1",plain)
438 * BLOCK-ENTRY
439 * SCALAR("item 2",plain)
440 * BLOCK-END
441 * KEY
442 * SCALAR("a mapping",plain)
443 * VALUE
444 * BLOCK-MAPPING-START
445 * KEY
446 * SCALAR("key 1",plain)
447 * VALUE
448 * SCALAR("value 1",plain)
449 * KEY
450 * SCALAR("key 2",plain)
451 * VALUE
452 * SCALAR("value 2",plain)
453 * BLOCK-END
454 * BLOCK-END
455 * STREAM-END
456 *
457 * YAML also permits non-indented sequences if they are included into a block
458 * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
459 *
460 * key:
461 * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
462 * - item 2
463 *
464 * Tokens:
465 *
466 * STREAM-START(utf-8)
467 * BLOCK-MAPPING-START
468 * KEY
469 * SCALAR("key",plain)
470 * VALUE
471 * BLOCK-ENTRY
472 * SCALAR("item 1",plain)
473 * BLOCK-ENTRY
474 * SCALAR("item 2",plain)
475 * BLOCK-END
476 */
477
478#if HAVE_CONFIG_H
479#include <config.h>
480#endif
481
482#include <yaml/yaml.h>
483
484#include <assert.h>
485
f2b59d4d
KS
/*
 * Ensure that the buffer contains the required number of characters.
 * Evaluates to 1 without touching the reader when enough characters are
 * already unread; otherwise evaluates to the result of
 * yaml_parser_update_buffer(): 1 on success, 0 on failure (reader error or
 * memory error).
 *
 * Note: both arguments may be evaluated more than once.
 */

#define UPDATE(parser,length)   \
    ((parser)->unread >= (length)   \
        ? 1                         \
        : yaml_parser_update_buffer((parser), (length)))
495
/*
 * Check the octet at the specified position.  The expected octet is
 * converted to yaml_char_t, so character literals with the high bit set
 * (e.g. '\xEF') compare correctly even where plain char is signed.
 */

#define CHECK_AT(parser,octet,offset)   \
    ((parser)->pointer[offset] == (yaml_char_t)(octet))

/*
 * Check the current octet in the buffer.
 */

#define CHECK(parser,octet) CHECK_AT((parser),(octet),0)
508
e71095e3
KS
/*
 * Check if the character at the specified position is an alphabetical
 * character, a digit, '_', or '-'.  Only ASCII letters are recognized.
 */

#define IS_ALPHA_AT(parser,offset)  \
    (((parser)->pointer[offset] >= (yaml_char_t) '0' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) '9') ||    \
     ((parser)->pointer[offset] >= (yaml_char_t) 'A' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) 'Z') ||    \
     ((parser)->pointer[offset] >= (yaml_char_t) 'a' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) 'z') ||    \
     (parser)->pointer[offset] == '_' ||                    \
     (parser)->pointer[offset] == '-')

#define IS_ALPHA(parser)    IS_ALPHA_AT((parser),0)
525
/*
 * Check if the character at the specified position is a decimal digit.
 */

#define IS_DIGIT_AT(parser,offset)  \
    (((parser)->pointer[offset] >= (yaml_char_t) '0' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) '9'))

#define IS_DIGIT(parser)    IS_DIGIT_AT((parser),0)

/*
 * Get the numeric value of a decimal digit.  The result is meaningful only
 * if IS_DIGIT_AT() holds for the same position.
 */

#define AS_DIGIT_AT(parser,offset)  \
    ((parser)->pointer[offset] - (yaml_char_t) '0')

#define AS_DIGIT(parser)    AS_DIGIT_AT((parser),0)
544
/*
 * Check if the character at the specified position is a hex-digit
 * (0-9, A-F, or a-f).
 */

#define IS_HEX_AT(parser,offset)    \
    (((parser)->pointer[offset] >= (yaml_char_t) '0' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) '9') ||    \
     ((parser)->pointer[offset] >= (yaml_char_t) 'A' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) 'F') ||    \
     ((parser)->pointer[offset] >= (yaml_char_t) 'a' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) 'f'))

#define IS_HEX(parser)  IS_HEX_AT((parser),0)

/*
 * Get the numeric value (0..15) of a hex-digit.  The result is meaningful
 * only if IS_HEX_AT() holds for the same position: any character that is
 * not a letter A-F/a-f is treated as a decimal digit.
 */

#define AS_HEX_AT(parser,offset)    \
    (((parser)->pointer[offset] >= (yaml_char_t) 'A' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) 'F') ?     \
     ((parser)->pointer[offset] - (yaml_char_t) 'A' + 10) : \
     ((parser)->pointer[offset] >= (yaml_char_t) 'a' &&     \
      (parser)->pointer[offset] <= (yaml_char_t) 'f') ?     \
     ((parser)->pointer[offset] - (yaml_char_t) 'a' + 10) : \
     ((parser)->pointer[offset] - (yaml_char_t) '0'))

#define AS_HEX(parser)  AS_HEX_AT((parser),0)
573
f2b59d4d
KS
/*
 * Test whether the octet at the given position is NUL.
 */

#define IS_Z_AT(parser,offset)  CHECK_AT(parser,'\0',(offset))

#define IS_Z(parser)            IS_Z_AT(parser,0)
581
e71095e3
KS
/*
 * Check if the character at the specified position is BOM.
 *
 * The UTF-8 encoding of the byte order mark (#xFEFF) is the three-octet
 * sequence EF BB BF, so the octets at offset, offset+1 and offset+2 are
 * compared in turn.
 */

#define IS_BOM_AT(parser,offset)    \
    (CHECK_AT(parser,'\xEF',(offset))           \
     && CHECK_AT(parser,'\xBB',(offset)+1)      \
     && CHECK_AT(parser,'\xBF',(offset)+2))     /* BOM (#xFEFF) */

#define IS_BOM(parser)  IS_BOM_AT(parser,0)
592
f2b59d4d
KS
/*
 * Test whether the octet at the given position is a space (' ').
 */

#define IS_SPACE_AT(parser,offset)  CHECK_AT(parser,' ',(offset))

#define IS_SPACE(parser)            IS_SPACE_AT(parser,0)

/*
 * Test whether the octet at the given position is a tab ('\t').
 */

#define IS_TAB_AT(parser,offset)    CHECK_AT(parser,'\t',(offset))

#define IS_TAB(parser)              IS_TAB_AT(parser,0)

/*
 * Test whether the octet at the given position is blank, i.e. either a
 * space or a tab.
 */

#define IS_BLANK_AT(parser,offset)  \
    (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset)))

#define IS_BLANK(parser)            IS_BLANK_AT(parser,0)
617
/*
 * Test whether a line break starts at the given position.  Recognized
 * breaks, in their UTF-8 encodings: CR, LF, NEL (C2 85), LS (E2 80 A8),
 * and PS (E2 80 A9).
 */

#define IS_BREAK_AT(parser,offset)  \
    (CHECK_AT(parser,'\r',(offset))                 /* CR (#xD) */      \
     || CHECK_AT(parser,'\n',(offset))              /* LF (#xA) */      \
     || (CHECK_AT(parser,'\xC2',(offset))                               \
         && CHECK_AT(parser,'\x85',(offset)+1))     /* NEL (#x85) */    \
     || (CHECK_AT(parser,'\xE2',(offset))                               \
         && CHECK_AT(parser,'\x80',(offset)+1)                          \
         && CHECK_AT(parser,'\xA8',(offset)+2))     /* LS (#x2028) */   \
     || (CHECK_AT(parser,'\xE2',(offset))                               \
         && CHECK_AT(parser,'\x80',(offset)+1)                          \
         && CHECK_AT(parser,'\xA9',(offset)+2)))    /* PS (#x2029) */

#define IS_BREAK(parser)    IS_BREAK_AT(parser,0)

/*
 * Test whether the two-octet sequence CR LF starts at the given position.
 */

#define IS_CRLF_AT(parser,offset)   \
    (CHECK_AT(parser,'\r',(offset)) && CHECK_AT(parser,'\n',(offset)+1))

#define IS_CRLF(parser)     IS_CRLF_AT(parser,0)
640
f2b59d4d
KS
/*
 * Test for a line break or NUL at the given position.
 */

#define IS_BREAKZ_AT(parser,offset) \
    (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset)))

#define IS_BREAKZ(parser)   IS_BREAKZ_AT(parser,0)

/*
 * Test for a space, a line break, or NUL at the given position.
 */

#define IS_SPACEZ_AT(parser,offset) \
    (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))

#define IS_SPACEZ(parser)   IS_SPACEZ_AT(parser,0)

/*
 * Test for a space, a tab, a line break, or NUL at the given position.
 */

#define IS_BLANKZ_AT(parser,offset) \
    (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))

#define IS_BLANKZ(parser)   IS_BLANKZ_AT(parser,0)
667
eb9cceb5
KS
/*
 * Determine the width, in octets, of the UTF-8 character that starts at the
 * specified position, judged from its leading octet alone:
 *
 *      0xxxxxxx -> 1,  110xxxxx -> 2,  1110xxxx -> 3,  11110xxx -> 4.
 *
 * Evaluates to 0 if the octet is not a valid leading octet.
 */

#define WIDTH_AT(parser,offset) \
    (((parser)->pointer[(offset)] & 0x80) == 0x00 ? 1 :     \
     ((parser)->pointer[(offset)] & 0xE0) == 0xC0 ? 2 :     \
     ((parser)->pointer[(offset)] & 0xF0) == 0xE0 ? 3 :     \
     ((parser)->pointer[(offset)] & 0xF8) == 0xF0 ? 4 : 0)

#define WIDTH(parser)   WIDTH_AT((parser),0)
679
/*
 * Advance the buffer pointer past the current character.  The index, the
 * column and the unread counter change by one (they count characters),
 * while the pointer moves by the character's width in octets.
 */

#define FORWARD(parser)     \
    ((parser)->index ++,    \
     (parser)->column ++,   \
     (parser)->unread --,   \
     (parser)->pointer += WIDTH((parser)))

/*
 * Advance the buffer pointer past the current line break.  CR LF is
 * consumed as a single break made of two characters.  The column is reset
 * to 0 and the line counter is incremented.  If the current position is
 * not a line break, nothing is changed and the macro evaluates to 0.
 */

#define FORWARD_LINE(parser)    \
    (IS_CRLF((parser)) ?                        \
     ((parser)->index += 2,                     \
      (parser)->column = 0,                     \
      (parser)->line ++,                        \
      (parser)->unread -= 2,                    \
      (parser)->pointer += 2) :                 \
     IS_BREAK((parser)) ?                       \
     ((parser)->index ++,                       \
      (parser)->column = 0,                     \
      (parser)->line ++,                        \
      (parser)->unread --,                      \
      (parser)->pointer += WIDTH((parser))) : 0)
703
/*
 * Grow a string when fewer than 5 free octets remain past the write
 * position.  Evaluates to 1 if no growth was needed, otherwise to the
 * result of yaml_parser_resize_string().
 */

#define RESIZE(parser,string)   \
    ((string).pointer-(string).buffer+5 < (string).size     \
        ? 1                                                 \
        : yaml_parser_resize_string(parser, &(string)))
e71095e3
KS
711
/*
 * Copy one UTF-8 character (1 to 4 octets, chosen by the leading octet)
 * from the parser buffer to a string buffer and advance both pointers.
 * The index, the column and the unread counter change by one character.
 *
 * The caller must make sure the string has room for the octets (see
 * RESIZE).  If the leading octet is not valid, no octets are copied but the
 * counters are still updated.
 */

#define COPY(parser,string)     \
    (((*(parser)->pointer & 0x80) == 0x00 ?                     \
      (*((string).pointer++) = *((parser)->pointer++)) :        \
      (*(parser)->pointer & 0xE0) == 0xC0 ?                     \
      (*((string).pointer++) = *((parser)->pointer++),          \
       *((string).pointer++) = *((parser)->pointer++)) :        \
      (*(parser)->pointer & 0xF0) == 0xE0 ?                     \
      (*((string).pointer++) = *((parser)->pointer++),          \
       *((string).pointer++) = *((parser)->pointer++),          \
       *((string).pointer++) = *((parser)->pointer++)) :        \
      (*(parser)->pointer & 0xF8) == 0xF0 ?                     \
      (*((string).pointer++) = *((parser)->pointer++),          \
       *((string).pointer++) = *((parser)->pointer++),          \
       *((string).pointer++) = *((parser)->pointer++),          \
       *((string).pointer++) = *((parser)->pointer++)) : 0),    \
     (parser)->index ++,                                        \
     (parser)->column ++,                                       \
     (parser)->unread --)
92d41fe1
KS
734
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * CR LF, CR, LF and NEL are normalized to a single LF in the output; LS and
 * PS are copied unchanged (three octets).  The column is reset to 0 and the
 * line counter is incremented.  CR LF counts as two characters for the
 * index and the unread counter; every other break counts as one.  If the
 * current position is not a line break, nothing is changed and the macro
 * evaluates to 0.
 */

#define COPY_LINE(parser,string)    \
    ((CHECK_AT((parser),'\r',0) && CHECK_AT((parser),'\n',1)) ? /* CR LF -> LF */   \
     (*((string).pointer++) = (yaml_char_t) '\n',                                   \
      (parser)->pointer += 2,                                                       \
      (parser)->index += 2,                                                         \
      (parser)->column = 0,                                                         \
      (parser)->line ++,                                                            \
      (parser)->unread -= 2) :                                                      \
     (CHECK_AT((parser),'\r',0) || CHECK_AT((parser),'\n',0)) ? /* CR|LF -> LF */   \
     (*((string).pointer++) = (yaml_char_t) '\n',                                   \
      (parser)->pointer ++,                                                         \
      (parser)->index ++,                                                           \
      (parser)->column = 0,                                                         \
      (parser)->line ++,                                                            \
      (parser)->unread --) :                                                        \
     (CHECK_AT((parser),'\xC2',0) && CHECK_AT((parser),'\x85',1)) ? /* NEL -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n',                                   \
      (parser)->pointer += 2,                                                       \
      (parser)->index ++,                                                           \
      (parser)->column = 0,                                                         \
      (parser)->line ++,                                                            \
      (parser)->unread --) :                                                        \
     (CHECK_AT((parser),'\xE2',0) &&                                                \
      CHECK_AT((parser),'\x80',1) &&                                                \
      (CHECK_AT((parser),'\xA8',2) ||                                               \
       CHECK_AT((parser),'\xA9',2))) ?                  /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->pointer++),                               \
      *((string).pointer++) = *((parser)->pointer++),                               \
      *((string).pointer++) = *((parser)->pointer++),                               \
      (parser)->index ++,                                                           \
      (parser)->column = 0,                                                         \
      (parser)->line ++,                                                            \
      (parser)->unread --) : 0)
772
/*
 * Append tail_string to head_string, then reset tail_string to empty.
 * The tail is cleared only if the append succeeded; the macro evaluates to
 * non-zero only if both steps succeeded.
 */

#define JOIN(parser,head_string,tail_string)    \
    (yaml_parser_join_string(parser, &(head_string), &(tail_string))    \
     && yaml_parser_clear_string(parser, &(tail_string)))
e71095e3 780
03be97ab
KS
781/*
782 * Public API declarations.
783 */
784
785YAML_DECLARE(yaml_token_t *)
786yaml_parser_get_token(yaml_parser_t *parser);
787
788YAML_DECLARE(yaml_token_t *)
789yaml_parser_peek_token(yaml_parser_t *parser);
790
f2b59d4d
KS
791/*
792 * Error handling.
793 */
794
795static int
796yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
797 yaml_mark_t context_mark, const char *problem);
798
799static yaml_mark_t
800yaml_parser_get_mark(yaml_parser_t *parser);
801
e71095e3
KS
802/*
803 * Buffers and lists.
804 */
805
806typedef struct {
807 yaml_char_t *buffer;
808 yaml_char_t *pointer;
809 size_t size;
810} yaml_string_t;
811
812static yaml_string_t
813yaml_parser_new_string(yaml_parser_t *parser);
814
815static int
816yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string);
817
92d41fe1
KS
818static int
819yaml_parser_join_string(yaml_parser_t *parser,
820 yaml_string_t *string1, yaml_string_t *string2);
821
822static int
823yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string);
824
e71095e3
KS
825static int
826yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size,
827 size_t item_size);
828
03be97ab
KS
829/*
830 * High-level token API.
831 */
832
833static int
834yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
835
836static int
837yaml_parser_fetch_next_token(yaml_parser_t *parser);
838
839/*
840 * Potential simple keys.
841 */
842
843static int
844yaml_parser_stale_simple_keys(yaml_parser_t *parser);
845
846static int
847yaml_parser_save_simple_key(yaml_parser_t *parser);
848
849static int
850yaml_parser_remove_simple_key(yaml_parser_t *parser);
851
eb9cceb5
KS
852static int
853yaml_parser_increase_flow_level(yaml_parser_t *parser);
854
855static int
856yaml_parser_decrease_flow_level(yaml_parser_t *parser);
857
858/*
859 * Token manipulation.
860 */
861
862static int
863yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token);
864
865static int
866yaml_parser_insert_token(yaml_parser_t *parser,
867 int number, yaml_token_t *token);
868
03be97ab
KS
869/*
870 * Indentation treatment.
871 */
872
873static int
eb9cceb5
KS
874yaml_parser_roll_indent(yaml_parser_t *parser, int column,
875 int number, yaml_token_type_t type, yaml_mark_t mark);
03be97ab
KS
876
877static int
f2b59d4d 878yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
03be97ab
KS
879
880/*
881 * Token fetchers.
882 */
883
884static int
885yaml_parser_fetch_stream_start(yaml_parser_t *parser);
886
887static int
888yaml_parser_fetch_stream_end(yaml_parser_t *parser);
889
890static int
891yaml_parser_fetch_directive(yaml_parser_t *parser);
892
03be97ab
KS
893static int
894yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
895 yaml_token_type_t type);
896
03be97ab
KS
897static int
898yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
899 yaml_token_type_t type);
900
03be97ab
KS
901static int
902yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
903 yaml_token_type_t type);
904
905static int
906yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
907
908static int
909yaml_parser_fetch_block_entry(yaml_parser_t *parser);
910
911static int
912yaml_parser_fetch_key(yaml_parser_t *parser);
913
914static int
915yaml_parser_fetch_value(yaml_parser_t *parser);
916
917static int
eb9cceb5 918yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
03be97ab
KS
919
920static int
921yaml_parser_fetch_tag(yaml_parser_t *parser);
922
03be97ab
KS
923static int
924yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
925
03be97ab
KS
926static int
927yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
928
929static int
930yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
931
932/*
933 * Token scanners.
934 */
935
936static int
937yaml_parser_scan_to_next_token(yaml_parser_t *parser);
938
939static yaml_token_t *
940yaml_parser_scan_directive(yaml_parser_t *parser);
941
942static int
943yaml_parser_scan_directive_name(yaml_parser_t *parser,
944 yaml_mark_t start_mark, yaml_char_t **name);
945
946static int
e71095e3 947yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
03be97ab
KS
948 yaml_mark_t start_mark, int *major, int *minor);
949
950static int
e71095e3 951yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
03be97ab
KS
952 yaml_mark_t start_mark, int *number);
953
954static int
955yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
e71095e3 956 yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
03be97ab
KS
957
958static yaml_token_t *
959yaml_parser_scan_anchor(yaml_parser_t *parser,
960 yaml_token_type_t type);
961
962static yaml_token_t *
963yaml_parser_scan_tag(yaml_parser_t *parser);
964
965static int
966yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
967 yaml_mark_t start_mark, yaml_char_t **handle);
968
969static int
970yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
e71095e3
KS
971 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
972
973static int
974yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
975 yaml_mark_t start_mark, yaml_string_t *string);
03be97ab
KS
976
977static yaml_token_t *
978yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal);
979
92d41fe1
KS
980static int
981yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
982 int *indent, yaml_string_t *breaks,
983 yaml_mark_t start_mark, yaml_mark_t *end_mark);
984
03be97ab
KS
985static int
986yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser,
987 yaml_mark_t start_mark, int *chomping, int *increment);
988
989static yaml_token_t *
990yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
991
992static yaml_token_t *
993yaml_parser_scan_plain_scalar(yaml_parser_t *parser);
994
f2b59d4d
KS
995/*
996 * Get the next token and remove it from the tokens queue.
997 */
998
999YAML_DECLARE(yaml_token_t *)
1000yaml_parser_get_token(yaml_parser_t *parser)
1001{
1002 yaml_token_t *token;
1003
1004 assert(parser); /* Non-NULL parser object is expected. */
1005 assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
1006
1007 /* Ensure that the tokens queue contains enough tokens. */
1008
1009 if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
1010
1011 /* Fetch the next token from the queue. */
1012
1013 token = parser->tokens[parser->tokens_head];
1014
1015 /* Move the queue head. */
1016
1017 parser->tokens[parser->tokens_head++] = NULL;
1018 if (parser->tokens_head == parser->tokens_size)
1019 parser->tokens_head = 0;
1020
1021 parser->tokens_parsed++;
1022
1023 return token;
1024}
1025
1026/*
1027 * Get the next token, but don't remove it from the queue.
1028 */
1029
1030YAML_DECLARE(yaml_token_t *)
1031yaml_parser_peek_token(yaml_parser_t *parser)
1032{
1033 assert(parser); /* Non-NULL parser object is expected. */
1034 assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
1035
1036 /* Ensure that the tokens queue contains enough tokens. */
1037
1038 if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
1039
1040 /* Fetch the next token from the queue. */
1041
1042 return parser->tokens[parser->tokens_head];
1043}
1044
e71095e3
KS
1045/*
1046 * Create a new string.
1047 */
1048
1049static yaml_string_t
1050yaml_parser_new_string(yaml_parser_t *parser)
1051{
1052 yaml_string_t string = { NULL, NULL, 0 };
1053
1054 string.buffer = yaml_malloc(YAML_DEFAULT_SIZE);
1055 if (!string.buffer) {
1056 parser->error = YAML_MEMORY_ERROR;
1057 return string;
1058 }
1059
1060 memset(string.buffer, 0, YAML_DEFAULT_SIZE);
1061 string.pointer = string.buffer;
1062 string.size = YAML_DEFAULT_SIZE;
1063
1064 return string;
1065}
1066
1067/*
1068 * Double the size of a string.
1069 */
1070
1071static int
1072yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string)
1073{
1074 yaml_char_t *new_buffer = yaml_realloc(string->buffer, string->size*2);
1075
1076 if (!new_buffer) {
1077 yaml_free(string->buffer);
1078 string->buffer = NULL;
1079 string->pointer = NULL;
1080 string->size = 0;
1081 parser->error = YAML_MEMORY_ERROR;
1082 return 0;
1083 }
1084
1085 memset(new_buffer+string->size, 0, string->size);
1086
1087 string->pointer = new_buffer + (string->buffer-string->pointer);
1088 string->buffer = new_buffer;
1089 string->size *= 2;
1090
1091 return 1;
1092}
1093
92d41fe1
KS
1094/*
1095 * Append a string to another string.
1096 */
1097
1098static int
1099yaml_parser_join_string(yaml_parser_t *parser,
1100 yaml_string_t *string1, yaml_string_t *string2)
1101{
1102 if (string2->buffer == string2->pointer) return 1;
1103
1104 while (string1->pointer - string1->buffer + string2->pointer - string2->buffer + 1
1105 > string1->size) {
1106 if (!yaml_parser_resize_string(parser, string1)) return 0;
1107 }
1108
1109 memcpy(string1->pointer, string2->buffer, string2->pointer-string2->buffer);
1110
1111 return 1;
1112}
1113
1114/*
1115 * Fill the string with NULs and move the pointer to the beginning.
1116 */
1117
1118static int
1119yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string)
1120{
1121 if (string->buffer == string->pointer) return 1;
1122
1123 memset(string->buffer, 0, string->pointer-string->buffer);
1124
1125 string->pointer = string->buffer;
1126
1127 return 1;
1128}
1129
e71095e3
KS
1130/*
1131 * Double a list.
1132 */
1133
1134static int
1135yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size,
1136 size_t item_size)
1137{
1138 void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2);
1139
1140 if (!new_buffer) {
1141 parser->error = YAML_MEMORY_ERROR;
1142 return 0;
1143 }
1144
1145 memset(new_buffer+(*size), 0, item_size*(*size));
1146
1147 *buffer = new_buffer;
1148 *size *= 2;
1149
1150 return 1;
1151}
1152
f2b59d4d
KS
1153/*
1154 * Set the scanner error and return 0.
1155 */
1156
1157static int
1158yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
1159 yaml_mark_t context_mark, const char *problem)
1160{
1161 parser->error = YAML_SCANNER_ERROR;
1162 parser->context = context;
1163 parser->context_mark = context_mark;
1164 parser->problem = problem;
1165 parser->problem_mark = yaml_parser_get_mark(parser);
1166}
1167
1168/*
1169 * Get the mark for the current buffer position.
1170 */
1171
1172static yaml_mark_t
1173yaml_parser_get_mark(yaml_parser_t *parser)
1174{
1175 yaml_mark_t mark = { parser->index, parser->line, parser->column };
1176
1177 return mark;
1178}
1179
1180
1181/*
1182 * Ensure that the tokens queue contains at least one token which can be
1183 * returned to the Parser.
1184 */
1185
1186static int
1187yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
1188{
1189 int need_more_tokens;
1190 int k;
1191
1192 /* While we need more tokens to fetch, do it. */
1193
1194 while (1)
1195 {
1196 /*
1197 * Check if we really need to fetch more tokens.
1198 */
1199
1200 need_more_tokens = 0;
1201
1202 if (parser->tokens_head == parser->tokens_tail)
1203 {
1204 /* Queue is empty. */
1205
1206 need_more_tokens = 1;
1207 }
1208 else
1209 {
1210 /* Check if any potential simple key may occupy the head position. */
1211
1212 for (k = 0; k <= parser->flow_level; k++) {
1213 yaml_simple_key_t *simple_key = parser->simple_keys[k];
1214 if (simple_key
1215 && (simple_key->token_number == parser->tokens_parsed)) {
1216 need_more_tokens = 1;
1217 break;
1218 }
1219 }
1220 }
1221
1222 /* We are finished. */
1223
1224 if (!need_more_tokens)
1225 break;
1226
1227 /* Fetch the next token. */
1228
1229 if (!yaml_parser_fetch_next_token(parser))
1230 return 0;
1231 }
1232
1233 return 1;
1234}
1235
1236/*
1237 * The dispatcher for token fetchers.
1238 */
1239
1240static int
1241yaml_parser_fetch_next_token(yaml_parser_t *parser)
1242{
1243 /* Ensure that the buffer is initialized. */
1244
1245 if (!UPDATE(parser, 1))
1246 return 0;
1247
1248 /* Check if we just started scanning. Fetch STREAM-START then. */
1249
1250 if (!parser->stream_start_produced)
1251 return yaml_parser_fetch_stream_start(parser);
1252
1253 /* Eat whitespaces and comments until we reach the next token. */
1254
1255 if (!yaml_parser_scan_to_next_token(parser))
1256 return 0;
1257
1258 /* Check the indentation level against the current column. */
1259
1260 if (!yaml_parser_unroll_indent(parser, parser->column))
1261 return 0;
1262
1263 /*
1264 * Ensure that the buffer contains at least 4 characters. 4 is the length
1265 * of the longest indicators ('--- ' and '... ').
1266 */
1267
1268 if (!UPDATE(parser, 4))
1269 return 0;
1270
1271 /* Is it the end of the stream? */
1272
1273 if (IS_Z(parser))
1274 return yaml_parser_fetch_stream_end(parser);
1275
1276 /* Is it a directive? */
1277
1278 if (parser->column == 0 && CHECK(parser, '%'))
1279 return yaml_parser_fetch_directive(parser);
1280
1281 /* Is it the document start indicator? */
1282
1283 if (parser->column == 0
1284 && CHECK_AT(parser, '-', 0)
1285 && CHECK_AT(parser, '-', 1)
1286 && CHECK_AT(parser, '-', 2)
1287 && IS_BLANKZ_AT(parser, 3))
eb9cceb5
KS
1288 return yaml_parser_fetch_document_indicator(parser,
1289 YAML_DOCUMENT_START_TOKEN);
f2b59d4d
KS
1290
1291 /* Is it the document end indicator? */
1292
1293 if (parser->column == 0
1294 && CHECK_AT(parser, '.', 0)
1295 && CHECK_AT(parser, '.', 1)
1296 && CHECK_AT(parser, '.', 2)
1297 && IS_BLANKZ_AT(parser, 3))
eb9cceb5
KS
1298 return yaml_parser_fetch_document_indicator(parser,
1299 YAML_DOCUMENT_END_TOKEN);
f2b59d4d
KS
1300
1301 /* Is it the flow sequence start indicator? */
1302
1303 if (CHECK(parser, '['))
eb9cceb5
KS
1304 return yaml_parser_fetch_flow_collection_start(parser,
1305 YAML_FLOW_SEQUENCE_START_TOKEN);
f2b59d4d
KS
1306
1307 /* Is it the flow mapping start indicator? */
1308
1309 if (CHECK(parser, '{'))
eb9cceb5
KS
1310 return yaml_parser_fetch_flow_collection_start(parser,
1311 YAML_FLOW_MAPPING_START_TOKEN);
f2b59d4d
KS
1312
1313 /* Is it the flow sequence end indicator? */
1314
1315 if (CHECK(parser, ']'))
eb9cceb5
KS
1316 return yaml_parser_fetch_flow_collection_end(parser,
1317 YAML_FLOW_SEQUENCE_END_TOKEN);
f2b59d4d
KS
1318
1319 /* Is it the flow mapping end indicator? */
1320
1321 if (CHECK(parser, '}'))
eb9cceb5
KS
1322 return yaml_parser_fetch_flow_collection_end(parser,
1323 YAML_FLOW_MAPPING_END_TOKEN);
f2b59d4d
KS
1324
1325 /* Is it the flow entry indicator? */
1326
1327 if (CHECK(parser, ','))
1328 return yaml_parser_fetch_flow_entry(parser);
1329
1330 /* Is it the block entry indicator? */
1331
1332 if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
1333 return yaml_parser_fetch_block_entry(parser);
1334
1335 /* Is it the key indicator? */
1336
1337 if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
1338 return yaml_parser_fetch_key(parser);
1339
1340 /* Is it the value indicator? */
1341
1342 if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
1343 return yaml_parser_fetch_value(parser);
1344
1345 /* Is it an alias? */
1346
1347 if (CHECK(parser, '*'))
eb9cceb5 1348 return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
f2b59d4d
KS
1349
1350 /* Is it an anchor? */
1351
1352 if (CHECK(parser, '&'))
eb9cceb5 1353 return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
f2b59d4d
KS
1354
1355 /* Is it a tag? */
1356
1357 if (CHECK(parser, '!'))
1358 return yaml_parser_fetch_tag(parser);
1359
1360 /* Is it a literal scalar? */
1361
1362 if (CHECK(parser, '|') && !parser->flow_level)
1363 return yaml_parser_fetch_block_scalar(parser, 1);
1364
1365 /* Is it a folded scalar? */
1366
1367 if (CHECK(parser, '>') && !parser->flow_level)
1368 return yaml_parser_fetch_block_scalar(parser, 0);
1369
1370 /* Is it a single-quoted scalar? */
1371
1372 if (CHECK(parser, '\''))
1373 return yaml_parser_fetch_flow_scalar(parser, 1);
1374
1375 /* Is it a double-quoted scalar? */
1376
1377 if (CHECK(parser, '"'))
1378 return yaml_parser_fetch_flow_scalar(parser, 0);
1379
1380 /*
1381 * Is it a plain scalar?
1382 *
1383 * A plain scalar may start with any non-blank characters except
1384 *
1385 * '-', '?', ':', ',', '[', ']', '{', '}',
1386 * '#', '&', '*', '!', '|', '>', '\'', '\"',
1387 * '%', '@', '`'.
1388 *
1389 * In the block context, it may also start with the characters
1390 *
1391 * '-', '?', ':'
1392 *
1393 * if it is followed by a non-space character.
1394 *
1395 * The last rule is more restrictive than the specification requires.
1396 */
1397
1398 if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
1399 || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
1400 || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
1401 || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
1402 || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
1403 || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
1404 || CHECK(parser, '@') || CHECK(parser, '`')) ||
1405 (!parser->flow_level &&
1406 (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
1407 IS_BLANKZ_AT(parser, 1)))
1408 return yaml_parser_fetch_plain_scalar(parser);
1409
1410 /*
1411 * If we don't determine the token type so far, it is an error.
1412 */
1413
1414 return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
1415 yaml_parser_get_mark(parser), "found character that cannot start any token");
1416}
1417
eb9cceb5
KS
1418/*
1419 * Check the list of potential simple keys and remove the positions that
1420 * cannot contain simple keys anymore.
1421 */
1422
1423static int
1424yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1425{
1426 int level;
1427
1428 /* Check for a potential simple key for each flow level. */
1429
1430 for (level = 0; level <= parser->flow_level; level++)
1431 {
1432 yaml_simple_key_t *simple_key = parser->simple_keys[level];
1433
1434 /*
1435 * The specification requires that a simple key
1436 *
1437 * - is limited to a single line,
1438 * - is shorter than 1024 characters.
1439 */
1440
1441 if (simple_key && (simple_key->line < parser->line ||
1442 simple_key->index < parser->index+1024)) {
1443
1444 /* Check if the potential simple key to be removed is required. */
1445
1446 if (simple_key->required) {
1447 return yaml_parser_set_scanner_error(parser,
1448 "while scanning a simple key", simple_key->mark,
1449 "could not found expected ':'");
1450 }
1451
1452 yaml_free(simple_key);
1453 parser->simple_keys[level] = NULL;
1454 }
1455 }
1456
1457 return 1;
1458}
1459
1460/*
1461 * Check if a simple key may start at the current position and add it if
1462 * needed.
1463 */
1464
1465static int
1466yaml_parser_save_simple_key(yaml_parser_t *parser)
1467{
1468 /*
1469 * A simple key is required at the current position if the scanner is in
1470 * the block context and the current column coincides with the indentation
1471 * level.
1472 */
1473
1474 int required = (!parser->flow_level && parser->indent == parser->column);
1475
1476 /*
1477 * A simple key is required only when it is the first token in the current
1478 * line. Therefore it is always allowed. But we add a check anyway.
1479 */
1480
1481 assert(parser->simple_key_allowed || !required); /* Impossible. */
1482
1483 /*
1484 * If the current position may start a simple key, save it.
1485 */
1486
1487 if (parser->simple_key_allowed)
1488 {
1489 yaml_simple_key_t simple_key = { required,
1490 parser->tokens_parsed + parser->tokens_tail - parser->tokens_head,
1491 parser->index, parser->line, parser->column,
1492 yaml_parser_get_mark(parser) };
1493
1494 if (!yaml_parser_remove_simple_key(parser)) return 0;
1495
1496 parser->simple_keys[parser->flow_level] =
1497 yaml_malloc(sizeof(yaml_simple_key_t));
1498 if (!parser->simple_keys[parser->flow_level]) {
1499 parser->error = YAML_MEMORY_ERROR;
1500 return 0;
1501 }
1502
1503 *(parser->simple_keys[parser->flow_level]) = simple_key;
1504 }
1505
1506 return 1;
1507}
1508
1509/*
1510 * Remove a potential simple key at the current flow level.
1511 */
1512
1513static int
1514yaml_parser_remove_simple_key(yaml_parser_t *parser)
1515{
1516 yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
1517
1518 if (simple_key)
1519 {
1520 /* If the key is required, it is an error. */
1521
1522 if (simple_key->required) {
1523 return yaml_parser_set_scanner_error(parser,
1524 "while scanning a simple key", simple_key->mark,
1525 "could not found expected ':'");
1526 }
1527
1528 /* Remove the key from the list. */
1529
1530 yaml_free(simple_key);
1531 parser->simple_keys[parser->flow_level] = NULL;
1532 }
1533
1534 return 1;
1535}
1536
1537/*
1538 * Increase the flow level and resize the simple key list if needed.
1539 */
1540
1541static int
1542yaml_parser_increase_flow_level(yaml_parser_t *parser)
1543{
1544 /* Check if we need to resize the list. */
1545
e71095e3
KS
1546 if (parser->flow_level == parser->simple_keys_size-1) {
1547 if (!yaml_parser_resize_list(parser, (void **)&parser->simple_keys,
1548 &parser->simple_keys_size, sizeof(yaml_simple_key_t *)))
eb9cceb5 1549 return 0;
eb9cceb5
KS
1550 }
1551
1552 /* Increase the flow level and reset the simple key. */
1553
1554 parser->simple_keys[++parser->flow_level] = NULL;
1555
1556 return 1;
1557}
1558
1559/*
1560 * Decrease the flow level.
1561 */
1562
1563static int
1564yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1565{
1566 assert(parser->flow_level); /* Greater than 0. */
1567 assert(!parser->simple_keys[parser->flow_level]); /* Must be removed. */
1568
1569 parser->flow_level --;
1570
1571 return 1;
1572}
1573
1574/*
1575 * Add a token to the tail of the tokens queue.
1576 */
1577
1578static int
1579yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token)
1580{
1581 return yaml_parser_insert_token(parser, -1, token);
1582}
1583
1584/*
1585 * Insert the token into the tokens queue. The number parameter is the
1586 * ordinal number of the token. If the number is equal to -1, add the token
1587 * to the tail of the queue.
1588 */
1589
1590static int
1591yaml_parser_insert_token(yaml_parser_t *parser,
1592 int number, yaml_token_t *token)
1593{
1594 /* The index of the token in the queue. */
1595
1596 int index = (number == -1)
1597 ? parser->tokens_tail - parser->tokens_head
1598 : number - parser->tokens_parsed;
1599
1600 assert(index >= 0 && index <= (parser->tokens_tail-parser->tokens_head));
1601
1602 /* Check if we need to resize the queue. */
1603
e71095e3
KS
1604 if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) {
1605 if (!yaml_parser_resize_list(parser, (void **)&parser->tokens,
1606 &parser->tokens_size, sizeof(yaml_token_t *)))
eb9cceb5 1607 return 0;
eb9cceb5
KS
1608 }
1609
1610 /* Check if we need to move the queue to the beginning of the buffer. */
1611
1612 if (parser->tokens_tail == parser->tokens_size)
1613 {
1614 if (parser->tokens_head < parser->tokens_tail) {
1615 memmove(parser->tokens, parser->tokens+parser->tokens_head,
1616 sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head));
1617 }
1618 parser->tokens_tail -= parser->tokens_head;
1619 parser->tokens_head = 0;
1620 }
1621
1622 /* Check if we need to free space within the queue. */
1623
1624 if (index < (parser->tokens_tail-parser->tokens_head)) {
1625 memmove(parser->tokens+parser->tokens_head+index+1,
1626 parser->tokens+parser->tokens_head+index,
1627 sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head-index));
1628 }
1629
1630 /* Insert the token. */
1631
1632 parser->tokens[parser->tokens_head+index] = token;
1633 parser->tokens_tail ++;
1634
1635 return 1;
1636}
1637
1638/*
1639 * Push the current indentation level to the stack and set the new level
1640 * the current column is greater than the indentation level. In this case,
1641 * append or insert the specified token into the token queue.
1642 *
1643 */
1644
1645static int
1646yaml_parser_roll_indent(yaml_parser_t *parser, int column,
1647 int number, yaml_token_type_t type, yaml_mark_t mark)
1648{
1649 yaml_token_t *token;
1650
1651 /* In the flow context, do nothing. */
1652
1653 if (parser->flow_level)
1654 return 1;
1655
1656 if (parser->indent < column)
1657 {
1658 /* Check if we need to expand the indents stack. */
1659
e71095e3
KS
1660 if (parser->indents_length == parser->indents_size) {
1661 if (!yaml_parser_resize_list(parser, (void **)&parser->indents,
1662 &parser->indents_size, sizeof(int)))
eb9cceb5 1663 return 0;
eb9cceb5
KS
1664 }
1665
1666 /*
1667 * Push the current indentation level to the stack and set the new
1668 * indentation level.
1669 */
1670
1671 parser->indents[parser->indents_length++] = parser->indent;
1672 parser->indent = column;
1673
1674 /* Create a token. */
1675
1676 token = yaml_token_new(type, mark, mark);
1677 if (!token) {
1678 parser->error = YAML_MEMORY_ERROR;
1679 return 0;
1680 }
1681
1682 /* Insert the token into the queue. */
1683
1684 if (!yaml_parser_insert_token(parser, number, token)) {
1685 yaml_token_delete(token);
1686 return 0;
1687 }
1688 }
1689
1690 return 1;
1691}
1692
1693/*
1694 * Pop indentation levels from the indents stack until the current level
1695 * becomes less or equal to the column. For each intendation level, append
1696 * the BLOCK-END token.
1697 */
1698
1699
1700static int
1701yaml_parser_unroll_indent(yaml_parser_t *parser, int column)
1702{
1703 yaml_token_t *token;
1704
1705 /* In the flow context, do nothing. */
1706
1707 if (parser->flow_level)
1708 return 1;
1709
1710 /* Loop through the intendation levels in the stack. */
1711
1712 while (parser->indent > column)
1713 {
1714 yaml_mark_t mark = yaml_parser_get_mark(parser);
1715
1716 /* Create a token. */
1717
1718 token = yaml_token_new(YAML_BLOCK_END_TOKEN, mark, mark);
1719 if (!token) {
1720 parser->error = YAML_MEMORY_ERROR;
1721 return 0;
1722 }
1723
1724 /* Append the token to the queue. */
1725
1726 if (!yaml_parser_append_token(parser, token)) {
1727 yaml_token_delete(token);
1728 return 0;
1729 }
1730
1731 /* Pop the indentation level. */
1732
1733 assert(parser->indents_length); /* Non-empty stack expected. */
1734
1735 parser->indent = parser->indents[--parser->indents_length];
1736 }
1737
1738 return 1;
1739}
1740
1741/*
1742 * Initialize the scanner and produce the STREAM-START token.
1743 */
1744
1745static int
1746yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1747{
1748 yaml_mark_t mark = yaml_parser_get_mark(parser);
1749 yaml_token_t *token;
1750
1751 /* Set the initial indentation. */
1752
1753 parser->indent = -1;
1754
1755 /* A simple key is allowed at the beginning of the stream. */
1756
1757 parser->simple_key_allowed = 1;
1758
1759 /* We have started. */
1760
1761 parser->stream_start_produced = 1;
1762
1763 /* Create the STREAM-START token. */
1764
1765 token = yaml_stream_start_token_new(parser->encoding, mark, mark);
1766 if (!token) {
1767 parser->error = YAML_MEMORY_ERROR;
1768 return 0;
1769 }
1770
1771 /* Append the token to the queue. */
1772
1773 if (!yaml_parser_append_token(parser, token)) {
1774 yaml_token_delete(token);
1775 return 0;
1776 }
1777
1778 return 1;
1779}
1780
1781/*
1782 * Produce the STREAM-END token and shut down the scanner.
1783 */
1784
1785static int
1786yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1787{
1788 yaml_mark_t mark = yaml_parser_get_mark(parser);
1789 yaml_token_t *token;
1790
1791 /* Reset the indentation level. */
1792
1793 if (!yaml_parser_unroll_indent(parser, -1))
1794 return 0;
1795
1796 /* We have finished. */
1797
1798 parser->stream_end_produced = 1;
1799
1800 /* Create the STREAM-END token. */
1801
1802 token = yaml_stream_end_token_new(mark, mark);
1803 if (!token) {
1804 parser->error = YAML_MEMORY_ERROR;
1805 return 0;
1806 }
1807
1808 /* Append the token to the queue. */
1809
1810 if (!yaml_parser_append_token(parser, token)) {
1811 yaml_token_delete(token);
1812 return 0;
1813 }
1814
1815 return 1;
1816}
1817
1818/*
1819 * Produce the YAML-DIRECTIVE or TAG-DIRECTIVE token.
1820 */
1821
1822static int
1823yaml_parser_fetch_directive(yaml_parser_t *parser)
1824{
1825 yaml_token_t *token;
1826
1827 /* Reset the indentation level. */
1828
1829 if (!yaml_parser_unroll_indent(parser, -1))
1830 return 0;
1831
1832 /* Reset simple keys. */
1833
1834 if (!yaml_parser_remove_simple_key(parser))
1835 return 0;
1836
1837 parser->simple_key_allowed = 0;
1838
1839 /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1840
1841 token = yaml_parser_scan_directive(parser);
1842 if (!token) return 0;
1843
1844 /* Append the token to the queue. */
1845
1846 if (!yaml_parser_append_token(parser, token)) {
1847 yaml_token_delete(token);
1848 return 0;
1849 }
1850
1851 return 1;
1852}
1853
1854/*
1855 * Produce the DOCUMENT-START or DOCUMENT-END token.
1856 */
1857
1858static int
1859yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1860 yaml_token_type_t type)
1861{
1862 yaml_mark_t start_mark, end_mark;
1863 yaml_token_t *token;
1864
1865 /* Reset the indentation level. */
1866
1867 if (!yaml_parser_unroll_indent(parser, -1))
1868 return 0;
1869
1870 /* Reset simple keys. */
1871
1872 if (!yaml_parser_remove_simple_key(parser))
1873 return 0;
1874
1875 parser->simple_key_allowed = 0;
1876
1877 /* Consume the token. */
1878
1879 start_mark = yaml_parser_get_mark(parser);
1880
1881 FORWARD(parser);
1882 FORWARD(parser);
1883 FORWARD(parser);
1884
1885 end_mark = yaml_parser_get_mark(parser);
1886
1887 /* Create the DOCUMENT-START or DOCUMENT-END token. */
1888
1889 token = yaml_token_new(type, start_mark, end_mark);
1890 if (!token) {
1891 parser->error = YAML_MEMORY_ERROR;
1892 return 0;
1893 }
1894
1895 /* Append the token to the queue. */
1896
1897 if (!yaml_parser_append_token(parser, token)) {
1898 yaml_token_delete(token);
1899 return 0;
1900 }
1901
1902 return 1;
1903}
1904
1905/*
1906 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1907 */
1908
1909static int
1910yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1911 yaml_token_type_t type)
1912{
1913 yaml_mark_t start_mark, end_mark;
1914 yaml_token_t *token;
1915
1916 /* The indicators '[' and '{' may start a simple key. */
1917
1918 if (!yaml_parser_save_simple_key(parser))
1919 return 0;
1920
1921 /* Increase the flow level. */
1922
1923 if (!yaml_parser_increase_flow_level(parser))
1924 return 0;
1925
1926 /* A simple key may follow the indicators '[' and '{'. */
1927
1928 parser->simple_key_allowed = 1;
1929
1930 /* Consume the token. */
1931
1932 start_mark = yaml_parser_get_mark(parser);
1933 FORWARD(parser);
1934 end_mark = yaml_parser_get_mark(parser);
1935
1936 /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1937
1938 token = yaml_token_new(type, start_mark, end_mark);
1939 if (!token) {
1940 parser->error = YAML_MEMORY_ERROR;
1941 return 0;
1942 }
1943
1944 /* Append the token to the queue. */
1945
1946 if (!yaml_parser_append_token(parser, token)) {
1947 yaml_token_delete(token);
1948 return 0;
1949 }
1950
1951 return 1;
1952}
1953
1954/*
1955 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1956 */
1957
1958static int
1959yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1960 yaml_token_type_t type)
1961{
1962 yaml_mark_t start_mark, end_mark;
1963 yaml_token_t *token;
1964
1965 /* Reset any potential simple key on the current flow level. */
1966
1967 if (!yaml_parser_remove_simple_key(parser))
1968 return 0;
1969
1970 /* Decrease the flow level. */
1971
1972 if (!yaml_parser_decrease_flow_level(parser))
1973 return 0;
1974
1975 /* No simple keys after the indicators ']' and '}'. */
1976
1977 parser->simple_key_allowed = 0;
1978
1979 /* Consume the token. */
1980
1981 start_mark = yaml_parser_get_mark(parser);
1982 FORWARD(parser);
1983 end_mark = yaml_parser_get_mark(parser);
1984
1985 /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1986
1987 token = yaml_token_new(type, start_mark, end_mark);
1988 if (!token) {
1989 parser->error = YAML_MEMORY_ERROR;
1990 return 0;
1991 }
1992
1993 /* Append the token to the queue. */
1994
1995 if (!yaml_parser_append_token(parser, token)) {
1996 yaml_token_delete(token);
1997 return 0;
1998 }
1999
2000 return 1;
2001}
2002
2003/*
2004 * Produce the FLOW-ENTRY token.
2005 */
2006
2007static int
2008yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
2009{
2010 yaml_mark_t start_mark, end_mark;
2011 yaml_token_t *token;
2012
2013 /* Reset any potential simple keys on the current flow level. */
2014
2015 if (!yaml_parser_remove_simple_key(parser))
2016 return 0;
2017
2018 /* Simple keys are allowed after ','. */
2019
2020 parser->simple_key_allowed = 1;
2021
2022 /* Consume the token. */
2023
2024 start_mark = yaml_parser_get_mark(parser);
2025 FORWARD(parser);
2026 end_mark = yaml_parser_get_mark(parser);
2027
2028 /* Create the FLOW-ENTRY token. */
2029
2030 token = yaml_token_new(YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
2031 if (!token) {
2032 parser->error = YAML_MEMORY_ERROR;
2033 return 0;
2034 }
2035
2036 /* Append the token to the queue. */
2037
2038 if (!yaml_parser_append_token(parser, token)) {
2039 yaml_token_delete(token);
2040 return 0;
2041 }
2042
2043 return 1;
2044}
2045
2046/*
2047 * Produce the BLOCK-ENTRY token.
2048 */
2049
2050static int
2051yaml_parser_fetch_block_entry(yaml_parser_t *parser)
2052{
2053 yaml_mark_t start_mark, end_mark;
2054 yaml_token_t *token;
2055
2056 /* Check if the scanner is in the block context. */
2057
2058 if (!parser->flow_level)
2059 {
2060 /* Check if we are allowed to start a new entry. */
2061
2062 if (!parser->simple_key_allowed) {
2063 return yaml_parser_set_scanner_error(parser, NULL,
2064 yaml_parser_get_mark(parser),
2065 "block sequence entries are not allowed in this context");
2066 }
2067
2068 /* Add the BLOCK-SEQUENCE-START token if needed. */
2069
2070 if (!yaml_parser_roll_indent(parser, parser->column, -1,
2071 YAML_BLOCK_SEQUENCE_START_TOKEN, yaml_parser_get_mark(parser)))
2072 return 0;
2073 }
2074 else
2075 {
2076 /*
2077 * It is an error for the '-' indicator to occur in the flow context,
2078 * but we let the Parser detect and report about it because the Parser
2079 * is able to point to the context.
2080 */
2081 }
2082
2083 /* Reset any potential simple keys on the current flow level. */
2084
2085 if (!yaml_parser_remove_simple_key(parser))
2086 return 0;
2087
2088 /* Simple keys are allowed after '-'. */
2089
2090 parser->simple_key_allowed = 1;
2091
2092 /* Consume the token. */
2093
2094 start_mark = yaml_parser_get_mark(parser);
2095 FORWARD(parser);
2096 end_mark = yaml_parser_get_mark(parser);
2097
2098 /* Create the BLOCK-ENTRY token. */
2099
2100 token = yaml_token_new(YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
2101 if (!token) {
2102 parser->error = YAML_MEMORY_ERROR;
2103 return 0;
2104 }
2105
2106 /* Append the token to the queue. */
2107
2108 if (!yaml_parser_append_token(parser, token)) {
2109 yaml_token_delete(token);
2110 return 0;
2111 }
2112
2113 return 1;
2114}
2115
2116/*
2117 * Produce the KEY token.
2118 */
2119
2120static int
2121yaml_parser_fetch_key(yaml_parser_t *parser)
2122{
2123 yaml_mark_t start_mark, end_mark;
2124 yaml_token_t *token;
2125
2126 /* In the block context, additional checks are required. */
2127
2128 if (!parser->flow_level)
2129 {
2130 /* Check if we are allowed to start a new key (not nessesary simple). */
2131
2132 if (!parser->simple_key_allowed) {
2133 return yaml_parser_set_scanner_error(parser, NULL,
2134 yaml_parser_get_mark(parser),
2135 "mapping keys are not allowed in this context");
2136 }
2137
2138 /* Add the BLOCK-MAPPING-START token if needed. */
2139
2140 if (!yaml_parser_roll_indent(parser, parser->column, -1,
2141 YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
2142 return 0;
2143 }
2144
2145 /* Reset any potential simple keys on the current flow level. */
2146
2147 if (!yaml_parser_remove_simple_key(parser))
2148 return 0;
2149
2150 /* Simple keys are allowed after '?' in the block context. */
2151
2152 parser->simple_key_allowed = (!parser->flow_level);
2153
2154 /* Consume the token. */
2155
2156 start_mark = yaml_parser_get_mark(parser);
2157 FORWARD(parser);
2158 end_mark = yaml_parser_get_mark(parser);
2159
2160 /* Create the KEY token. */
2161
2162 token = yaml_token_new(YAML_KEY_TOKEN, start_mark, end_mark);
2163 if (!token) {
2164 parser->error = YAML_MEMORY_ERROR;
2165 return 0;
2166 }
2167
2168 /* Append the token to the queue. */
2169
2170 if (!yaml_parser_append_token(parser, token)) {
2171 yaml_token_delete(token);
2172 return 0;
2173 }
2174
2175 return 1;
2176}
2177
2178/*
2179 * Produce the VALUE token.
2180 */
2181
2182static int
2183yaml_parser_fetch_value(yaml_parser_t *parser)
2184{
2185 yaml_mark_t start_mark, end_mark;
2186 yaml_token_t *token;
2187
2188 /* Have we found a simple key? */
2189
2190 if (parser->simple_keys[parser->flow_level])
2191 {
2192 yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
2193
2194 /* Create the KEY token. */
2195
2196 token = yaml_token_new(YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
2197 if (!token) {
2198 parser->error = YAML_MEMORY_ERROR;
2199 return 0;
2200 }
2201
2202 /* Insert the token into the queue. */
2203
2204 if (!yaml_parser_insert_token(parser, simple_key->token_number, token)) {
2205 yaml_token_delete(token);
2206 return 0;
2207 }
2208
2209 /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
2210
2211 if (!yaml_parser_roll_indent(parser, parser->column,
2212 simple_key->token_number,
2213 YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
2214 return 0;
2215
2216 /* Remove the simple key from the list. */
2217
e71095e3
KS
2218 yaml_free(simple_key);
2219 parser->simple_keys[parser->flow_level] = NULL;
eb9cceb5
KS
2220
2221 /* A simple key cannot follow another simple key. */
2222
2223 parser->simple_key_allowed = 0;
2224 }
2225 else
2226 {
2227 /* The ':' indicator follows a complex key. */
2228
2229 /* In the block context, extra checks are required. */
2230
2231 if (!parser->flow_level)
2232 {
2233 /* Check if we are allowed to start a complex value. */
2234
2235 if (!parser->simple_key_allowed) {
2236 return yaml_parser_set_scanner_error(parser, NULL,
2237 yaml_parser_get_mark(parser),
2238 "mapping values are not allowed in this context");
2239 }
2240
2241 /* Add the BLOCK-MAPPING-START token if needed. */
2242
2243 if (!yaml_parser_roll_indent(parser, parser->column, -1,
2244 YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
2245 return 0;
2246 }
2247
eb9cceb5
KS
2248 /* Simple keys after ':' are allowed in the block context. */
2249
2250 parser->simple_key_allowed = (!parser->flow_level);
2251 }
2252
2253 /* Consume the token. */
2254
2255 start_mark = yaml_parser_get_mark(parser);
2256 FORWARD(parser);
2257 end_mark = yaml_parser_get_mark(parser);
2258
2259 /* Create the VALUE token. */
2260
2261 token = yaml_token_new(YAML_VALUE_TOKEN, start_mark, end_mark);
2262 if (!token) {
2263 parser->error = YAML_MEMORY_ERROR;
2264 return 0;
2265 }
2266
2267 /* Append the token to the queue. */
2268
2269 if (!yaml_parser_append_token(parser, token)) {
2270 yaml_token_delete(token);
2271 return 0;
2272 }
2273
2274 return 1;
2275}
2276
2277/*
2278 * Produce the ALIAS or ANCHOR token.
2279 */
2280
2281static int
2282yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
2283{
2284 yaml_token_t *token;
2285
2286 /* An anchor or an alias could be a simple key. */
2287
2288 if (!yaml_parser_save_simple_key(parser))
2289 return 0;
2290
2291 /* A simple key cannot follow an anchor or an alias. */
2292
2293 parser->simple_key_allowed = 0;
2294
2295 /* Create the ALIAS or ANCHOR token. */
2296
2297 token = yaml_parser_scan_anchor(parser, type);
2298 if (!token) return 0;
2299
2300 /* Append the token to the queue. */
2301
2302 if (!yaml_parser_append_token(parser, token)) {
2303 yaml_token_delete(token);
2304 return 0;
2305 }
2306
2307 return 1;
2308}
2309
2310/*
2311 * Produce the TAG token.
2312 */
2313
2314static int
2315yaml_parser_fetch_tag(yaml_parser_t *parser)
2316{
2317 yaml_token_t *token;
2318
2319 /* A tag could be a simple key. */
2320
2321 if (!yaml_parser_save_simple_key(parser))
2322 return 0;
2323
2324 /* A simple key cannot follow a tag. */
2325
2326 parser->simple_key_allowed = 0;
2327
2328 /* Create the TAG token. */
2329
2330 token = yaml_parser_scan_tag(parser);
2331 if (!token) return 0;
2332
2333 /* Append the token to the queue. */
2334
2335 if (!yaml_parser_append_token(parser, token)) {
2336 yaml_token_delete(token);
2337 return 0;
2338 }
2339
2340 return 1;
2341}
2342
2343/*
2344 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
2345 */
2346
2347static int
2348yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
2349{
2350 yaml_token_t *token;
2351
2352 /* Remove any potential simple keys. */
2353
2354 if (!yaml_parser_remove_simple_key(parser))
2355 return 0;
2356
2357 /* A simple key may follow a block scalar. */
2358
2359 parser->simple_key_allowed = 1;
2360
2361 /* Create the SCALAR token. */
2362
2363 token = yaml_parser_scan_block_scalar(parser, literal);
2364 if (!token) return 0;
2365
2366 /* Append the token to the queue. */
2367
2368 if (!yaml_parser_append_token(parser, token)) {
2369 yaml_token_delete(token);
2370 return 0;
2371 }
2372
2373 return 1;
2374}
2375
2376/*
2377 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
2378 */
2379
2380static int
2381yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
2382{
2383 yaml_token_t *token;
2384
2385 /* A plain scalar could be a simple key. */
2386
2387 if (!yaml_parser_save_simple_key(parser))
2388 return 0;
2389
2390 /* A simple key cannot follow a flow scalar. */
2391
2392 parser->simple_key_allowed = 0;
2393
2394 /* Create the SCALAR token. */
2395
2396 token = yaml_parser_scan_flow_scalar(parser, single);
2397 if (!token) return 0;
2398
2399 /* Append the token to the queue. */
2400
2401 if (!yaml_parser_append_token(parser, token)) {
2402 yaml_token_delete(token);
2403 return 0;
2404 }
2405
2406 return 1;
2407}
2408
2409/*
2410 * Produce the SCALAR(...,plain) token.
2411 */
2412
2413static int
2414yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
2415{
2416 yaml_token_t *token;
2417
2418 /* A plain scalar could be a simple key. */
2419
2420 if (!yaml_parser_save_simple_key(parser))
2421 return 0;
2422
2423 /* A simple key cannot follow a flow scalar. */
2424
2425 parser->simple_key_allowed = 0;
2426
2427 /* Create the SCALAR token. */
2428
2429 token = yaml_parser_scan_plain_scalar(parser);
2430 if (!token) return 0;
2431
2432 /* Append the token to the queue. */
2433
2434 if (!yaml_parser_append_token(parser, token)) {
2435 yaml_token_delete(token);
2436 return 0;
2437 }
2438
2439 return 1;
2440}
2441
e71095e3
KS
2442/*
2443 * Eat whitespaces and comments until the next token is found.
2444 */
2445
2446static int
2447yaml_parser_scan_to_next_token(yaml_parser_t *parser)
2448{
2449 /* Until the next token is not found. */
2450
2451 while (1)
2452 {
2453 /* Allow the BOM mark to start a line. */
2454
2455 if (!UPDATE(parser, 1)) return 0;
2456
2457 if (parser->column == 0 && IS_BOM(parser))
2458 FORWARD(parser);
2459
2460 /*
2461 * Eat whitespaces.
2462 *
2463 * Tabs are allowed:
2464 *
2465 * - in the flow context;
2466 * - in the block context, but not at the beginning of the line or
2467 * after '-', '?', or ':' (complex value).
2468 */
2469
2470 if (!UPDATE(parser, 1)) return 0;
2471
2472 while (CHECK(parser,' ') ||
2473 ((parser->flow_level || !parser->simple_key_allowed) &&
2474 CHECK(parser, '\t'))) {
2475 FORWARD(parser);
2476 if (!UPDATE(parser, 1)) return 0;
2477 }
2478
2479 /* Eat a comment until a line break. */
2480
2481 if (CHECK(parser, '#')) {
2482 while (!IS_BREAKZ(parser)) {
2483 FORWARD(parser);
2484 if (!UPDATE(parser, 1)) return 0;
2485 }
2486 }
2487
2488 /* If it is a line break, eat it. */
2489
2490 if (IS_BREAK(parser))
2491 {
2492 if (!UPDATE(parser, 2)) return 0;
2493 FORWARD_LINE(parser);
2494
2495 /* In the block context, a new line may start a simple key. */
2496
2497 if (!parser->flow_level) {
2498 parser->simple_key_allowed = 1;
2499 }
2500 }
2501 else
2502 {
2503 /* We have found a token. */
2504
2505 break;
2506 }
2507 }
2508
2509 return 1;
2510}
2511
2512/*
2513 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
2514 *
2515 * Scope:
2516 * %YAML 1.1 # a comment \n
2517 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2518 * %TAG !yaml! tag:yaml.org,2002: \n
2519 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2520 */
2521
2522static yaml_token_t *
2523yaml_parser_scan_directive(yaml_parser_t *parser)
2524{
2525 yaml_mark_t start_mark, end_mark;
2526 yaml_char_t *name = NULL;
2527 int major, minor;
2528 yaml_char_t *handle = NULL, *prefix = NULL;
2529 yaml_token_t *token = NULL;
2530
2531 /* Eat '%'. */
2532
2533 start_mark = yaml_parser_get_mark(parser);
2534
2535 FORWARD(parser);
2536
2537 /* Scan the directive name. */
2538
2539 if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2540 goto error;
2541
2542 /* Is it a YAML directive? */
2543
2544 if (strcmp((char *)name, "YAML") == 0)
2545 {
2546 /* Scan the VERSION directive value. */
2547
2548 if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2549 &major, &minor))
2550 goto error;
2551
2552 end_mark = yaml_parser_get_mark(parser);
2553
2554 /* Create a VERSION-DIRECTIVE token. */
2555
2556 token = yaml_version_directive_token_new(major, minor,
2557 start_mark, end_mark);
92d41fe1
KS
2558 if (!token) {
2559 parser->error = YAML_MEMORY_ERROR;
2560 return 0;
2561 }
e71095e3
KS
2562 }
2563
2564 /* Is it a TAG directive? */
2565
2566 else if (strcmp((char *)name, "TAG") == 0)
2567 {
2568 /* Scan the TAG directive value. */
2569
2570 if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2571 &handle, &prefix))
2572 goto error;
2573
2574 end_mark = yaml_parser_get_mark(parser);
2575
2576 /* Create a TAG-DIRECTIVE token. */
2577
2578 token = yaml_tag_directive_token_new(handle, prefix,
2579 start_mark, end_mark);
92d41fe1
KS
2580 if (!token) {
2581 parser->error = YAML_MEMORY_ERROR;
2582 return 0;
2583 }
e71095e3
KS
2584 }
2585
2586 /* Unknown directive. */
2587
2588 else
2589 {
92d41fe1 2590 yaml_parser_set_scanner_error(parser, "while scanning a directive",
e71095e3
KS
2591 start_mark, "found uknown directive name");
2592 goto error;
2593 }
2594
2595 /* Eat the rest of the line including any comments. */
2596
2597 while (IS_BLANK(parser)) {
2598 FORWARD(parser);
2599 if (!UPDATE(parser, 1)) goto error;
2600 }
2601
2602 if (CHECK(parser, '#')) {
2603 while (!IS_BREAKZ(parser)) {
2604 FORWARD(parser);
2605 if (!UPDATE(parser, 1)) goto error;
2606 }
2607 }
2608
2609 /* Check if we are at the end of the line. */
2610
2611 if (!IS_BREAKZ(parser)) {
92d41fe1 2612 yaml_parser_set_scanner_error(parser, "while scanning a directive",
e71095e3
KS
2613 start_mark, "did not found expected comment or line break");
2614 goto error;
2615 }
2616
2617 /* Eat a line break. */
2618
2619 if (IS_BREAK(parser)) {
2620 if (!UPDATE(parser, 2)) goto error;
2621 FORWARD_LINE(parser);
2622 }
2623
2624 yaml_free(name);
2625
2626 return token;
2627
2628error:
2629 yaml_free(token);
2630 yaml_free(prefix);
2631 yaml_free(handle);
2632 yaml_free(name);
2633 return NULL;
2634}
2635
2636/*
2637 * Scan the directive name.
2638 *
2639 * Scope:
2640 * %YAML 1.1 # a comment \n
2641 * ^^^^
2642 * %TAG !yaml! tag:yaml.org,2002: \n
2643 * ^^^
2644 */
2645
2646static int
2647yaml_parser_scan_directive_name(yaml_parser_t *parser,
2648 yaml_mark_t start_mark, yaml_char_t **name)
2649{
2650 yaml_string_t string = yaml_parser_new_string(parser);
2651
2652 if (!string.buffer) goto error;
2653
2654 /* Consume the directive name. */
2655
2656 if (!UPDATE(parser, 1)) goto error;
2657
2658 while (IS_ALPHA(parser))
2659 {
2660 if (!RESIZE(parser, string)) goto error;
2661 COPY(parser, string);
2662 if (!UPDATE(parser, 1)) goto error;
2663 }
2664
2665 /* Check if the name is empty. */
2666
2667 if (string.buffer == string.pointer) {
2668 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2669 start_mark, "cannot found expected directive name");
2670 goto error;
2671 }
2672
2673 /* Check for an blank character after the name. */
2674
2675 if (!IS_BLANKZ(parser)) {
2676 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2677 start_mark, "found unexpected non-alphabetical character");
2678 goto error;
2679 }
2680
2681 *name = string.buffer;
2682
2683 return 1;
2684
2685error:
2686 yaml_free(string.buffer);
2687 return 0;
2688}
2689
2690/*
2691 * Scan the value of VERSION-DIRECTIVE.
2692 *
2693 * Scope:
2694 * %YAML 1.1 # a comment \n
2695 * ^^^^^^
2696 */
2697
2698static int
2699yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2700 yaml_mark_t start_mark, int *major, int *minor)
2701{
2702 /* Eat whitespaces. */
2703
2704 if (!UPDATE(parser, 1)) return 0;
2705
2706 while (IS_BLANK(parser)) {
2707 FORWARD(parser);
2708 if (!UPDATE(parser, 1)) return 0;
2709 }
2710
2711 /* Consume the major version number. */
2712
2713 if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2714 return 0;
2715
2716 /* Eat '.'. */
2717
2718 if (!CHECK(parser, '.')) {
2719 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2720 start_mark, "did not find expected digit or '.' character");
2721 }
2722
2723 FORWARD(parser);
2724
2725 /* Consume the minor version number. */
2726
2727 if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2728 return 0;
2729}
2730
2731#define MAX_NUMBER_LENGTH 9
2732
2733/*
2734 * Scan the version number of VERSION-DIRECTIVE.
2735 *
2736 * Scope:
2737 * %YAML 1.1 # a comment \n
2738 * ^
2739 * %YAML 1.1 # a comment \n
2740 * ^
2741 */
2742
2743static int
2744yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2745 yaml_mark_t start_mark, int *number)
2746{
2747 int value = 0;
2748 size_t length = 0;
2749
2750 /* Repeat while the next character is digit. */
2751
2752 if (!UPDATE(parser, 1)) return 0;
2753
2754 while (IS_DIGIT(parser))
2755 {
2756 /* Check if the number is too long. */
2757
2758 if (++length > MAX_NUMBER_LENGTH) {
2759 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2760 start_mark, "found extremely long version number");
2761 }
2762
2763 value = value*10 + AS_DIGIT(parser);
2764
2765 FORWARD(parser);
2766
2767 if (!UPDATE(parser, 1)) return 0;
2768 }
2769
2770 /* Check if the number was present. */
2771
2772 if (!length) {
2773 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2774 start_mark, "did not find expected version number");
2775 }
2776
2777 *number = value;
2778
2779 return 1;
2780}
2781
2782/*
2783 * Scan the value of a TAG-DIRECTIVE token.
2784 *
2785 * Scope:
2786 * %TAG !yaml! tag:yaml.org,2002: \n
2787 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2788 */
2789
2790static int
2791yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2792 yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2793{
2794 yaml_char_t *handle_value = NULL;
2795 yaml_char_t *prefix_value = NULL;
2796
2797 /* Eat whitespaces. */
2798
2799 if (!UPDATE(parser, 1)) goto error;
2800
2801 while (IS_BLANK(parser)) {
2802 FORWARD(parser);
2803 if (!UPDATE(parser, 1)) goto error;
2804 }
2805
2806 /* Scan a handle. */
2807
2808 if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2809 goto error;
2810
2811 /* Expect a whitespace. */
2812
2813 if (!UPDATE(parser, 1)) goto error;
2814
2815 if (!IS_BLANK(parser)) {
2816 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2817 start_mark, "did not find expected whitespace");
2818 goto error;
2819 }
2820
2821 /* Eat whitespaces. */
2822
2823 while (IS_BLANK(parser)) {
2824 FORWARD(parser);
2825 if (!UPDATE(parser, 1)) goto error;
2826 }
2827
2828 /* Scan a prefix. */
2829
2830 if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
2831 goto error;
2832
2833 /* Expect a whitespace or line break. */
2834
2835 if (!UPDATE(parser, 1)) goto error;
2836
2837 if (!IS_BLANKZ(parser)) {
2838 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2839 start_mark, "did not find expected whitespace or line break");
2840 goto error;
2841 }
2842
2843 *handle = handle_value;
2844 *prefix = prefix_value;
2845
2846 return 1;
2847
2848error:
2849 yaml_free(handle_value);
2850 yaml_free(prefix_value);
2851 return 0;
2852}
2853
2854static yaml_token_t *
2855yaml_parser_scan_anchor(yaml_parser_t *parser,
2856 yaml_token_type_t type)
2857{
2858 int length = 0;
2859 yaml_mark_t start_mark, end_mark;
2860 yaml_token_t *token = NULL;
2861 yaml_string_t string = yaml_parser_new_string(parser);
2862
2863 if (!string.buffer) goto error;
2864
2865 /* Eat the indicator character. */
2866
2867 start_mark = yaml_parser_get_mark(parser);
2868
2869 FORWARD(parser);
2870
2871 /* Consume the value. */
2872
2873 if (!UPDATE(parser, 1)) goto error;
2874
2875 while (IS_ALPHA(parser)) {
2876 if (!RESIZE(parser, string)) goto error;
2877 COPY(parser, string);
2878 if (!UPDATE(parser, 1)) goto error;
2879 length ++;
2880 }
2881
2882 end_mark = yaml_parser_get_mark(parser);
2883
2884 /*
2885 * Check if length of the anchor is greater than 0 and it is followed by
2886 * a whitespace character or one of the indicators:
2887 *
2888 * '?', ':', ',', ']', '}', '%', '@', '`'.
2889 */
2890
2891 if (!length || !(IS_BLANKZ(parser) || CHECK(parser, '?') || CHECK(parser, ':') ||
2892 CHECK(parser, ',') || CHECK(parser, ']') || CHECK(parser, '}') ||
2893 CHECK(parser, '%') || CHECK(parser, '@') || CHECK(parser, '`'))) {
2894 yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2895 "while scanning an anchor" : "while scanning an alias", start_mark,
2896 "did not find expected alphabetic or numeric character");
2897 goto error;
2898 }
2899
2900 /* Create a token. */
2901
2902 token = type == YAML_ANCHOR_TOKEN ?
2903 yaml_anchor_token_new(string.buffer, start_mark, end_mark) :
2904 yaml_alias_token_new(string.buffer, start_mark, end_mark);
92d41fe1
KS
2905 if (!token) {
2906 parser->error = YAML_MEMORY_ERROR;
2907 return 0;
2908 }
e71095e3
KS
2909
2910 return token;
2911
2912error:
2913 yaml_free(string.buffer);
2914 yaml_free(token);
2915 return 0;
2916}
2917
2918/*
2919 * Scan a TAG token.
2920 */
2921
2922static yaml_token_t *
2923yaml_parser_scan_tag(yaml_parser_t *parser)
2924{
2925 yaml_char_t *handle = NULL;
2926 yaml_char_t *suffix = NULL;
2927 yaml_token_t *token = NULL;
2928 yaml_mark_t start_mark, end_mark;
2929
2930 start_mark = yaml_parser_get_mark(parser);
2931
2932 /* Check if the tag is in the canonical form. */
2933
2934 if (!UPDATE(parser, 2)) goto error;
2935
2936 if (CHECK_AT(parser, '<', 1))
2937 {
2938 /* Set the handle to '' */
2939
2940 handle = yaml_malloc(1);
2941 if (!handle) goto error;
2942 handle[0] = '\0';
2943
2944 /* Eat '!<' */
2945
2946 FORWARD(parser);
2947 FORWARD(parser);
2948
2949 /* Consume the tag value. */
2950
2951 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2952 goto error;
2953
2954 /* Check for '>' and eat it. */
2955
2956 if (!CHECK(parser, '>')) {
2957 yaml_parser_set_scanner_error(parser, "while scanning a tag",
2958 start_mark, "did not find the expected '>'");
2959 goto error;
2960 }
2961
2962 FORWARD(parser);
2963 }
2964 else
2965 {
2966 /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2967
2968 /* First, try to scan a handle. */
2969
2970 if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2971 goto error;
2972
2973 /* Check if it is, indeed, handle. */
2974
2975 if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2976 {
2977 /* Scan the suffix now. */
2978
2979 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2980 goto error;
2981 }
2982 else
2983 {
2984 /* It wasn't a handle after all. Scan the rest of the tag. */
2985
2986 if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
2987 goto error;
2988
2989 /* Set the handle to '!'. */
2990
2991 yaml_free(handle);
2992 handle = yaml_malloc(2);
2993 if (!handle) goto error;
2994 handle[0] = '!';
2995 handle[1] = '\0';
2996 }
2997 }
2998
2999 /* Check the character which ends the tag. */
3000
3001 if (!UPDATE(parser, 1)) goto error;
3002
3003 if (!IS_BLANKZ(parser)) {
3004 yaml_parser_set_scanner_error(parser, "while scanning a tag",
3005 start_mark, "did not found expected whitespace or line break");
3006 goto error;
3007 }
3008
3009 end_mark = yaml_parser_get_mark(parser);
3010
3011 /* Create a token. */
3012
3013 token = yaml_tag_token_new(handle, suffix, start_mark, end_mark);
92d41fe1
KS
3014 if (!token) {
3015 parser->error = YAML_MEMORY_ERROR;
3016 return 0;
3017 }
e71095e3
KS
3018
3019 return token;
3020
3021error:
3022 yaml_free(handle);
3023 yaml_free(suffix);
3024 return NULL;
3025}
3026
3027/*
3028 * Scan a tag handle.
3029 */
3030
3031static int
3032yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
3033 yaml_mark_t start_mark, yaml_char_t **handle)
3034{
3035 yaml_string_t string = yaml_parser_new_string(parser);
3036
3037 if (!string.buffer) goto error;
3038
3039 /* Check the initial '!' character. */
3040
3041 if (!UPDATE(parser, 1)) goto error;
3042
3043 if (!CHECK(parser, '!')) {
3044 yaml_parser_set_scanner_error(parser, directive ?
3045 "while scanning a tag directive" : "while scanning a tag",
3046 start_mark, "did not find expected '!'");
3047 goto error;
3048 }
3049
3050 /* Copy the '!' character. */
3051
3052 COPY(parser, string);
3053
3054 /* Copy all subsequent alphabetical and numerical characters. */
3055
3056 if (!UPDATE(parser, 1)) goto error;
3057
3058 while (IS_ALPHA(parser))
3059 {
3060 if (!RESIZE(parser, string)) goto error;
3061 COPY(parser, string);
3062 if (!UPDATE(parser, 1)) goto error;
3063 }
3064
3065 /* Check if the trailing character is '!' and copy it. */
3066
3067 if (CHECK(parser, '!'))
3068 {
3069 if (!RESIZE(parser, string)) goto error;
3070 COPY(parser, string);
3071 }
3072 else
3073 {
3074 /*
3075 * It's not really a tag handle. If it's a %TAG directive, it's an
3076 * error. If it's a tag token, it must be a part of URI.
3077 */
3078
3079 if (directive) {
3080 yaml_parser_set_scanner_error(parser, "while parsing a directive",
3081 start_mark, "did not find expected '!'");
3082 goto error;
3083 }
3084 }
3085
3086 *handle = string.buffer;
3087
3088 return 1;
3089
3090error:
3091 yaml_free(string.buffer);
3092 return 0;
3093}
3094
3095/*
3096 * Scan a tag.
3097 */
3098
3099static int
3100yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
3101 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
3102{
3103 size_t length = head ? strlen((char *)head) : 0;
3104 yaml_string_t string = yaml_parser_new_string(parser);
3105
3106 if (!string.buffer) goto error;
3107
3108 /* Resize the string to include the head. */
3109
3110 while (string.size <= length) {
3111 if (!yaml_parser_resize_string(parser, &string)) goto error;
3112 }
3113
3114 /* Copy the head if needed. */
3115
3116 if (length) {
3117 memcpy(string.buffer, head, length);
3118 string.pointer += length;
3119 }
3120
3121 /* Scan the tag. */
3122
3123 if (!UPDATE(parser, 1)) goto error;
3124
3125 /*
3126 * The set of characters that may appear in URI is as follows:
3127 *
3128 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
3129 * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
3130 * '%'.
3131 */
3132
3133 while (IS_ALPHA(parser) || CHECK(parser, ';') || CHECK(parser, '/') ||
3134 CHECK(parser, '?') || CHECK(parser, ':') || CHECK(parser, '@') ||
3135 CHECK(parser, '&') || CHECK(parser, '=') || CHECK(parser, '+') ||
3136 CHECK(parser, '$') || CHECK(parser, ',') || CHECK(parser, '.') ||
3137 CHECK(parser, '!') || CHECK(parser, '~') || CHECK(parser, '*') ||
3138 CHECK(parser, '\'') || CHECK(parser, '(') || CHECK(parser, ')') ||
3139 CHECK(parser, '[') || CHECK(parser, ']') || CHECK(parser, '%'))
3140 {
3141 if (!RESIZE(parser, string)) goto error;
3142
3143 /* Check if it is a URI-escape sequence. */
3144
3145 if (CHECK(parser, '%')) {
3146 if (!yaml_parser_scan_uri_escapes(parser,
3147 directive, start_mark, &string)) goto error;
3148 }
3149 else {
3150 COPY(parser, string);
3151 }
3152
3153 length ++;
3154 if (!UPDATE(parser, 1)) goto error;
3155 }
3156
3157 /* Check if the tag is non-empty. */
3158
3159 if (!length) {
3160 yaml_parser_set_scanner_error(parser, directive ?
3161 "while parsing a %TAG directive" : "while parsing a tag",
3162 start_mark, "did not find expected tag URI");
3163 goto error;
3164 }
3165
3166 *uri = string.buffer;
3167
3168 return 1;
3169
3170error:
3171 yaml_free(string.buffer);
3172 return 0;
3173}
3174
3175/*
3176 * Decode an URI-escape sequence corresponding to a single UTF-8 character.
3177 */
3178
3179static int
3180yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
3181 yaml_mark_t start_mark, yaml_string_t *string)
3182{
3183 int width = 0;
3184
3185 /* Decode the required number of characters. */
3186
3187 do {
3188
3189 unsigned char octet = 0;
3190
3191 /* Check for a URI-escaped octet. */
3192
3193 if (!UPDATE(parser, 3)) return 0;
3194
3195 if (!(CHECK(parser, '%') && IS_HEX_AT(parser, 1) && IS_HEX_AT(parser, 2))) {
3196 return yaml_parser_set_scanner_error(parser, directive ?
3197 "while parsing a %TAG directive" : "while parsing a tag",
3198 start_mark, "did not find URI escaped octet");
3199 }
3200
3201 /* Get the octet. */
3202
3203 octet = (AS_HEX_AT(parser, 1) << 4) + AS_HEX_AT(parser, 2);
3204
3205 /* If it is the leading octet, determine the length of the UTF-8 sequence. */
3206
3207 if (!width)
3208 {
3209 width = (octet & 0x80) == 0x00 ? 1 :
3210 (octet & 0xE0) == 0xC0 ? 2 :
3211 (octet & 0xF0) == 0xE0 ? 3 :
3212 (octet & 0xF8) == 0xF0 ? 4 : 0;
3213 if (!width) {
3214 return yaml_parser_set_scanner_error(parser, directive ?
3215 "while parsing a %TAG directive" : "while parsing a tag",
3216 start_mark, "found an incorrect leading UTF-8 octet");
3217 }
3218 }
3219 else
3220 {
3221 /* Check if the trailing octet is correct. */
3222
3223 if ((octet & 0xC0) != 0x80) {
3224 return yaml_parser_set_scanner_error(parser, directive ?
3225 "while parsing a %TAG directive" : "while parsing a tag",
3226 start_mark, "found an incorrect trailing UTF-8 octet");
3227 }
3228 }
3229
3230 /* Copy the octet and move the pointers. */
3231
3232 *(string->pointer++) = octet;
3233 FORWARD(parser);
3234 FORWARD(parser);
3235 FORWARD(parser);
3236
3237 } while (--width);
3238
3239 return 1;
3240}
3241
92d41fe1
KS
3242/*
3243 * Scan a block scalar.
3244 */
3245
3246static yaml_token_t *
3247yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal)
3248{
3249 yaml_mark_t start_mark;
3250 yaml_mark_t end_mark;
3251 yaml_string_t string = yaml_parser_new_string(parser);
3252 yaml_string_t line_break = yaml_parser_new_string(parser);
3253 yaml_string_t breaks = yaml_parser_new_string(parser);
3254 yaml_token_t *token = NULL;
3255 int chomping = 0;
3256 int increment = 0;
3257 int indent = 0;
3258 int leading_blank = 0;
3259 int trailing_blank = 0;
3260
3261 if (!string.buffer) goto error;
3262 if (!line_break.buffer) goto error;
3263 if (!breaks.buffer) goto error;
3264
3265 /* Eat the indicator '|' or '>'. */
3266
3267 start_mark = yaml_parser_get_mark(parser);
3268
3269 FORWARD(parser);
3270
3271 /* Scan the additional block scalar indicators. */
3272
3273 if (!UPDATE(parser, 1)) goto error;
3274
3275 /* Check for a chomping indicator. */
3276
3277 if (CHECK(parser, '+') || CHECK(parser, '-'))
3278 {
3279 /* Set the chomping method and eat the indicator. */
3280
3281 chomping = CHECK(parser, '+') ? +1 : -1;
3282
3283 FORWARD(parser);
3284
3285 /* Check for an indentation indicator. */
3286
3287 if (!UPDATE(parser, 1)) goto error;
3288
3289 if (IS_DIGIT(parser))
3290 {
3291 /* Check that the intendation is greater than 0. */
3292
3293 if (CHECK(parser, '0')) {
3294 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3295 start_mark, "found an intendation indicator equal to 0");
3296 goto error;
3297 }
3298
3299 /* Get the intendation level and eat the indicator. */
3300
3301 increment = AS_DIGIT(parser);
3302
3303 FORWARD(parser);
3304 }
3305 }
3306
3307 /* Do the same as above, but in the opposite order. */
3308
3309 else if (IS_DIGIT(parser))
3310 {
3311 if (CHECK(parser, '0')) {
3312 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3313 start_mark, "found an intendation indicator equal to 0");
3314 goto error;
3315 }
3316
3317 increment = AS_DIGIT(parser);
3318
3319 FORWARD(parser);
3320
3321 if (!UPDATE(parser, 1)) goto error;
3322
3323 if (CHECK(parser, '+') || CHECK(parser, '-')) {
3324 chomping = CHECK(parser, '+') ? +1 : -1;
3325 FORWARD(parser);
3326 }
3327 }
3328
3329 /* Eat whitespaces and comments to the end of the line. */
3330
3331 if (!UPDATE(parser, 1)) goto error;
3332
3333 while (IS_BLANK(parser)) {
3334 FORWARD(parser);
3335 if (!UPDATE(parser, 1)) goto error;
3336 }
3337
3338 if (CHECK(parser, '#')) {
3339 while (!IS_BREAKZ(parser)) {
3340 FORWARD(parser);
3341 if (!UPDATE(parser, 1)) goto error;
3342 }
3343 }
3344
3345 /* Check if we are at the end of the line. */
3346
3347 if (!IS_BREAKZ(parser)) {
3348 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3349 start_mark, "did not found expected comment or line break");
3350 goto error;
3351 }
3352
3353 /* Eat a line break. */
3354
3355 if (IS_BREAK(parser)) {
3356 if (!UPDATE(parser, 2)) goto error;
3357 FORWARD_LINE(parser);
3358 }
3359
3360 end_mark = yaml_parser_get_mark(parser);
3361
3362 /* Set the intendation level if it was specified. */
3363
3364 if (increment) {
3365 indent = parser->indent >= 0 ? parser->indent+increment : increment;
3366 }
3367
3368 /* Scan the leading line breaks and determine the indentation level if needed. */
3369
3370 if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &breaks,
3371 start_mark, &end_mark)) goto error;
3372
3373 /* Scan the block scalar content. */
3374
3375 if (!UPDATE(parser, 1)) goto error;
3376
3377 while (parser->column == indent && !IS_Z(parser))
3378 {
3379 /*
3380 * We are at the beginning of a non-empty line.
3381 */
3382
3383 /* Is it a trailing whitespace? */
3384
3385 trailing_blank = IS_BLANK(parser);
3386
3387 /* Check if we need to fold the leading line break. */
3388
3389 if (!literal && (*line_break.buffer == '\n')
3390 && !leading_blank && !trailing_blank)
3391 {
3392 /* Do we need to join the lines by space? */
3393
3394 if (*breaks.buffer == '\0') {
3395 if (!RESIZE(parser, string)) goto error;
3396 *(string.pointer ++) = ' ';
3397 }
3398
3399 yaml_parser_clear_string(parser, &line_break);
3400 }
3401 else {
3402 if (!JOIN(parser, string, line_break)) goto error;
3403 }
3404
3405 /* Append the remaining line breaks. */
3406
3407 if (!JOIN(parser, string, breaks)) goto error;
3408
3409 /* Is it a leading whitespace? */
3410
3411 leading_blank = IS_BLANK(parser);
3412
3413 /* Consume the current line. */
3414
3415 while (!IS_BREAKZ(parser)) {
3416 if (!RESIZE(parser, string)) goto error;
3417 COPY(parser, string);
3418 if (!UPDATE(parser, 1)) goto error;
3419 }
3420
3421 /* Consume the line break. */
3422
3423 if (!UPDATE(parser, 2)) goto error;
3424
3425 COPY_LINE(parser, line_break);
3426
3427 /* Eat the following intendation spaces and line breaks. */
3428
3429 if (!yaml_parser_scan_block_scalar_breaks(parser,
3430 &indent, &breaks, start_mark, &end_mark)) goto error;
3431 }
3432
3433 /* Chomp the tail. */
3434
3435 if (chomping != -1) {
3436 if (!JOIN(parser, string, line_break)) goto error;
3437 }
3438 if (chomping == 1) {
3439 if (!JOIN(parser, string, breaks)) goto error;
3440 }
3441
3442 /* Create a token. */
3443
3444 token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer,
3445 literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
3446 start_mark, end_mark);
3447 if (!token) {
3448 parser->error = YAML_MEMORY_ERROR;
3449 return 0;
3450 }
3451
3452 yaml_free(line_break.buffer);
3453 yaml_free(breaks.buffer);
3454
3455 return token;
3456
3457error:
3458 yaml_free(string.buffer);
3459 yaml_free(line_break.buffer);
3460 yaml_free(breaks.buffer);
3461
3462 return NULL;
3463}
3464
3465/*
3466 * Scan intendation spaces and line breaks for a block scalar. Determine the
3467 * intendation level if needed.
3468 */
3469
3470static int
3471yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
3472 int *indent, yaml_string_t *breaks,
3473 yaml_mark_t start_mark, yaml_mark_t *end_mark)
3474{
3475 int max_indent = 0;
3476
3477 *end_mark = yaml_parser_get_mark(parser);
3478
3479 /* Eat the intendation spaces and line breaks. */
3480
3481 while (1)
3482 {
3483 /* Eat the intendation spaces. */
3484
3485 if (!UPDATE(parser, 1)) return 0;
3486
3487 while ((!*indent || parser->column < *indent) && IS_SPACE(parser)) {
3488 FORWARD(parser);
3489 if (!UPDATE(parser, 1)) return 0;
3490 }
3491
3492 if (parser->column > max_indent)
3493 max_indent = parser->column;
3494
3495 /* Check for a tab character messing the intendation. */
3496
3497 if ((!*indent || parser->column < *indent) && IS_TAB(parser)) {
3498 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3499 start_mark, "found a tab character where an intendation space is expected");
3500 }
3501
3502 /* Have we found a non-empty line? */
3503
3504 if (!IS_BREAK(parser)) break;
3505
3506 /* Consume the line break. */
3507
3508 if (!UPDATE(parser, 2)) return 0;
3509 if (!RESIZE(parser, *breaks)) return 0;
3510 COPY_LINE(parser, *breaks);
3511 *end_mark = yaml_parser_get_mark(parser);
3512 }
3513
3514 /* Determine the indentation level if needed. */
3515
3516 if (!*indent) {
3517 *indent = max_indent;
3518 if (*indent < parser->indent + 1)
3519 *indent = parser->indent + 1;
3520 if (*indent < 1)
3521 *indent = 1;
3522 }
3523
3524 return 1;
3525}
3526
This page took 0.567766 seconds and 5 git commands to generate.