]> andersk Git - libyaml.git/blame - src/scanner.c
Complete the Parser (it requires refactoring though) and fix some bugs.
[libyaml.git] / src / scanner.c
CommitLineData
03be97ab
KS
1
2/*
3 * Introduction
4 * ************
5 *
6 * The following notes assume that you are familiar with the YAML specification
7 * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8 * some cases we are less restrictive than it requires.
9 *
10 * The process of transforming a YAML stream into a sequence of events is
11 * divided into two steps: Scanning and Parsing.
12 *
13 * The Scanner transforms the input stream into a sequence of tokens, while the
14 * parser transforms the sequence of tokens produced by the Scanner into a
15 * sequence of parsing events.
16 *
17 * The Scanner is rather clever and complicated. The Parser, on the contrary,
18 * is a straightforward implementation of a recursive-descent parser (or,
19 * LL(1) parser, as it is usually called).
20 *
21 * Actually there are two issues of Scanning that might be called "clever", the
22 * rest is quite straightforward. The issues are "block collection start" and
23 * "simple keys". Both issues are explained below in detail.
24 *
25 * Here the Scanning step is explained and implemented. We start with the list
26 * of all the tokens produced by the Scanner together with short descriptions.
27 *
28 * Now, tokens:
29 *
30 * STREAM-START(encoding) # The stream start.
31 * STREAM-END # The stream end.
32 * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33 * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34 * DOCUMENT-START # '---'
35 * DOCUMENT-END # '...'
36 * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37 * BLOCK-MAPPING-START # sequence or a block mapping.
38 * BLOCK-END # Indentation decrease.
39 * FLOW-SEQUENCE-START # '['
40 * FLOW-SEQUENCE-END # ']'
41 * FLOW-MAPPING-START # '{'
42 * FLOW-MAPPING-END # '}'
43 * BLOCK-ENTRY # '-'
44 * FLOW-ENTRY # ','
45 * KEY # '?' or nothing (simple keys).
46 * VALUE # ':'
47 * ALIAS(anchor) # '*anchor'
48 * ANCHOR(anchor) # '&anchor'
49 * TAG(handle,suffix) # '!handle!suffix'
50 * SCALAR(value,style) # A scalar.
51 *
52 * The following two tokens are "virtual" tokens denoting the beginning and the
53 * end of the stream:
54 *
55 * STREAM-START(encoding)
56 * STREAM-END
57 *
58 * We pass the information about the input stream encoding with the
59 * STREAM-START token.
60 *
61 * The next two tokens are responsible for directives:
62 *
63 * VERSION-DIRECTIVE(major,minor)
64 * TAG-DIRECTIVE(handle,prefix)
65 *
66 * Example:
67 *
68 * %YAML 1.1
69 * %TAG ! !foo
70 * %TAG !yaml! tag:yaml.org,2002:
71 * ---
72 *
73 * The corresponding sequence of tokens:
74 *
75 * STREAM-START(utf-8)
76 * VERSION-DIRECTIVE(1,1)
77 * TAG-DIRECTIVE("!","!foo")
78 * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79 * DOCUMENT-START
80 * STREAM-END
81 *
82 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83 * line.
84 *
85 * The document start and end indicators are represented by:
86 *
87 * DOCUMENT-START
88 * DOCUMENT-END
89 *
90 * Note that if a YAML stream contains an implicit document (without '---'
91 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92 * produced.
93 *
94 * In the following examples, we present whole documents together with the
95 * produced tokens.
96 *
97 * 1. An implicit document:
98 *
99 * 'a scalar'
100 *
101 * Tokens:
102 *
103 * STREAM-START(utf-8)
104 * SCALAR("a scalar",single-quoted)
105 * STREAM-END
106 *
107 * 2. An explicit document:
108 *
109 * ---
110 * 'a scalar'
111 * ...
112 *
113 * Tokens:
114 *
115 * STREAM-START(utf-8)
116 * DOCUMENT-START
117 * SCALAR("a scalar",single-quoted)
118 * DOCUMENT-END
119 * STREAM-END
120 *
121 * 3. Several documents in a stream:
122 *
123 * 'a scalar'
124 * ---
125 * 'another scalar'
126 * ---
127 * 'yet another scalar'
128 *
129 * Tokens:
130 *
131 * STREAM-START(utf-8)
132 * SCALAR("a scalar",single-quoted)
133 * DOCUMENT-START
134 * SCALAR("another scalar",single-quoted)
135 * DOCUMENT-START
136 * SCALAR("yet another scalar",single-quoted)
137 * STREAM-END
138 *
139 * We have already introduced the SCALAR token above. The following tokens are
140 * used to describe aliases, anchors, tag, and scalars:
141 *
142 * ALIAS(anchor)
143 * ANCHOR(anchor)
144 * TAG(handle,suffix)
145 * SCALAR(value,style)
146 *
147 * The following series of examples illustrate the usage of these tokens:
148 *
149 * 1. A recursive sequence:
150 *
151 * &A [ *A ]
152 *
153 * Tokens:
154 *
155 * STREAM-START(utf-8)
156 * ANCHOR("A")
157 * FLOW-SEQUENCE-START
158 * ALIAS("A")
159 * FLOW-SEQUENCE-END
160 * STREAM-END
161 *
162 * 2. A tagged scalar:
163 *
164 * !!float "3.14" # A good approximation.
165 *
166 * Tokens:
167 *
168 * STREAM-START(utf-8)
169 * TAG("!!","float")
170 * SCALAR("3.14",double-quoted)
171 * STREAM-END
172 *
173 * 3. Various scalar styles:
174 *
175 * --- # Implicit empty plain scalars do not produce tokens.
176 * --- a plain scalar
177 * --- 'a single-quoted scalar'
178 * --- "a double-quoted scalar"
179 * --- |-
180 * a literal scalar
181 * --- >-
182 * a folded
183 * scalar
184 *
185 * Tokens:
186 *
187 * STREAM-START(utf-8)
188 * DOCUMENT-START
189 * DOCUMENT-START
190 * SCALAR("a plain scalar",plain)
191 * DOCUMENT-START
192 * SCALAR("a single-quoted scalar",single-quoted)
193 * DOCUMENT-START
194 * SCALAR("a double-quoted scalar",double-quoted)
195 * DOCUMENT-START
196 * SCALAR("a literal scalar",literal)
197 * DOCUMENT-START
198 * SCALAR("a folded scalar",folded)
199 * STREAM-END
200 *
201 * Now it's time to review collection-related tokens. We will start with
202 * flow collections:
203 *
204 * FLOW-SEQUENCE-START
205 * FLOW-SEQUENCE-END
206 * FLOW-MAPPING-START
207 * FLOW-MAPPING-END
208 * FLOW-ENTRY
209 * KEY
210 * VALUE
211 *
212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214 * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215 * indicators '?' and ':', which are used for denoting mapping keys and values,
216 * are represented by the KEY and VALUE tokens.
217 *
218 * The following examples show flow collections:
219 *
220 * 1. A flow sequence:
221 *
222 * [item 1, item 2, item 3]
223 *
224 * Tokens:
225 *
226 * STREAM-START(utf-8)
227 * FLOW-SEQUENCE-START
228 * SCALAR("item 1",plain)
229 * FLOW-ENTRY
230 * SCALAR("item 2",plain)
231 * FLOW-ENTRY
232 * SCALAR("item 3",plain)
233 * FLOW-SEQUENCE-END
234 * STREAM-END
235 *
236 * 2. A flow mapping:
237 *
238 * {
239 * a simple key: a value, # Note that the KEY token is produced.
240 * ? a complex key: another value,
241 * }
242 *
243 * Tokens:
244 *
245 * STREAM-START(utf-8)
246 * FLOW-MAPPING-START
247 * KEY
248 * SCALAR("a simple key",plain)
249 * VALUE
250 * SCALAR("a value",plain)
251 * FLOW-ENTRY
252 * KEY
253 * SCALAR("a complex key",plain)
254 * VALUE
255 * SCALAR("another value",plain)
256 * FLOW-ENTRY
257 * FLOW-MAPPING-END
258 * STREAM-END
259 *
260 * A simple key is a key which is not denoted by the '?' indicator. Note that
261 * the Scanner still produces the KEY token whenever it encounters a simple key.
262 *
263 * For scanning block collections, the following tokens are used (note that we
264 * repeat KEY and VALUE here):
265 *
266 * BLOCK-SEQUENCE-START
267 * BLOCK-MAPPING-START
268 * BLOCK-END
269 * BLOCK-ENTRY
270 * KEY
271 * VALUE
272 *
273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274 * increase that precedes a block collection (cf. the INDENT token in Python).
275 * The token BLOCK-END denotes indentation decrease that ends a block collection
276 * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277 * that make detection of these tokens more complex.
278 *
279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280 * '-', '?', and ':' correspondingly.
281 *
282 * The following examples show how the tokens BLOCK-SEQUENCE-START,
283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284 *
285 * 1. Block sequences:
286 *
287 * - item 1
288 * - item 2
289 * -
290 * - item 3.1
291 * - item 3.2
292 * -
293 * key 1: value 1
294 * key 2: value 2
295 *
296 * Tokens:
297 *
298 * STREAM-START(utf-8)
299 * BLOCK-SEQUENCE-START
300 * BLOCK-ENTRY
301 * SCALAR("item 1",plain)
302 * BLOCK-ENTRY
303 * SCALAR("item 2",plain)
304 * BLOCK-ENTRY
305 * BLOCK-SEQUENCE-START
306 * BLOCK-ENTRY
307 * SCALAR("item 3.1",plain)
308 * BLOCK-ENTRY
309 * SCALAR("item 3.2",plain)
310 * BLOCK-END
311 * BLOCK-ENTRY
312 * BLOCK-MAPPING-START
313 * KEY
314 * SCALAR("key 1",plain)
315 * VALUE
316 * SCALAR("value 1",plain)
317 * KEY
318 * SCALAR("key 2",plain)
319 * VALUE
320 * SCALAR("value 2",plain)
321 * BLOCK-END
322 * BLOCK-END
323 * STREAM-END
324 *
325 * 2. Block mappings:
326 *
327 * a simple key: a value # The KEY token is produced here.
328 * ? a complex key
329 * : another value
330 * a mapping:
331 * key 1: value 1
332 * key 2: value 2
333 * a sequence:
334 * - item 1
335 * - item 2
336 *
337 * Tokens:
338 *
339 * STREAM-START(utf-8)
340 * BLOCK-MAPPING-START
341 * KEY
342 * SCALAR("a simple key",plain)
343 * VALUE
344 * SCALAR("a value",plain)
345 * KEY
346 * SCALAR("a complex key",plain)
347 * VALUE
348 * SCALAR("another value",plain)
349 * KEY
350 * SCALAR("a mapping",plain)
351 * BLOCK-MAPPING-START
352 * KEY
353 * SCALAR("key 1",plain)
354 * VALUE
355 * SCALAR("value 1",plain)
356 * KEY
357 * SCALAR("key 2",plain)
358 * VALUE
359 * SCALAR("value 2",plain)
360 * BLOCK-END
361 * KEY
362 * SCALAR("a sequence",plain)
363 * VALUE
364 * BLOCK-SEQUENCE-START
365 * BLOCK-ENTRY
366 * SCALAR("item 1",plain)
367 * BLOCK-ENTRY
368 * SCALAR("item 2",plain)
369 * BLOCK-END
370 * BLOCK-END
371 * STREAM-END
372 *
373 * YAML does not always require to start a new block collection from a new
374 * line. If the current line contains only '-', '?', and ':' indicators, a new
375 * block collection may start at the current line. The following examples
376 * illustrate this case:
377 *
378 * 1. Collections in a sequence:
379 *
380 * - - item 1
381 * - item 2
382 * - key 1: value 1
383 * key 2: value 2
384 * - ? complex key
385 * : complex value
386 *
387 * Tokens:
388 *
389 * STREAM-START(utf-8)
390 * BLOCK-SEQUENCE-START
391 * BLOCK-ENTRY
392 * BLOCK-SEQUENCE-START
393 * BLOCK-ENTRY
394 * SCALAR("item 1",plain)
395 * BLOCK-ENTRY
396 * SCALAR("item 2",plain)
397 * BLOCK-END
398 * BLOCK-ENTRY
399 * BLOCK-MAPPING-START
400 * KEY
401 * SCALAR("key 1",plain)
402 * VALUE
403 * SCALAR("value 1",plain)
404 * KEY
405 * SCALAR("key 2",plain)
406 * VALUE
407 * SCALAR("value 2",plain)
408 * BLOCK-END
409 * BLOCK-ENTRY
410 * BLOCK-MAPPING-START
411 * KEY
412 * SCALAR("complex key",plain)
413 * VALUE
414 * SCALAR("complex value",plain)
415 * BLOCK-END
416 * BLOCK-END
417 * STREAM-END
418 *
419 * 2. Collections in a mapping:
420 *
421 * ? a sequence
422 * : - item 1
423 * - item 2
424 * ? a mapping
425 * : key 1: value 1
426 * key 2: value 2
427 *
428 * Tokens:
429 *
430 * STREAM-START(utf-8)
431 * BLOCK-MAPPING-START
432 * KEY
433 * SCALAR("a sequence",plain)
434 * VALUE
435 * BLOCK-SEQUENCE-START
436 * BLOCK-ENTRY
437 * SCALAR("item 1",plain)
438 * BLOCK-ENTRY
439 * SCALAR("item 2",plain)
440 * BLOCK-END
441 * KEY
442 * SCALAR("a mapping",plain)
443 * VALUE
444 * BLOCK-MAPPING-START
445 * KEY
446 * SCALAR("key 1",plain)
447 * VALUE
448 * SCALAR("value 1",plain)
449 * KEY
450 * SCALAR("key 2",plain)
451 * VALUE
452 * SCALAR("value 2",plain)
453 * BLOCK-END
454 * BLOCK-END
455 * STREAM-END
456 *
457 * YAML also permits non-indented sequences if they are included into a block
458 * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
459 *
460 * key:
461 * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
462 * - item 2
463 *
464 * Tokens:
465 *
466 * STREAM-START(utf-8)
467 * BLOCK-MAPPING-START
468 * KEY
469 * SCALAR("key",plain)
470 * VALUE
471 * BLOCK-ENTRY
472 * SCALAR("item 1",plain)
473 * BLOCK-ENTRY
474 * SCALAR("item 2",plain)
475 * BLOCK-END
476 */
477
478#if HAVE_CONFIG_H
479#include <config.h>
480#endif
481
5eff53a4 482#include <yaml.h>
03be97ab
KS
483
484#include <assert.h>
485
f2b59d4d
KS
486/*
487 * Ensure that the buffer contains the required number of characters.
488 * Return 1 on success, 0 on failure (reader error or memory error).
489 */
490
491#define UPDATE(parser,length) \
492 (parser->unread >= (length) \
493 ? 1 \
494 : yaml_parser_update_buffer(parser, (length)))
495
/*
 * Check the octet at the specified position.
 *
 * Evaluates to non-zero if the octet at parser->pointer[offset] equals
 * `octet` (converted to yaml_char_t), and to zero otherwise.  All macro
 * arguments are parenthesized so that arbitrary expressions may be passed.
 */

#define CHECK_AT(parser,octet,offset)                                   \
    ((parser)->pointer[(offset)] == (yaml_char_t)(octet))
f2b59d4d
KS
502
503/*
504 * Check the current octet in the buffer.
505 */
506
507#define CHECK(parser,octet) CHECK_AT(parser,(octet),0)
508
e71095e3
KS
509/*
510 * Check if the character at the specified position is an alphabetical
511 * character, a digit, '_', or '-'.
512 */
513
514#define IS_ALPHA_AT(parser,offset) \
515 ((parser->pointer[offset] >= (yaml_char_t) '0' && \
516 parser->pointer[offset] <= (yaml_char_t) '9') || \
517 (parser->pointer[offset] >= (yaml_char_t) 'A' && \
518 parser->pointer[offset] <= (yaml_char_t) 'Z') || \
519 (parser->pointer[offset] >= (yaml_char_t) 'a' && \
520 parser->pointer[offset] <= (yaml_char_t) 'z') || \
521 parser->pointer[offset] == '_' || \
522 parser->pointer[offset] == '-')
523
524#define IS_ALPHA(parser) IS_ALPHA_AT(parser,0)
525
526/*
527 * Check if the character at the specified position is a digit.
528 */
529
530#define IS_DIGIT_AT(parser,offset) \
531 ((parser->pointer[offset] >= (yaml_char_t) '0' && \
532 parser->pointer[offset] <= (yaml_char_t) '9'))
533
534#define IS_DIGIT(parser) IS_DIGIT_AT(parser,0)
535
536/*
537 * Get the value of a digit.
538 */
539
540#define AS_DIGIT_AT(parser,offset) \
541 (parser->pointer[offset] - (yaml_char_t) '0')
542
543#define AS_DIGIT(parser) AS_DIGIT_AT(parser,0)
544
545/*
546 * Check if the character at the specified position is a hex-digit.
547 */
548
549#define IS_HEX_AT(parser,offset) \
550 ((parser->pointer[offset] >= (yaml_char_t) '0' && \
551 parser->pointer[offset] <= (yaml_char_t) '9') || \
552 (parser->pointer[offset] >= (yaml_char_t) 'A' && \
553 parser->pointer[offset] <= (yaml_char_t) 'F') || \
554 (parser->pointer[offset] >= (yaml_char_t) 'a' && \
555 parser->pointer[offset] <= (yaml_char_t) 'f'))
556
557#define IS_HEX(parser) IS_HEX_AT(parser,0)
558
559/*
560 * Get the value of a hex-digit.
561 */
562
563#define AS_HEX_AT(parser,offset) \
564 ((parser->pointer[offset] >= (yaml_char_t) 'A' && \
565 parser->pointer[offset] <= (yaml_char_t) 'F') ? \
566 (parser->pointer[offset] - (yaml_char_t) 'A' + 10) : \
567 (parser->pointer[offset] >= (yaml_char_t) 'a' && \
568 parser->pointer[offset] <= (yaml_char_t) 'f') ? \
569 (parser->pointer[offset] - (yaml_char_t) 'a' + 10) : \
570 (parser->pointer[offset] - (yaml_char_t) '0'))
571
572#define AS_HEX(parser) AS_HEX_AT(parser,0)
573
f2b59d4d
KS
574/*
575 * Check if the character at the specified position is NUL.
576 */
577
578#define IS_Z_AT(parser,offset) CHECK_AT(parser,'\0',(offset))
579
580#define IS_Z(parser) IS_Z_AT(parser,0)
581
e71095e3
KS
/*
 * Check if the character at the specified position is BOM.
 *
 * The UTF-8 encoding of the byte order mark (#xFEFF) is the three octets
 * EF BB BF, so the octets at offset, offset+1 and offset+2 are compared.
 */

#define IS_BOM_AT(parser,offset)                                        \
    (CHECK_AT(parser,'\xEF',(offset))                                   \
     && CHECK_AT(parser,'\xBB',(offset)+1)                              \
     && CHECK_AT(parser,'\xBF',(offset)+2))  /* BOM (#xFEFF) */
590
591#define IS_BOM(parser) IS_BOM_AT(parser,0)
592
f2b59d4d
KS
593/*
594 * Check if the character at the specified position is space.
595 */
596
597#define IS_SPACE_AT(parser,offset) CHECK_AT(parser,' ',(offset))
598
599#define IS_SPACE(parser) IS_SPACE_AT(parser,0)
600
601/*
602 * Check if the character at the specified position is tab.
603 */
604
605#define IS_TAB_AT(parser,offset) CHECK_AT(parser,'\t',(offset))
606
607#define IS_TAB(parser) IS_TAB_AT(parser,0)
608
609/*
610 * Check if the character at the specified position is blank (space or tab).
611 */
612
613#define IS_BLANK_AT(parser,offset) \
614 (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset)))
615
616#define IS_BLANK(parser) IS_BLANK_AT(parser,0)
617
618/*
619 * Check if the character at the specified position is a line break.
620 */
621
622#define IS_BREAK_AT(parser,offset) \
623 (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \
624 || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \
625 || (CHECK_AT(parser,'\xC2',(offset)) \
e71095e3 626 && CHECK_AT(parser,'\x85',(offset)+1)) /* NEL (#x85) */ \
f2b59d4d 627 || (CHECK_AT(parser,'\xE2',(offset)) \
e71095e3
KS
628 && CHECK_AT(parser,'\x80',(offset)+1) \
629 && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \
f2b59d4d 630 || (CHECK_AT(parser,'\xE2',(offset)) \
e71095e3 631 && CHECK_AT(parser,'\x80',(offset)+1) \
92d41fe1 632 && CHECK_AT(parser,'\xA9',(offset)+2))) /* PS (#x2029) */
f2b59d4d
KS
633
634#define IS_BREAK(parser) IS_BREAK_AT(parser,0)
635
eb9cceb5
KS
636#define IS_CRLF_AT(parser,offset) \
637 (CHECK_AT(parser,'\r',(offset)) && CHECK_AT(parser,'\n',(offset)+1))
638
639#define IS_CRLF(parser) IS_CRLF_AT(parser,0)
640
f2b59d4d
KS
641/*
642 * Check if the character is a line break or NUL.
643 */
644
645#define IS_BREAKZ_AT(parser,offset) \
646 (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset)))
647
648#define IS_BREAKZ(parser) IS_BREAKZ_AT(parser,0)
649
650/*
651 * Check if the character is a line break, space, or NUL.
652 */
653
654#define IS_SPACEZ_AT(parser,offset) \
655 (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
656
657#define IS_SPACEZ(parser) IS_SPACEZ_AT(parser,0)
658
659/*
660 * Check if the character is a line break, space, tab, or NUL.
661 */
662
663#define IS_BLANKZ_AT(parser,offset) \
664 (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
665
666#define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0)
667
eb9cceb5
KS
668/*
669 * Determine the width of the character.
670 */
671
672#define WIDTH_AT(parser,offset) \
673 ((parser->pointer[(offset)] & 0x80) == 0x00 ? 1 : \
674 (parser->pointer[(offset)] & 0xE0) == 0xC0 ? 2 : \
675 (parser->pointer[(offset)] & 0xF0) == 0xE0 ? 3 : \
676 (parser->pointer[(offset)] & 0xF8) == 0xF0 ? 4 : 0)
677
678#define WIDTH(parser) WIDTH_AT(parser,0)
679
680/*
681 * Advance the buffer pointer.
682 */
683
e71095e3 684#define FORWARD(parser) \
eb9cceb5 685 (parser->index ++, \
e71095e3 686 parser->column ++, \
eb9cceb5
KS
687 parser->unread --, \
688 parser->pointer += WIDTH(parser))
689
e71095e3
KS
690#define FORWARD_LINE(parser) \
691 (IS_CRLF(parser) ? \
692 (parser->index += 2, \
693 parser->column = 0, \
92d41fe1 694 parser->line ++, \
e71095e3
KS
695 parser->unread -= 2, \
696 parser->pointer += 2) : \
697 IS_BREAK(parser) ? \
698 (parser->index ++, \
699 parser->column = 0, \
92d41fe1 700 parser->line ++, \
e71095e3
KS
701 parser->unread --, \
702 parser->pointer += WIDTH(parser)) : 0)
703
704/*
705 * Resize a string if needed.
706 */
707
708#define RESIZE(parser,string) \
92d41fe1
KS
709 ((string).pointer-(string).buffer+5 < (string).size ? 1 : \
710 yaml_parser_resize_string(parser, &(string)))
e71095e3
KS
711
712/*
713 * Copy a character to a string buffer and advance pointers.
714 */
715
716#define COPY(parser,string) \
717 (((*parser->pointer & 0x80) == 0x00 ? \
92d41fe1 718 (*((string).pointer++) = *(parser->pointer++)) : \
e71095e3 719 (*parser->pointer & 0xE0) == 0xC0 ? \
92d41fe1
KS
720 (*((string).pointer++) = *(parser->pointer++), \
721 *((string).pointer++) = *(parser->pointer++)) : \
e71095e3 722 (*parser->pointer & 0xF0) == 0xE0 ? \
92d41fe1
KS
723 (*((string).pointer++) = *(parser->pointer++), \
724 *((string).pointer++) = *(parser->pointer++), \
725 *((string).pointer++) = *(parser->pointer++)) : \
e71095e3 726 (*parser->pointer & 0xF8) == 0xF0 ? \
92d41fe1
KS
727 (*((string).pointer++) = *(parser->pointer++), \
728 *((string).pointer++) = *(parser->pointer++), \
729 *((string).pointer++) = *(parser->pointer++), \
730 *((string).pointer++) = *(parser->pointer++)) : 0), \
e71095e3
KS
731 parser->index ++, \
732 parser->column ++, \
733 parser->unread --)
92d41fe1
KS
734
735/*
736 * Copy a line break character to a string buffer and advance pointers.
737 */
738
739#define COPY_LINE(parser,string) \
740 ((CHECK_AT(parser,'\r',0) && CHECK_AT(parser,'\n',1)) ? /* CR LF -> LF */ \
741 (*((string).pointer++) = (yaml_char_t) '\n', \
742 parser->pointer += 2, \
743 parser->index += 2, \
744 parser->column = 0, \
745 parser->line ++, \
746 parser->unread -= 2) : \
747 (CHECK_AT(parser,'\r',0) || CHECK_AT(parser,'\n',0)) ? /* CR|LF -> LF */ \
748 (*((string).pointer++) = (yaml_char_t) '\n', \
749 parser->pointer ++, \
750 parser->index ++, \
751 parser->column = 0, \
752 parser->line ++, \
753 parser->unread --) : \
754 (CHECK_AT(parser,'\xC2',0) && CHECK_AT(parser,'\x85',1)) ? /* NEL -> LF */ \
755 (*((string).pointer++) = (yaml_char_t) '\n', \
756 parser->pointer += 2, \
757 parser->index ++, \
758 parser->column = 0, \
759 parser->line ++, \
760 parser->unread --) : \
761 (CHECK_AT(parser,'\xE2',0) && \
762 CHECK_AT(parser,'\x80',1) && \
763 (CHECK_AT(parser,'\xA8',2) || \
764 CHECK_AT(parser,'\xA9',2))) ? /* LS|PS -> LS|PS */ \
765 (*((string).pointer++) = *(parser->pointer++), \
766 *((string).pointer++) = *(parser->pointer++), \
767 *((string).pointer++) = *(parser->pointer++), \
768 parser->index ++, \
769 parser->column = 0, \
770 parser->line ++, \
771 parser->unread --) : 0)
772
773/*
774 * Append a string to another string and clear the former string.
775 */
776
777#define JOIN(parser,head_string,tail_string) \
778 (yaml_parser_join_string(parser, &(head_string), &(tail_string)) && \
779 yaml_parser_clear_string(parser, &(tail_string)))
e71095e3 780
03be97ab
KS
781/*
782 * Public API declarations.
783 */
784
785YAML_DECLARE(yaml_token_t *)
786yaml_parser_get_token(yaml_parser_t *parser);
787
788YAML_DECLARE(yaml_token_t *)
789yaml_parser_peek_token(yaml_parser_t *parser);
790
f2b59d4d
KS
791/*
792 * Error handling.
793 */
794
795static int
796yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
797 yaml_mark_t context_mark, const char *problem);
798
799static yaml_mark_t
800yaml_parser_get_mark(yaml_parser_t *parser);
801
e71095e3
KS
802/*
803 * Buffers and lists.
804 */
805
806typedef struct {
807 yaml_char_t *buffer;
808 yaml_char_t *pointer;
809 size_t size;
810} yaml_string_t;
811
812static yaml_string_t
813yaml_parser_new_string(yaml_parser_t *parser);
814
815static int
816yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string);
817
92d41fe1
KS
818static int
819yaml_parser_join_string(yaml_parser_t *parser,
820 yaml_string_t *string1, yaml_string_t *string2);
821
822static int
823yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string);
824
e71095e3
KS
825static int
826yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size,
827 size_t item_size);
828
03be97ab
KS
829/*
830 * High-level token API.
831 */
832
833static int
834yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
835
836static int
837yaml_parser_fetch_next_token(yaml_parser_t *parser);
838
839/*
840 * Potential simple keys.
841 */
842
843static int
844yaml_parser_stale_simple_keys(yaml_parser_t *parser);
845
846static int
847yaml_parser_save_simple_key(yaml_parser_t *parser);
848
849static int
850yaml_parser_remove_simple_key(yaml_parser_t *parser);
851
eb9cceb5
KS
852static int
853yaml_parser_increase_flow_level(yaml_parser_t *parser);
854
855static int
856yaml_parser_decrease_flow_level(yaml_parser_t *parser);
857
858/*
859 * Token manipulation.
860 */
861
862static int
863yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token);
864
865static int
866yaml_parser_insert_token(yaml_parser_t *parser,
867 int number, yaml_token_t *token);
868
03be97ab
KS
869/*
870 * Indentation treatment.
871 */
872
873static int
eb9cceb5
KS
874yaml_parser_roll_indent(yaml_parser_t *parser, int column,
875 int number, yaml_token_type_t type, yaml_mark_t mark);
03be97ab
KS
876
877static int
f2b59d4d 878yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
03be97ab
KS
879
880/*
881 * Token fetchers.
882 */
883
884static int
885yaml_parser_fetch_stream_start(yaml_parser_t *parser);
886
887static int
888yaml_parser_fetch_stream_end(yaml_parser_t *parser);
889
890static int
891yaml_parser_fetch_directive(yaml_parser_t *parser);
892
03be97ab
KS
893static int
894yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
895 yaml_token_type_t type);
896
03be97ab
KS
897static int
898yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
899 yaml_token_type_t type);
900
03be97ab
KS
901static int
902yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
903 yaml_token_type_t type);
904
905static int
906yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
907
908static int
909yaml_parser_fetch_block_entry(yaml_parser_t *parser);
910
911static int
912yaml_parser_fetch_key(yaml_parser_t *parser);
913
914static int
915yaml_parser_fetch_value(yaml_parser_t *parser);
916
917static int
eb9cceb5 918yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
03be97ab
KS
919
920static int
921yaml_parser_fetch_tag(yaml_parser_t *parser);
922
03be97ab
KS
923static int
924yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
925
03be97ab
KS
926static int
927yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
928
929static int
930yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
931
932/*
933 * Token scanners.
934 */
935
936static int
937yaml_parser_scan_to_next_token(yaml_parser_t *parser);
938
939static yaml_token_t *
940yaml_parser_scan_directive(yaml_parser_t *parser);
941
942static int
943yaml_parser_scan_directive_name(yaml_parser_t *parser,
944 yaml_mark_t start_mark, yaml_char_t **name);
945
946static int
e71095e3 947yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
03be97ab
KS
948 yaml_mark_t start_mark, int *major, int *minor);
949
950static int
e71095e3 951yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
03be97ab
KS
952 yaml_mark_t start_mark, int *number);
953
954static int
955yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
e71095e3 956 yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
03be97ab
KS
957
958static yaml_token_t *
959yaml_parser_scan_anchor(yaml_parser_t *parser,
960 yaml_token_type_t type);
961
962static yaml_token_t *
963yaml_parser_scan_tag(yaml_parser_t *parser);
964
965static int
966yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
967 yaml_mark_t start_mark, yaml_char_t **handle);
968
969static int
970yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
e71095e3
KS
971 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
972
973static int
974yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
975 yaml_mark_t start_mark, yaml_string_t *string);
03be97ab
KS
976
977static yaml_token_t *
978yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal);
979
92d41fe1
KS
980static int
981yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
982 int *indent, yaml_string_t *breaks,
983 yaml_mark_t start_mark, yaml_mark_t *end_mark);
984
03be97ab
KS
985static yaml_token_t *
986yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
987
988static yaml_token_t *
989yaml_parser_scan_plain_scalar(yaml_parser_t *parser);
990
f2b59d4d
KS
991/*
992 * Get the next token and remove it from the tokens queue.
993 */
994
995YAML_DECLARE(yaml_token_t *)
996yaml_parser_get_token(yaml_parser_t *parser)
997{
998 yaml_token_t *token;
999
1000 assert(parser); /* Non-NULL parser object is expected. */
1001 assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
1002
1003 /* Ensure that the tokens queue contains enough tokens. */
1004
1005 if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
1006
1007 /* Fetch the next token from the queue. */
1008
1009 token = parser->tokens[parser->tokens_head];
1010
1011 /* Move the queue head. */
1012
1013 parser->tokens[parser->tokens_head++] = NULL;
f2b59d4d
KS
1014
1015 parser->tokens_parsed++;
1016
7e32c194
KS
1017 if (token->type == YAML_STREAM_END_TOKEN) {
1018 parser->stream_end_produced = 1;
1019 }
1020
f2b59d4d
KS
1021 return token;
1022}
1023
1024/*
1025 * Get the next token, but don't remove it from the queue.
1026 */
1027
1028YAML_DECLARE(yaml_token_t *)
1029yaml_parser_peek_token(yaml_parser_t *parser)
1030{
1031 assert(parser); /* Non-NULL parser object is expected. */
1032 assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
1033
1034 /* Ensure that the tokens queue contains enough tokens. */
1035
1036 if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
1037
1038 /* Fetch the next token from the queue. */
1039
1040 return parser->tokens[parser->tokens_head];
1041}
1042
e71095e3
KS
1043/*
1044 * Create a new string.
1045 */
1046
1047static yaml_string_t
1048yaml_parser_new_string(yaml_parser_t *parser)
1049{
1050 yaml_string_t string = { NULL, NULL, 0 };
1051
1052 string.buffer = yaml_malloc(YAML_DEFAULT_SIZE);
1053 if (!string.buffer) {
1054 parser->error = YAML_MEMORY_ERROR;
1055 return string;
1056 }
1057
1058 memset(string.buffer, 0, YAML_DEFAULT_SIZE);
1059 string.pointer = string.buffer;
1060 string.size = YAML_DEFAULT_SIZE;
1061
1062 return string;
1063}
1064
1065/*
1066 * Double the size of a string.
1067 */
1068
1069static int
1070yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string)
1071{
1072 yaml_char_t *new_buffer = yaml_realloc(string->buffer, string->size*2);
1073
1074 if (!new_buffer) {
1075 yaml_free(string->buffer);
1076 string->buffer = NULL;
1077 string->pointer = NULL;
1078 string->size = 0;
1079 parser->error = YAML_MEMORY_ERROR;
1080 return 0;
1081 }
1082
1083 memset(new_buffer+string->size, 0, string->size);
1084
7e32c194 1085 string->pointer = new_buffer + (string->pointer-string->buffer);
e71095e3
KS
1086 string->buffer = new_buffer;
1087 string->size *= 2;
1088
1089 return 1;
1090}
1091
92d41fe1
KS
1092/*
1093 * Append a string to another string.
1094 */
1095
1096static int
1097yaml_parser_join_string(yaml_parser_t *parser,
1098 yaml_string_t *string1, yaml_string_t *string2)
1099{
1100 if (string2->buffer == string2->pointer) return 1;
1101
1102 while (string1->pointer - string1->buffer + string2->pointer - string2->buffer + 1
1103 > string1->size) {
1104 if (!yaml_parser_resize_string(parser, string1)) return 0;
1105 }
1106
1107 memcpy(string1->pointer, string2->buffer, string2->pointer-string2->buffer);
7e32c194 1108 string1->pointer += string2->pointer-string2->buffer;
92d41fe1
KS
1109
1110 return 1;
1111}
1112
1113/*
1114 * Fill the string with NULs and move the pointer to the beginning.
1115 */
1116
1117static int
1118yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string)
1119{
1120 if (string->buffer == string->pointer) return 1;
1121
1122 memset(string->buffer, 0, string->pointer-string->buffer);
1123
1124 string->pointer = string->buffer;
1125
1126 return 1;
1127}
1128
e71095e3
KS
1129/*
1130 * Double a list.
1131 */
1132
1133static int
1134yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size,
1135 size_t item_size)
1136{
1137 void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2);
1138
1139 if (!new_buffer) {
1140 parser->error = YAML_MEMORY_ERROR;
1141 return 0;
1142 }
1143
7e32c194 1144 memset(new_buffer+item_size*(*size), 0, item_size*(*size));
e71095e3
KS
1145
1146 *buffer = new_buffer;
1147 *size *= 2;
1148
1149 return 1;
1150}
1151
f2b59d4d
KS
1152/*
1153 * Set the scanner error and return 0.
1154 */
1155
1156static int
1157yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
1158 yaml_mark_t context_mark, const char *problem)
1159{
1160 parser->error = YAML_SCANNER_ERROR;
1161 parser->context = context;
1162 parser->context_mark = context_mark;
1163 parser->problem = problem;
1164 parser->problem_mark = yaml_parser_get_mark(parser);
7e32c194
KS
1165
1166 return 0;
f2b59d4d
KS
1167}
1168
1169/*
1170 * Get the mark for the current buffer position.
1171 */
1172
1173static yaml_mark_t
1174yaml_parser_get_mark(yaml_parser_t *parser)
1175{
1176 yaml_mark_t mark = { parser->index, parser->line, parser->column };
1177
1178 return mark;
1179}
1180
1181
1182/*
1183 * Ensure that the tokens queue contains at least one token which can be
1184 * returned to the Parser.
1185 */
1186
1187static int
1188yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
1189{
1190 int need_more_tokens;
1191 int k;
1192
1193 /* While we need more tokens to fetch, do it. */
1194
1195 while (1)
1196 {
1197 /*
1198 * Check if we really need to fetch more tokens.
1199 */
1200
1201 need_more_tokens = 0;
1202
1203 if (parser->tokens_head == parser->tokens_tail)
1204 {
1205 /* Queue is empty. */
1206
1207 need_more_tokens = 1;
1208 }
1209 else
1210 {
1211 /* Check if any potential simple key may occupy the head position. */
1212
7e32c194
KS
1213 if (!yaml_parser_stale_simple_keys(parser))
1214 return 0;
1215
f2b59d4d
KS
1216 for (k = 0; k <= parser->flow_level; k++) {
1217 yaml_simple_key_t *simple_key = parser->simple_keys[k];
1218 if (simple_key
1219 && (simple_key->token_number == parser->tokens_parsed)) {
1220 need_more_tokens = 1;
1221 break;
1222 }
1223 }
1224 }
1225
1226 /* We are finished. */
1227
1228 if (!need_more_tokens)
1229 break;
1230
1231 /* Fetch the next token. */
1232
1233 if (!yaml_parser_fetch_next_token(parser))
1234 return 0;
1235 }
1236
1237 return 1;
1238}
1239
1240/*
1241 * The dispatcher for token fetchers.
1242 */
1243
1244static int
1245yaml_parser_fetch_next_token(yaml_parser_t *parser)
1246{
1247 /* Ensure that the buffer is initialized. */
1248
1249 if (!UPDATE(parser, 1))
1250 return 0;
1251
1252 /* Check if we just started scanning. Fetch STREAM-START then. */
1253
1254 if (!parser->stream_start_produced)
1255 return yaml_parser_fetch_stream_start(parser);
1256
1257 /* Eat whitespaces and comments until we reach the next token. */
1258
1259 if (!yaml_parser_scan_to_next_token(parser))
1260 return 0;
1261
7e32c194
KS
1262 /* Remove obsolete potential simple keys. */
1263
1264 if (!yaml_parser_stale_simple_keys(parser))
1265 return 0;
1266
f2b59d4d
KS
1267 /* Check the indentation level against the current column. */
1268
1269 if (!yaml_parser_unroll_indent(parser, parser->column))
1270 return 0;
1271
1272 /*
1273 * Ensure that the buffer contains at least 4 characters. 4 is the length
1274 * of the longest indicators ('--- ' and '... ').
1275 */
1276
1277 if (!UPDATE(parser, 4))
1278 return 0;
1279
1280 /* Is it the end of the stream? */
1281
1282 if (IS_Z(parser))
1283 return yaml_parser_fetch_stream_end(parser);
1284
1285 /* Is it a directive? */
1286
1287 if (parser->column == 0 && CHECK(parser, '%'))
1288 return yaml_parser_fetch_directive(parser);
1289
1290 /* Is it the document start indicator? */
1291
1292 if (parser->column == 0
1293 && CHECK_AT(parser, '-', 0)
1294 && CHECK_AT(parser, '-', 1)
1295 && CHECK_AT(parser, '-', 2)
1296 && IS_BLANKZ_AT(parser, 3))
eb9cceb5
KS
1297 return yaml_parser_fetch_document_indicator(parser,
1298 YAML_DOCUMENT_START_TOKEN);
f2b59d4d
KS
1299
1300 /* Is it the document end indicator? */
1301
1302 if (parser->column == 0
1303 && CHECK_AT(parser, '.', 0)
1304 && CHECK_AT(parser, '.', 1)
1305 && CHECK_AT(parser, '.', 2)
1306 && IS_BLANKZ_AT(parser, 3))
eb9cceb5
KS
1307 return yaml_parser_fetch_document_indicator(parser,
1308 YAML_DOCUMENT_END_TOKEN);
f2b59d4d
KS
1309
1310 /* Is it the flow sequence start indicator? */
1311
1312 if (CHECK(parser, '['))
eb9cceb5
KS
1313 return yaml_parser_fetch_flow_collection_start(parser,
1314 YAML_FLOW_SEQUENCE_START_TOKEN);
f2b59d4d
KS
1315
1316 /* Is it the flow mapping start indicator? */
1317
1318 if (CHECK(parser, '{'))
eb9cceb5
KS
1319 return yaml_parser_fetch_flow_collection_start(parser,
1320 YAML_FLOW_MAPPING_START_TOKEN);
f2b59d4d
KS
1321
1322 /* Is it the flow sequence end indicator? */
1323
1324 if (CHECK(parser, ']'))
eb9cceb5
KS
1325 return yaml_parser_fetch_flow_collection_end(parser,
1326 YAML_FLOW_SEQUENCE_END_TOKEN);
f2b59d4d
KS
1327
1328 /* Is it the flow mapping end indicator? */
1329
1330 if (CHECK(parser, '}'))
eb9cceb5
KS
1331 return yaml_parser_fetch_flow_collection_end(parser,
1332 YAML_FLOW_MAPPING_END_TOKEN);
f2b59d4d
KS
1333
1334 /* Is it the flow entry indicator? */
1335
1336 if (CHECK(parser, ','))
1337 return yaml_parser_fetch_flow_entry(parser);
1338
1339 /* Is it the block entry indicator? */
1340
1341 if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
1342 return yaml_parser_fetch_block_entry(parser);
1343
1344 /* Is it the key indicator? */
1345
7e32c194 1346 if (CHECK(parser, '?') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
f2b59d4d
KS
1347 return yaml_parser_fetch_key(parser);
1348
1349 /* Is it the value indicator? */
1350
7e32c194 1351 if (CHECK(parser, ':') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
f2b59d4d
KS
1352 return yaml_parser_fetch_value(parser);
1353
1354 /* Is it an alias? */
1355
1356 if (CHECK(parser, '*'))
eb9cceb5 1357 return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
f2b59d4d
KS
1358
1359 /* Is it an anchor? */
1360
1361 if (CHECK(parser, '&'))
eb9cceb5 1362 return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
f2b59d4d
KS
1363
1364 /* Is it a tag? */
1365
1366 if (CHECK(parser, '!'))
1367 return yaml_parser_fetch_tag(parser);
1368
1369 /* Is it a literal scalar? */
1370
1371 if (CHECK(parser, '|') && !parser->flow_level)
1372 return yaml_parser_fetch_block_scalar(parser, 1);
1373
1374 /* Is it a folded scalar? */
1375
1376 if (CHECK(parser, '>') && !parser->flow_level)
1377 return yaml_parser_fetch_block_scalar(parser, 0);
1378
1379 /* Is it a single-quoted scalar? */
1380
1381 if (CHECK(parser, '\''))
1382 return yaml_parser_fetch_flow_scalar(parser, 1);
1383
1384 /* Is it a double-quoted scalar? */
1385
1386 if (CHECK(parser, '"'))
1387 return yaml_parser_fetch_flow_scalar(parser, 0);
1388
1389 /*
1390 * Is it a plain scalar?
1391 *
1392 * A plain scalar may start with any non-blank characters except
1393 *
1394 * '-', '?', ':', ',', '[', ']', '{', '}',
1395 * '#', '&', '*', '!', '|', '>', '\'', '\"',
1396 * '%', '@', '`'.
1397 *
7e32c194
KS
1398 * In the block context (and, for the '-' indicator, in the flow context
1399 * too), it may also start with the characters
f2b59d4d
KS
1400 *
1401 * '-', '?', ':'
1402 *
1403 * if it is followed by a non-space character.
1404 *
1405 * The last rule is more restrictive than the specification requires.
1406 */
1407
1408 if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
1409 || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
1410 || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
1411 || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
1412 || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
1413 || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
1414 || CHECK(parser, '@') || CHECK(parser, '`')) ||
7e32c194 1415 (CHECK(parser, '-') && !IS_BLANK_AT(parser, 1)) ||
f2b59d4d 1416 (!parser->flow_level &&
7e32c194 1417 (CHECK(parser, '?') || CHECK(parser, ':')) && !IS_BLANKZ_AT(parser, 1)))
f2b59d4d
KS
1418 return yaml_parser_fetch_plain_scalar(parser);
1419
1420 /*
1421 * If we don't determine the token type so far, it is an error.
1422 */
1423
1424 return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
1425 yaml_parser_get_mark(parser), "found character that cannot start any token");
1426}
1427
eb9cceb5
KS
1428/*
1429 * Check the list of potential simple keys and remove the positions that
1430 * cannot contain simple keys anymore.
1431 */
1432
1433static int
1434yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1435{
1436 int level;
1437
1438 /* Check for a potential simple key for each flow level. */
1439
1440 for (level = 0; level <= parser->flow_level; level++)
1441 {
1442 yaml_simple_key_t *simple_key = parser->simple_keys[level];
1443
1444 /*
1445 * The specification requires that a simple key
1446 *
1447 * - is limited to a single line,
1448 * - is shorter than 1024 characters.
1449 */
1450
1451 if (simple_key && (simple_key->line < parser->line ||
7e32c194 1452 simple_key->index+1024 < parser->index)) {
eb9cceb5
KS
1453
1454 /* Check if the potential simple key to be removed is required. */
1455
1456 if (simple_key->required) {
1457 return yaml_parser_set_scanner_error(parser,
1458 "while scanning a simple key", simple_key->mark,
1459 "could not found expected ':'");
1460 }
1461
1462 yaml_free(simple_key);
1463 parser->simple_keys[level] = NULL;
1464 }
1465 }
1466
1467 return 1;
1468}
1469
1470/*
1471 * Check if a simple key may start at the current position and add it if
1472 * needed.
1473 */
1474
1475static int
1476yaml_parser_save_simple_key(yaml_parser_t *parser)
1477{
1478 /*
1479 * A simple key is required at the current position if the scanner is in
1480 * the block context and the current column coincides with the indentation
1481 * level.
1482 */
1483
1484 int required = (!parser->flow_level && parser->indent == parser->column);
1485
1486 /*
1487 * A simple key is required only when it is the first token in the current
1488 * line. Therefore it is always allowed. But we add a check anyway.
1489 */
1490
1491 assert(parser->simple_key_allowed || !required); /* Impossible. */
1492
1493 /*
1494 * If the current position may start a simple key, save it.
1495 */
1496
1497 if (parser->simple_key_allowed)
1498 {
1499 yaml_simple_key_t simple_key = { required,
1500 parser->tokens_parsed + parser->tokens_tail - parser->tokens_head,
1501 parser->index, parser->line, parser->column,
1502 yaml_parser_get_mark(parser) };
1503
1504 if (!yaml_parser_remove_simple_key(parser)) return 0;
1505
1506 parser->simple_keys[parser->flow_level] =
1507 yaml_malloc(sizeof(yaml_simple_key_t));
1508 if (!parser->simple_keys[parser->flow_level]) {
1509 parser->error = YAML_MEMORY_ERROR;
1510 return 0;
1511 }
1512
1513 *(parser->simple_keys[parser->flow_level]) = simple_key;
1514 }
1515
1516 return 1;
1517}
1518
1519/*
1520 * Remove a potential simple key at the current flow level.
1521 */
1522
1523static int
1524yaml_parser_remove_simple_key(yaml_parser_t *parser)
1525{
1526 yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
1527
1528 if (simple_key)
1529 {
1530 /* If the key is required, it is an error. */
1531
1532 if (simple_key->required) {
1533 return yaml_parser_set_scanner_error(parser,
1534 "while scanning a simple key", simple_key->mark,
1535 "could not found expected ':'");
1536 }
1537
1538 /* Remove the key from the list. */
1539
1540 yaml_free(simple_key);
1541 parser->simple_keys[parser->flow_level] = NULL;
1542 }
1543
1544 return 1;
1545}
1546
1547/*
1548 * Increase the flow level and resize the simple key list if needed.
1549 */
1550
1551static int
1552yaml_parser_increase_flow_level(yaml_parser_t *parser)
1553{
1554 /* Check if we need to resize the list. */
1555
e71095e3
KS
1556 if (parser->flow_level == parser->simple_keys_size-1) {
1557 if (!yaml_parser_resize_list(parser, (void **)&parser->simple_keys,
1558 &parser->simple_keys_size, sizeof(yaml_simple_key_t *)))
eb9cceb5 1559 return 0;
eb9cceb5
KS
1560 }
1561
1562 /* Increase the flow level and reset the simple key. */
1563
1564 parser->simple_keys[++parser->flow_level] = NULL;
1565
1566 return 1;
1567}
1568
1569/*
1570 * Decrease the flow level.
1571 */
1572
1573static int
1574yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1575{
1576 assert(parser->flow_level); /* Greater than 0. */
1577 assert(!parser->simple_keys[parser->flow_level]); /* Must be removed. */
1578
1579 parser->flow_level --;
1580
1581 return 1;
1582}
1583
1584/*
1585 * Add a token to the tail of the tokens queue.
1586 */
1587
1588static int
1589yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token)
1590{
1591 return yaml_parser_insert_token(parser, -1, token);
1592}
1593
1594/*
1595 * Insert the token into the tokens queue. The number parameter is the
1596 * ordinal number of the token. If the number is equal to -1, add the token
1597 * to the tail of the queue.
1598 */
1599
1600static int
1601yaml_parser_insert_token(yaml_parser_t *parser,
1602 int number, yaml_token_t *token)
1603{
1604 /* The index of the token in the queue. */
1605
1606 int index = (number == -1)
1607 ? parser->tokens_tail - parser->tokens_head
1608 : number - parser->tokens_parsed;
1609
1610 assert(index >= 0 && index <= (parser->tokens_tail-parser->tokens_head));
1611
1612 /* Check if we need to resize the queue. */
1613
e71095e3
KS
1614 if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) {
1615 if (!yaml_parser_resize_list(parser, (void **)&parser->tokens,
1616 &parser->tokens_size, sizeof(yaml_token_t *)))
eb9cceb5 1617 return 0;
eb9cceb5
KS
1618 }
1619
1620 /* Check if we need to move the queue to the beginning of the buffer. */
1621
1622 if (parser->tokens_tail == parser->tokens_size)
1623 {
1624 if (parser->tokens_head < parser->tokens_tail) {
1625 memmove(parser->tokens, parser->tokens+parser->tokens_head,
1626 sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head));
1627 }
1628 parser->tokens_tail -= parser->tokens_head;
1629 parser->tokens_head = 0;
1630 }
1631
1632 /* Check if we need to free space within the queue. */
1633
1634 if (index < (parser->tokens_tail-parser->tokens_head)) {
1635 memmove(parser->tokens+parser->tokens_head+index+1,
1636 parser->tokens+parser->tokens_head+index,
1637 sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head-index));
1638 }
1639
1640 /* Insert the token. */
1641
1642 parser->tokens[parser->tokens_head+index] = token;
1643 parser->tokens_tail ++;
1644
1645 return 1;
1646}
1647
1648/*
1649 * Push the current indentation level to the stack and set the new level
1650 * the current column is greater than the indentation level. In this case,
1651 * append or insert the specified token into the token queue.
1652 *
1653 */
1654
1655static int
1656yaml_parser_roll_indent(yaml_parser_t *parser, int column,
1657 int number, yaml_token_type_t type, yaml_mark_t mark)
1658{
1659 yaml_token_t *token;
1660
1661 /* In the flow context, do nothing. */
1662
1663 if (parser->flow_level)
1664 return 1;
1665
1666 if (parser->indent < column)
1667 {
1668 /* Check if we need to expand the indents stack. */
1669
e71095e3
KS
1670 if (parser->indents_length == parser->indents_size) {
1671 if (!yaml_parser_resize_list(parser, (void **)&parser->indents,
1672 &parser->indents_size, sizeof(int)))
eb9cceb5 1673 return 0;
eb9cceb5
KS
1674 }
1675
1676 /*
1677 * Push the current indentation level to the stack and set the new
1678 * indentation level.
1679 */
1680
1681 parser->indents[parser->indents_length++] = parser->indent;
1682 parser->indent = column;
1683
1684 /* Create a token. */
1685
1686 token = yaml_token_new(type, mark, mark);
1687 if (!token) {
1688 parser->error = YAML_MEMORY_ERROR;
1689 return 0;
1690 }
1691
1692 /* Insert the token into the queue. */
1693
1694 if (!yaml_parser_insert_token(parser, number, token)) {
1695 yaml_token_delete(token);
1696 return 0;
1697 }
1698 }
1699
1700 return 1;
1701}
1702
1703/*
1704 * Pop indentation levels from the indents stack until the current level
1705 * becomes less or equal to the column. For each intendation level, append
1706 * the BLOCK-END token.
1707 */
1708
1709
1710static int
1711yaml_parser_unroll_indent(yaml_parser_t *parser, int column)
1712{
1713 yaml_token_t *token;
1714
1715 /* In the flow context, do nothing. */
1716
1717 if (parser->flow_level)
1718 return 1;
1719
1720 /* Loop through the intendation levels in the stack. */
1721
1722 while (parser->indent > column)
1723 {
1724 yaml_mark_t mark = yaml_parser_get_mark(parser);
1725
1726 /* Create a token. */
1727
1728 token = yaml_token_new(YAML_BLOCK_END_TOKEN, mark, mark);
1729 if (!token) {
1730 parser->error = YAML_MEMORY_ERROR;
1731 return 0;
1732 }
1733
1734 /* Append the token to the queue. */
1735
1736 if (!yaml_parser_append_token(parser, token)) {
1737 yaml_token_delete(token);
1738 return 0;
1739 }
1740
1741 /* Pop the indentation level. */
1742
1743 assert(parser->indents_length); /* Non-empty stack expected. */
1744
1745 parser->indent = parser->indents[--parser->indents_length];
1746 }
1747
1748 return 1;
1749}
1750
1751/*
1752 * Initialize the scanner and produce the STREAM-START token.
1753 */
1754
1755static int
1756yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1757{
1758 yaml_mark_t mark = yaml_parser_get_mark(parser);
1759 yaml_token_t *token;
1760
1761 /* Set the initial indentation. */
1762
1763 parser->indent = -1;
1764
1765 /* A simple key is allowed at the beginning of the stream. */
1766
1767 parser->simple_key_allowed = 1;
1768
1769 /* We have started. */
1770
1771 parser->stream_start_produced = 1;
1772
1773 /* Create the STREAM-START token. */
1774
1775 token = yaml_stream_start_token_new(parser->encoding, mark, mark);
1776 if (!token) {
1777 parser->error = YAML_MEMORY_ERROR;
1778 return 0;
1779 }
1780
1781 /* Append the token to the queue. */
1782
1783 if (!yaml_parser_append_token(parser, token)) {
1784 yaml_token_delete(token);
1785 return 0;
1786 }
1787
1788 return 1;
1789}
1790
1791/*
1792 * Produce the STREAM-END token and shut down the scanner.
1793 */
1794
1795static int
1796yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1797{
1798 yaml_mark_t mark = yaml_parser_get_mark(parser);
1799 yaml_token_t *token;
1800
1801 /* Reset the indentation level. */
1802
1803 if (!yaml_parser_unroll_indent(parser, -1))
1804 return 0;
1805
7e32c194 1806 /* Reset simple keys. */
eb9cceb5 1807
7e32c194
KS
1808 if (!yaml_parser_remove_simple_key(parser))
1809 return 0;
1810
1811 parser->simple_key_allowed = 0;
eb9cceb5
KS
1812
1813 /* Create the STREAM-END token. */
1814
1815 token = yaml_stream_end_token_new(mark, mark);
1816 if (!token) {
1817 parser->error = YAML_MEMORY_ERROR;
1818 return 0;
1819 }
1820
1821 /* Append the token to the queue. */
1822
1823 if (!yaml_parser_append_token(parser, token)) {
1824 yaml_token_delete(token);
1825 return 0;
1826 }
1827
1828 return 1;
1829}
1830
1831/*
1832 * Produce the YAML-DIRECTIVE or TAG-DIRECTIVE token.
1833 */
1834
1835static int
1836yaml_parser_fetch_directive(yaml_parser_t *parser)
1837{
1838 yaml_token_t *token;
1839
1840 /* Reset the indentation level. */
1841
1842 if (!yaml_parser_unroll_indent(parser, -1))
1843 return 0;
1844
1845 /* Reset simple keys. */
1846
1847 if (!yaml_parser_remove_simple_key(parser))
1848 return 0;
1849
1850 parser->simple_key_allowed = 0;
1851
1852 /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1853
1854 token = yaml_parser_scan_directive(parser);
1855 if (!token) return 0;
1856
1857 /* Append the token to the queue. */
1858
1859 if (!yaml_parser_append_token(parser, token)) {
1860 yaml_token_delete(token);
1861 return 0;
1862 }
1863
1864 return 1;
1865}
1866
1867/*
1868 * Produce the DOCUMENT-START or DOCUMENT-END token.
1869 */
1870
1871static int
1872yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1873 yaml_token_type_t type)
1874{
1875 yaml_mark_t start_mark, end_mark;
1876 yaml_token_t *token;
1877
1878 /* Reset the indentation level. */
1879
1880 if (!yaml_parser_unroll_indent(parser, -1))
1881 return 0;
1882
1883 /* Reset simple keys. */
1884
1885 if (!yaml_parser_remove_simple_key(parser))
1886 return 0;
1887
1888 parser->simple_key_allowed = 0;
1889
1890 /* Consume the token. */
1891
1892 start_mark = yaml_parser_get_mark(parser);
1893
1894 FORWARD(parser);
1895 FORWARD(parser);
1896 FORWARD(parser);
1897
1898 end_mark = yaml_parser_get_mark(parser);
1899
1900 /* Create the DOCUMENT-START or DOCUMENT-END token. */
1901
1902 token = yaml_token_new(type, start_mark, end_mark);
1903 if (!token) {
1904 parser->error = YAML_MEMORY_ERROR;
1905 return 0;
1906 }
1907
1908 /* Append the token to the queue. */
1909
1910 if (!yaml_parser_append_token(parser, token)) {
1911 yaml_token_delete(token);
1912 return 0;
1913 }
1914
1915 return 1;
1916}
1917
1918/*
1919 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1920 */
1921
1922static int
1923yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1924 yaml_token_type_t type)
1925{
1926 yaml_mark_t start_mark, end_mark;
1927 yaml_token_t *token;
1928
1929 /* The indicators '[' and '{' may start a simple key. */
1930
1931 if (!yaml_parser_save_simple_key(parser))
1932 return 0;
1933
1934 /* Increase the flow level. */
1935
1936 if (!yaml_parser_increase_flow_level(parser))
1937 return 0;
1938
1939 /* A simple key may follow the indicators '[' and '{'. */
1940
1941 parser->simple_key_allowed = 1;
1942
1943 /* Consume the token. */
1944
1945 start_mark = yaml_parser_get_mark(parser);
1946 FORWARD(parser);
1947 end_mark = yaml_parser_get_mark(parser);
1948
1949 /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1950
1951 token = yaml_token_new(type, start_mark, end_mark);
1952 if (!token) {
1953 parser->error = YAML_MEMORY_ERROR;
1954 return 0;
1955 }
1956
1957 /* Append the token to the queue. */
1958
1959 if (!yaml_parser_append_token(parser, token)) {
1960 yaml_token_delete(token);
1961 return 0;
1962 }
1963
1964 return 1;
1965}
1966
1967/*
1968 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1969 */
1970
1971static int
1972yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1973 yaml_token_type_t type)
1974{
1975 yaml_mark_t start_mark, end_mark;
1976 yaml_token_t *token;
1977
1978 /* Reset any potential simple key on the current flow level. */
1979
1980 if (!yaml_parser_remove_simple_key(parser))
1981 return 0;
1982
1983 /* Decrease the flow level. */
1984
1985 if (!yaml_parser_decrease_flow_level(parser))
1986 return 0;
1987
1988 /* No simple keys after the indicators ']' and '}'. */
1989
1990 parser->simple_key_allowed = 0;
1991
1992 /* Consume the token. */
1993
1994 start_mark = yaml_parser_get_mark(parser);
1995 FORWARD(parser);
1996 end_mark = yaml_parser_get_mark(parser);
1997
1998 /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1999
2000 token = yaml_token_new(type, start_mark, end_mark);
2001 if (!token) {
2002 parser->error = YAML_MEMORY_ERROR;
2003 return 0;
2004 }
2005
2006 /* Append the token to the queue. */
2007
2008 if (!yaml_parser_append_token(parser, token)) {
2009 yaml_token_delete(token);
2010 return 0;
2011 }
2012
2013 return 1;
2014}
2015
2016/*
2017 * Produce the FLOW-ENTRY token.
2018 */
2019
2020static int
2021yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
2022{
2023 yaml_mark_t start_mark, end_mark;
2024 yaml_token_t *token;
2025
2026 /* Reset any potential simple keys on the current flow level. */
2027
2028 if (!yaml_parser_remove_simple_key(parser))
2029 return 0;
2030
2031 /* Simple keys are allowed after ','. */
2032
2033 parser->simple_key_allowed = 1;
2034
2035 /* Consume the token. */
2036
2037 start_mark = yaml_parser_get_mark(parser);
2038 FORWARD(parser);
2039 end_mark = yaml_parser_get_mark(parser);
2040
2041 /* Create the FLOW-ENTRY token. */
2042
2043 token = yaml_token_new(YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
2044 if (!token) {
2045 parser->error = YAML_MEMORY_ERROR;
2046 return 0;
2047 }
2048
2049 /* Append the token to the queue. */
2050
2051 if (!yaml_parser_append_token(parser, token)) {
2052 yaml_token_delete(token);
2053 return 0;
2054 }
2055
2056 return 1;
2057}
2058
2059/*
2060 * Produce the BLOCK-ENTRY token.
2061 */
2062
2063static int
2064yaml_parser_fetch_block_entry(yaml_parser_t *parser)
2065{
2066 yaml_mark_t start_mark, end_mark;
2067 yaml_token_t *token;
2068
2069 /* Check if the scanner is in the block context. */
2070
2071 if (!parser->flow_level)
2072 {
2073 /* Check if we are allowed to start a new entry. */
2074
2075 if (!parser->simple_key_allowed) {
2076 return yaml_parser_set_scanner_error(parser, NULL,
2077 yaml_parser_get_mark(parser),
2078 "block sequence entries are not allowed in this context");
2079 }
2080
2081 /* Add the BLOCK-SEQUENCE-START token if needed. */
2082
2083 if (!yaml_parser_roll_indent(parser, parser->column, -1,
2084 YAML_BLOCK_SEQUENCE_START_TOKEN, yaml_parser_get_mark(parser)))
2085 return 0;
2086 }
2087 else
2088 {
2089 /*
2090 * It is an error for the '-' indicator to occur in the flow context,
2091 * but we let the Parser detect and report about it because the Parser
2092 * is able to point to the context.
2093 */
2094 }
2095
2096 /* Reset any potential simple keys on the current flow level. */
2097
2098 if (!yaml_parser_remove_simple_key(parser))
2099 return 0;
2100
2101 /* Simple keys are allowed after '-'. */
2102
2103 parser->simple_key_allowed = 1;
2104
2105 /* Consume the token. */
2106
2107 start_mark = yaml_parser_get_mark(parser);
2108 FORWARD(parser);
2109 end_mark = yaml_parser_get_mark(parser);
2110
2111 /* Create the BLOCK-ENTRY token. */
2112
2113 token = yaml_token_new(YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
2114 if (!token) {
2115 parser->error = YAML_MEMORY_ERROR;
2116 return 0;
2117 }
2118
2119 /* Append the token to the queue. */
2120
2121 if (!yaml_parser_append_token(parser, token)) {
2122 yaml_token_delete(token);
2123 return 0;
2124 }
2125
2126 return 1;
2127}
2128
2129/*
2130 * Produce the KEY token.
2131 */
2132
2133static int
2134yaml_parser_fetch_key(yaml_parser_t *parser)
2135{
2136 yaml_mark_t start_mark, end_mark;
2137 yaml_token_t *token;
2138
2139 /* In the block context, additional checks are required. */
2140
2141 if (!parser->flow_level)
2142 {
2143 /* Check if we are allowed to start a new key (not nessesary simple). */
2144
2145 if (!parser->simple_key_allowed) {
2146 return yaml_parser_set_scanner_error(parser, NULL,
2147 yaml_parser_get_mark(parser),
2148 "mapping keys are not allowed in this context");
2149 }
2150
2151 /* Add the BLOCK-MAPPING-START token if needed. */
2152
2153 if (!yaml_parser_roll_indent(parser, parser->column, -1,
2154 YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
2155 return 0;
2156 }
2157
2158 /* Reset any potential simple keys on the current flow level. */
2159
2160 if (!yaml_parser_remove_simple_key(parser))
2161 return 0;
2162
2163 /* Simple keys are allowed after '?' in the block context. */
2164
2165 parser->simple_key_allowed = (!parser->flow_level);
2166
2167 /* Consume the token. */
2168
2169 start_mark = yaml_parser_get_mark(parser);
2170 FORWARD(parser);
2171 end_mark = yaml_parser_get_mark(parser);
2172
2173 /* Create the KEY token. */
2174
2175 token = yaml_token_new(YAML_KEY_TOKEN, start_mark, end_mark);
2176 if (!token) {
2177 parser->error = YAML_MEMORY_ERROR;
2178 return 0;
2179 }
2180
2181 /* Append the token to the queue. */
2182
2183 if (!yaml_parser_append_token(parser, token)) {
2184 yaml_token_delete(token);
2185 return 0;
2186 }
2187
2188 return 1;
2189}
2190
2191/*
2192 * Produce the VALUE token.
2193 */
2194
2195static int
2196yaml_parser_fetch_value(yaml_parser_t *parser)
2197{
2198 yaml_mark_t start_mark, end_mark;
2199 yaml_token_t *token;
2200
2201 /* Have we found a simple key? */
2202
2203 if (parser->simple_keys[parser->flow_level])
2204 {
2205 yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
2206
2207 /* Create the KEY token. */
2208
2209 token = yaml_token_new(YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
2210 if (!token) {
2211 parser->error = YAML_MEMORY_ERROR;
2212 return 0;
2213 }
2214
2215 /* Insert the token into the queue. */
2216
2217 if (!yaml_parser_insert_token(parser, simple_key->token_number, token)) {
2218 yaml_token_delete(token);
2219 return 0;
2220 }
2221
2222 /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
2223
7e32c194 2224 if (!yaml_parser_roll_indent(parser, simple_key->column,
eb9cceb5
KS
2225 simple_key->token_number,
2226 YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
2227 return 0;
2228
2229 /* Remove the simple key from the list. */
2230
e71095e3
KS
2231 yaml_free(simple_key);
2232 parser->simple_keys[parser->flow_level] = NULL;
eb9cceb5
KS
2233
2234 /* A simple key cannot follow another simple key. */
2235
2236 parser->simple_key_allowed = 0;
2237 }
2238 else
2239 {
2240 /* The ':' indicator follows a complex key. */
2241
2242 /* In the block context, extra checks are required. */
2243
2244 if (!parser->flow_level)
2245 {
2246 /* Check if we are allowed to start a complex value. */
2247
2248 if (!parser->simple_key_allowed) {
2249 return yaml_parser_set_scanner_error(parser, NULL,
2250 yaml_parser_get_mark(parser),
2251 "mapping values are not allowed in this context");
2252 }
2253
2254 /* Add the BLOCK-MAPPING-START token if needed. */
2255
2256 if (!yaml_parser_roll_indent(parser, parser->column, -1,
2257 YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
2258 return 0;
2259 }
2260
eb9cceb5
KS
2261 /* Simple keys after ':' are allowed in the block context. */
2262
2263 parser->simple_key_allowed = (!parser->flow_level);
2264 }
2265
2266 /* Consume the token. */
2267
2268 start_mark = yaml_parser_get_mark(parser);
2269 FORWARD(parser);
2270 end_mark = yaml_parser_get_mark(parser);
2271
2272 /* Create the VALUE token. */
2273
2274 token = yaml_token_new(YAML_VALUE_TOKEN, start_mark, end_mark);
2275 if (!token) {
2276 parser->error = YAML_MEMORY_ERROR;
2277 return 0;
2278 }
2279
2280 /* Append the token to the queue. */
2281
2282 if (!yaml_parser_append_token(parser, token)) {
2283 yaml_token_delete(token);
2284 return 0;
2285 }
2286
2287 return 1;
2288}
2289
2290/*
2291 * Produce the ALIAS or ANCHOR token.
2292 */
2293
2294static int
2295yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
2296{
2297 yaml_token_t *token;
2298
2299 /* An anchor or an alias could be a simple key. */
2300
2301 if (!yaml_parser_save_simple_key(parser))
2302 return 0;
2303
2304 /* A simple key cannot follow an anchor or an alias. */
2305
2306 parser->simple_key_allowed = 0;
2307
2308 /* Create the ALIAS or ANCHOR token. */
2309
2310 token = yaml_parser_scan_anchor(parser, type);
2311 if (!token) return 0;
2312
2313 /* Append the token to the queue. */
2314
2315 if (!yaml_parser_append_token(parser, token)) {
2316 yaml_token_delete(token);
2317 return 0;
2318 }
2319
2320 return 1;
2321}
2322
2323/*
2324 * Produce the TAG token.
2325 */
2326
2327static int
2328yaml_parser_fetch_tag(yaml_parser_t *parser)
2329{
2330 yaml_token_t *token;
2331
2332 /* A tag could be a simple key. */
2333
2334 if (!yaml_parser_save_simple_key(parser))
2335 return 0;
2336
2337 /* A simple key cannot follow a tag. */
2338
2339 parser->simple_key_allowed = 0;
2340
2341 /* Create the TAG token. */
2342
2343 token = yaml_parser_scan_tag(parser);
2344 if (!token) return 0;
2345
2346 /* Append the token to the queue. */
2347
2348 if (!yaml_parser_append_token(parser, token)) {
2349 yaml_token_delete(token);
2350 return 0;
2351 }
2352
2353 return 1;
2354}
2355
2356/*
2357 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
2358 */
2359
2360static int
2361yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
2362{
2363 yaml_token_t *token;
2364
2365 /* Remove any potential simple keys. */
2366
2367 if (!yaml_parser_remove_simple_key(parser))
2368 return 0;
2369
2370 /* A simple key may follow a block scalar. */
2371
2372 parser->simple_key_allowed = 1;
2373
2374 /* Create the SCALAR token. */
2375
2376 token = yaml_parser_scan_block_scalar(parser, literal);
2377 if (!token) return 0;
2378
2379 /* Append the token to the queue. */
2380
2381 if (!yaml_parser_append_token(parser, token)) {
2382 yaml_token_delete(token);
2383 return 0;
2384 }
2385
2386 return 1;
2387}
2388
2389/*
2390 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
2391 */
2392
2393static int
2394yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
2395{
2396 yaml_token_t *token;
2397
2398 /* A plain scalar could be a simple key. */
2399
2400 if (!yaml_parser_save_simple_key(parser))
2401 return 0;
2402
2403 /* A simple key cannot follow a flow scalar. */
2404
2405 parser->simple_key_allowed = 0;
2406
2407 /* Create the SCALAR token. */
2408
2409 token = yaml_parser_scan_flow_scalar(parser, single);
2410 if (!token) return 0;
2411
2412 /* Append the token to the queue. */
2413
2414 if (!yaml_parser_append_token(parser, token)) {
2415 yaml_token_delete(token);
2416 return 0;
2417 }
2418
2419 return 1;
2420}
2421
2422/*
2423 * Produce the SCALAR(...,plain) token.
2424 */
2425
2426static int
2427yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
2428{
2429 yaml_token_t *token;
2430
2431 /* A plain scalar could be a simple key. */
2432
2433 if (!yaml_parser_save_simple_key(parser))
2434 return 0;
2435
2436 /* A simple key cannot follow a flow scalar. */
2437
2438 parser->simple_key_allowed = 0;
2439
2440 /* Create the SCALAR token. */
2441
2442 token = yaml_parser_scan_plain_scalar(parser);
2443 if (!token) return 0;
2444
2445 /* Append the token to the queue. */
2446
2447 if (!yaml_parser_append_token(parser, token)) {
2448 yaml_token_delete(token);
2449 return 0;
2450 }
2451
2452 return 1;
2453}
2454
e71095e3
KS
2455/*
2456 * Eat whitespaces and comments until the next token is found.
2457 */
2458
2459static int
2460yaml_parser_scan_to_next_token(yaml_parser_t *parser)
2461{
2462 /* Until the next token is not found. */
2463
2464 while (1)
2465 {
2466 /* Allow the BOM mark to start a line. */
2467
2468 if (!UPDATE(parser, 1)) return 0;
2469
2470 if (parser->column == 0 && IS_BOM(parser))
2471 FORWARD(parser);
2472
2473 /*
2474 * Eat whitespaces.
2475 *
2476 * Tabs are allowed:
2477 *
2478 * - in the flow context;
2479 * - in the block context, but not at the beginning of the line or
2480 * after '-', '?', or ':' (complex value).
2481 */
2482
2483 if (!UPDATE(parser, 1)) return 0;
2484
2485 while (CHECK(parser,' ') ||
2486 ((parser->flow_level || !parser->simple_key_allowed) &&
2487 CHECK(parser, '\t'))) {
2488 FORWARD(parser);
2489 if (!UPDATE(parser, 1)) return 0;
2490 }
2491
2492 /* Eat a comment until a line break. */
2493
2494 if (CHECK(parser, '#')) {
2495 while (!IS_BREAKZ(parser)) {
2496 FORWARD(parser);
2497 if (!UPDATE(parser, 1)) return 0;
2498 }
2499 }
2500
2501 /* If it is a line break, eat it. */
2502
2503 if (IS_BREAK(parser))
2504 {
2505 if (!UPDATE(parser, 2)) return 0;
2506 FORWARD_LINE(parser);
2507
2508 /* In the block context, a new line may start a simple key. */
2509
2510 if (!parser->flow_level) {
2511 parser->simple_key_allowed = 1;
2512 }
2513 }
2514 else
2515 {
2516 /* We have found a token. */
2517
2518 break;
2519 }
2520 }
2521
2522 return 1;
2523}
2524
2525/*
2526 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
2527 *
2528 * Scope:
2529 * %YAML 1.1 # a comment \n
2530 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2531 * %TAG !yaml! tag:yaml.org,2002: \n
2532 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2533 */
2534
2535static yaml_token_t *
2536yaml_parser_scan_directive(yaml_parser_t *parser)
2537{
2538 yaml_mark_t start_mark, end_mark;
2539 yaml_char_t *name = NULL;
2540 int major, minor;
2541 yaml_char_t *handle = NULL, *prefix = NULL;
2542 yaml_token_t *token = NULL;
2543
2544 /* Eat '%'. */
2545
2546 start_mark = yaml_parser_get_mark(parser);
2547
2548 FORWARD(parser);
2549
2550 /* Scan the directive name. */
2551
2552 if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2553 goto error;
2554
2555 /* Is it a YAML directive? */
2556
2557 if (strcmp((char *)name, "YAML") == 0)
2558 {
2559 /* Scan the VERSION directive value. */
2560
2561 if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2562 &major, &minor))
2563 goto error;
2564
2565 end_mark = yaml_parser_get_mark(parser);
2566
2567 /* Create a VERSION-DIRECTIVE token. */
2568
2569 token = yaml_version_directive_token_new(major, minor,
2570 start_mark, end_mark);
92d41fe1
KS
2571 if (!token) {
2572 parser->error = YAML_MEMORY_ERROR;
2573 return 0;
2574 }
e71095e3
KS
2575 }
2576
2577 /* Is it a TAG directive? */
2578
2579 else if (strcmp((char *)name, "TAG") == 0)
2580 {
2581 /* Scan the TAG directive value. */
2582
2583 if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2584 &handle, &prefix))
2585 goto error;
2586
2587 end_mark = yaml_parser_get_mark(parser);
2588
2589 /* Create a TAG-DIRECTIVE token. */
2590
2591 token = yaml_tag_directive_token_new(handle, prefix,
2592 start_mark, end_mark);
92d41fe1
KS
2593 if (!token) {
2594 parser->error = YAML_MEMORY_ERROR;
2595 return 0;
2596 }
e71095e3
KS
2597 }
2598
2599 /* Unknown directive. */
2600
2601 else
2602 {
92d41fe1 2603 yaml_parser_set_scanner_error(parser, "while scanning a directive",
e71095e3
KS
2604 start_mark, "found uknown directive name");
2605 goto error;
2606 }
2607
2608 /* Eat the rest of the line including any comments. */
2609
2610 while (IS_BLANK(parser)) {
2611 FORWARD(parser);
2612 if (!UPDATE(parser, 1)) goto error;
2613 }
2614
2615 if (CHECK(parser, '#')) {
2616 while (!IS_BREAKZ(parser)) {
2617 FORWARD(parser);
2618 if (!UPDATE(parser, 1)) goto error;
2619 }
2620 }
2621
2622 /* Check if we are at the end of the line. */
2623
2624 if (!IS_BREAKZ(parser)) {
92d41fe1 2625 yaml_parser_set_scanner_error(parser, "while scanning a directive",
e71095e3
KS
2626 start_mark, "did not found expected comment or line break");
2627 goto error;
2628 }
2629
2630 /* Eat a line break. */
2631
2632 if (IS_BREAK(parser)) {
2633 if (!UPDATE(parser, 2)) goto error;
2634 FORWARD_LINE(parser);
2635 }
2636
2637 yaml_free(name);
2638
2639 return token;
2640
2641error:
2642 yaml_free(token);
2643 yaml_free(prefix);
2644 yaml_free(handle);
2645 yaml_free(name);
2646 return NULL;
2647}
2648
2649/*
2650 * Scan the directive name.
2651 *
2652 * Scope:
2653 * %YAML 1.1 # a comment \n
2654 * ^^^^
2655 * %TAG !yaml! tag:yaml.org,2002: \n
2656 * ^^^
2657 */
2658
2659static int
2660yaml_parser_scan_directive_name(yaml_parser_t *parser,
2661 yaml_mark_t start_mark, yaml_char_t **name)
2662{
2663 yaml_string_t string = yaml_parser_new_string(parser);
2664
2665 if (!string.buffer) goto error;
2666
2667 /* Consume the directive name. */
2668
2669 if (!UPDATE(parser, 1)) goto error;
2670
2671 while (IS_ALPHA(parser))
2672 {
2673 if (!RESIZE(parser, string)) goto error;
2674 COPY(parser, string);
2675 if (!UPDATE(parser, 1)) goto error;
2676 }
2677
2678 /* Check if the name is empty. */
2679
2680 if (string.buffer == string.pointer) {
2681 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2682 start_mark, "cannot found expected directive name");
2683 goto error;
2684 }
2685
2686 /* Check for an blank character after the name. */
2687
2688 if (!IS_BLANKZ(parser)) {
2689 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2690 start_mark, "found unexpected non-alphabetical character");
2691 goto error;
2692 }
2693
2694 *name = string.buffer;
2695
2696 return 1;
2697
2698error:
2699 yaml_free(string.buffer);
2700 return 0;
2701}
2702
2703/*
2704 * Scan the value of VERSION-DIRECTIVE.
2705 *
2706 * Scope:
2707 * %YAML 1.1 # a comment \n
2708 * ^^^^^^
2709 */
2710
2711static int
2712yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2713 yaml_mark_t start_mark, int *major, int *minor)
2714{
2715 /* Eat whitespaces. */
2716
2717 if (!UPDATE(parser, 1)) return 0;
2718
2719 while (IS_BLANK(parser)) {
2720 FORWARD(parser);
2721 if (!UPDATE(parser, 1)) return 0;
2722 }
2723
2724 /* Consume the major version number. */
2725
2726 if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2727 return 0;
2728
2729 /* Eat '.'. */
2730
2731 if (!CHECK(parser, '.')) {
2732 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2733 start_mark, "did not find expected digit or '.' character");
2734 }
2735
2736 FORWARD(parser);
2737
2738 /* Consume the minor version number. */
2739
2740 if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2741 return 0;
ab01bac8
KS
2742
2743 return 1;
e71095e3
KS
2744}
2745
2746#define MAX_NUMBER_LENGTH 9
2747
2748/*
2749 * Scan the version number of VERSION-DIRECTIVE.
2750 *
2751 * Scope:
2752 * %YAML 1.1 # a comment \n
2753 * ^
2754 * %YAML 1.1 # a comment \n
2755 * ^
2756 */
2757
2758static int
2759yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2760 yaml_mark_t start_mark, int *number)
2761{
2762 int value = 0;
2763 size_t length = 0;
2764
2765 /* Repeat while the next character is digit. */
2766
2767 if (!UPDATE(parser, 1)) return 0;
2768
2769 while (IS_DIGIT(parser))
2770 {
2771 /* Check if the number is too long. */
2772
2773 if (++length > MAX_NUMBER_LENGTH) {
2774 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2775 start_mark, "found extremely long version number");
2776 }
2777
2778 value = value*10 + AS_DIGIT(parser);
2779
2780 FORWARD(parser);
2781
2782 if (!UPDATE(parser, 1)) return 0;
2783 }
2784
2785 /* Check if the number was present. */
2786
2787 if (!length) {
2788 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2789 start_mark, "did not find expected version number");
2790 }
2791
2792 *number = value;
2793
2794 return 1;
2795}
2796
2797/*
2798 * Scan the value of a TAG-DIRECTIVE token.
2799 *
2800 * Scope:
2801 * %TAG !yaml! tag:yaml.org,2002: \n
2802 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2803 */
2804
2805static int
2806yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2807 yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2808{
2809 yaml_char_t *handle_value = NULL;
2810 yaml_char_t *prefix_value = NULL;
2811
2812 /* Eat whitespaces. */
2813
2814 if (!UPDATE(parser, 1)) goto error;
2815
2816 while (IS_BLANK(parser)) {
2817 FORWARD(parser);
2818 if (!UPDATE(parser, 1)) goto error;
2819 }
2820
2821 /* Scan a handle. */
2822
2823 if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2824 goto error;
2825
2826 /* Expect a whitespace. */
2827
2828 if (!UPDATE(parser, 1)) goto error;
2829
2830 if (!IS_BLANK(parser)) {
2831 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2832 start_mark, "did not find expected whitespace");
2833 goto error;
2834 }
2835
2836 /* Eat whitespaces. */
2837
2838 while (IS_BLANK(parser)) {
2839 FORWARD(parser);
2840 if (!UPDATE(parser, 1)) goto error;
2841 }
2842
2843 /* Scan a prefix. */
2844
2845 if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
2846 goto error;
2847
2848 /* Expect a whitespace or line break. */
2849
2850 if (!UPDATE(parser, 1)) goto error;
2851
2852 if (!IS_BLANKZ(parser)) {
2853 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2854 start_mark, "did not find expected whitespace or line break");
2855 goto error;
2856 }
2857
2858 *handle = handle_value;
2859 *prefix = prefix_value;
2860
2861 return 1;
2862
2863error:
2864 yaml_free(handle_value);
2865 yaml_free(prefix_value);
2866 return 0;
2867}
2868
2869static yaml_token_t *
2870yaml_parser_scan_anchor(yaml_parser_t *parser,
2871 yaml_token_type_t type)
2872{
2873 int length = 0;
2874 yaml_mark_t start_mark, end_mark;
2875 yaml_token_t *token = NULL;
2876 yaml_string_t string = yaml_parser_new_string(parser);
2877
2878 if (!string.buffer) goto error;
2879
2880 /* Eat the indicator character. */
2881
2882 start_mark = yaml_parser_get_mark(parser);
2883
2884 FORWARD(parser);
2885
2886 /* Consume the value. */
2887
2888 if (!UPDATE(parser, 1)) goto error;
2889
2890 while (IS_ALPHA(parser)) {
2891 if (!RESIZE(parser, string)) goto error;
2892 COPY(parser, string);
2893 if (!UPDATE(parser, 1)) goto error;
2894 length ++;
2895 }
2896
2897 end_mark = yaml_parser_get_mark(parser);
2898
2899 /*
2900 * Check if length of the anchor is greater than 0 and it is followed by
2901 * a whitespace character or one of the indicators:
2902 *
2903 * '?', ':', ',', ']', '}', '%', '@', '`'.
2904 */
2905
2906 if (!length || !(IS_BLANKZ(parser) || CHECK(parser, '?') || CHECK(parser, ':') ||
2907 CHECK(parser, ',') || CHECK(parser, ']') || CHECK(parser, '}') ||
2908 CHECK(parser, '%') || CHECK(parser, '@') || CHECK(parser, '`'))) {
2909 yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2910 "while scanning an anchor" : "while scanning an alias", start_mark,
2911 "did not find expected alphabetic or numeric character");
2912 goto error;
2913 }
2914
2915 /* Create a token. */
2916
2917 token = type == YAML_ANCHOR_TOKEN ?
2918 yaml_anchor_token_new(string.buffer, start_mark, end_mark) :
2919 yaml_alias_token_new(string.buffer, start_mark, end_mark);
92d41fe1
KS
2920 if (!token) {
2921 parser->error = YAML_MEMORY_ERROR;
2922 return 0;
2923 }
e71095e3
KS
2924
2925 return token;
2926
2927error:
2928 yaml_free(string.buffer);
2929 yaml_free(token);
2930 return 0;
2931}
2932
2933/*
2934 * Scan a TAG token.
2935 */
2936
2937static yaml_token_t *
2938yaml_parser_scan_tag(yaml_parser_t *parser)
2939{
2940 yaml_char_t *handle = NULL;
2941 yaml_char_t *suffix = NULL;
2942 yaml_token_t *token = NULL;
2943 yaml_mark_t start_mark, end_mark;
2944
2945 start_mark = yaml_parser_get_mark(parser);
2946
2947 /* Check if the tag is in the canonical form. */
2948
2949 if (!UPDATE(parser, 2)) goto error;
2950
2951 if (CHECK_AT(parser, '<', 1))
2952 {
2953 /* Set the handle to '' */
2954
2955 handle = yaml_malloc(1);
2956 if (!handle) goto error;
2957 handle[0] = '\0';
2958
2959 /* Eat '!<' */
2960
2961 FORWARD(parser);
2962 FORWARD(parser);
2963
2964 /* Consume the tag value. */
2965
2966 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2967 goto error;
2968
2969 /* Check for '>' and eat it. */
2970
2971 if (!CHECK(parser, '>')) {
2972 yaml_parser_set_scanner_error(parser, "while scanning a tag",
2973 start_mark, "did not find the expected '>'");
2974 goto error;
2975 }
2976
2977 FORWARD(parser);
2978 }
2979 else
2980 {
2981 /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2982
2983 /* First, try to scan a handle. */
2984
2985 if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2986 goto error;
2987
2988 /* Check if it is, indeed, handle. */
2989
2990 if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2991 {
2992 /* Scan the suffix now. */
2993
2994 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2995 goto error;
2996 }
2997 else
2998 {
2999 /* It wasn't a handle after all. Scan the rest of the tag. */
3000
3001 if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
3002 goto error;
3003
3004 /* Set the handle to '!'. */
3005
3006 yaml_free(handle);
3007 handle = yaml_malloc(2);
3008 if (!handle) goto error;
3009 handle[0] = '!';
3010 handle[1] = '\0';
7e32c194
KS
3011
3012 /*
3013 * A special case: the '!' tag.
3014 */
3015
3016 if (suffix[0] == '\0') {
3017 yaml_char_t *tmp = handle;
3018 handle = suffix;
3019 suffix = tmp;
3020 }
e71095e3
KS
3021 }
3022 }
3023
3024 /* Check the character which ends the tag. */
3025
3026 if (!UPDATE(parser, 1)) goto error;
3027
3028 if (!IS_BLANKZ(parser)) {
3029 yaml_parser_set_scanner_error(parser, "while scanning a tag",
3030 start_mark, "did not found expected whitespace or line break");
3031 goto error;
3032 }
3033
3034 end_mark = yaml_parser_get_mark(parser);
3035
3036 /* Create a token. */
3037
3038 token = yaml_tag_token_new(handle, suffix, start_mark, end_mark);
92d41fe1
KS
3039 if (!token) {
3040 parser->error = YAML_MEMORY_ERROR;
3041 return 0;
3042 }
e71095e3
KS
3043
3044 return token;
3045
3046error:
3047 yaml_free(handle);
3048 yaml_free(suffix);
3049 return NULL;
3050}
3051
3052/*
3053 * Scan a tag handle.
3054 */
3055
3056static int
3057yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
3058 yaml_mark_t start_mark, yaml_char_t **handle)
3059{
3060 yaml_string_t string = yaml_parser_new_string(parser);
3061
3062 if (!string.buffer) goto error;
3063
3064 /* Check the initial '!' character. */
3065
3066 if (!UPDATE(parser, 1)) goto error;
3067
3068 if (!CHECK(parser, '!')) {
3069 yaml_parser_set_scanner_error(parser, directive ?
3070 "while scanning a tag directive" : "while scanning a tag",
3071 start_mark, "did not find expected '!'");
3072 goto error;
3073 }
3074
3075 /* Copy the '!' character. */
3076
3077 COPY(parser, string);
3078
3079 /* Copy all subsequent alphabetical and numerical characters. */
3080
3081 if (!UPDATE(parser, 1)) goto error;
3082
3083 while (IS_ALPHA(parser))
3084 {
3085 if (!RESIZE(parser, string)) goto error;
3086 COPY(parser, string);
3087 if (!UPDATE(parser, 1)) goto error;
3088 }
3089
3090 /* Check if the trailing character is '!' and copy it. */
3091
3092 if (CHECK(parser, '!'))
3093 {
3094 if (!RESIZE(parser, string)) goto error;
3095 COPY(parser, string);
3096 }
3097 else
3098 {
3099 /*
7e32c194
KS
3100 * It's either the '!' tag or not really a tag handle. If it's a %TAG
3101 * directive, it's an error. If it's a tag token, it must be a part of
3102 * URI.
e71095e3
KS
3103 */
3104
7e32c194
KS
3105 if (directive && !(string.buffer[0] == '!' && string.buffer[1] == '\0')) {
3106 yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
e71095e3
KS
3107 start_mark, "did not find expected '!'");
3108 goto error;
3109 }
3110 }
3111
3112 *handle = string.buffer;
3113
3114 return 1;
3115
3116error:
3117 yaml_free(string.buffer);
3118 return 0;
3119}
3120
3121/*
3122 * Scan a tag.
3123 */
3124
3125static int
3126yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
3127 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
3128{
3129 size_t length = head ? strlen((char *)head) : 0;
3130 yaml_string_t string = yaml_parser_new_string(parser);
3131
3132 if (!string.buffer) goto error;
3133
3134 /* Resize the string to include the head. */
3135
3136 while (string.size <= length) {
3137 if (!yaml_parser_resize_string(parser, &string)) goto error;
3138 }
3139
7e32c194
KS
3140 /*
3141 * Copy the head if needed.
3142 *
3143 * Note that we don't copy the leading '!' character.
3144 */
e71095e3 3145
7e32c194
KS
3146 if (length > 1) {
3147 memcpy(string.buffer, head+1, length-1);
3148 string.pointer += length-1;
e71095e3
KS
3149 }
3150
3151 /* Scan the tag. */
3152
3153 if (!UPDATE(parser, 1)) goto error;
3154
3155 /*
3156 * The set of characters that may appear in URI is as follows:
3157 *
3158 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
3159 * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
3160 * '%'.
3161 */
3162
3163 while (IS_ALPHA(parser) || CHECK(parser, ';') || CHECK(parser, '/') ||
3164 CHECK(parser, '?') || CHECK(parser, ':') || CHECK(parser, '@') ||
3165 CHECK(parser, '&') || CHECK(parser, '=') || CHECK(parser, '+') ||
3166 CHECK(parser, '$') || CHECK(parser, ',') || CHECK(parser, '.') ||
3167 CHECK(parser, '!') || CHECK(parser, '~') || CHECK(parser, '*') ||
3168 CHECK(parser, '\'') || CHECK(parser, '(') || CHECK(parser, ')') ||
3169 CHECK(parser, '[') || CHECK(parser, ']') || CHECK(parser, '%'))
3170 {
3171 if (!RESIZE(parser, string)) goto error;
3172
3173 /* Check if it is a URI-escape sequence. */
3174
3175 if (CHECK(parser, '%')) {
3176 if (!yaml_parser_scan_uri_escapes(parser,
3177 directive, start_mark, &string)) goto error;
3178 }
3179 else {
3180 COPY(parser, string);
3181 }
3182
3183 length ++;
3184 if (!UPDATE(parser, 1)) goto error;
3185 }
3186
3187 /* Check if the tag is non-empty. */
3188
3189 if (!length) {
3190 yaml_parser_set_scanner_error(parser, directive ?
3191 "while parsing a %TAG directive" : "while parsing a tag",
3192 start_mark, "did not find expected tag URI");
3193 goto error;
3194 }
3195
3196 *uri = string.buffer;
3197
3198 return 1;
3199
3200error:
3201 yaml_free(string.buffer);
3202 return 0;
3203}
3204
3205/*
3206 * Decode an URI-escape sequence corresponding to a single UTF-8 character.
3207 */
3208
3209static int
3210yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
3211 yaml_mark_t start_mark, yaml_string_t *string)
3212{
3213 int width = 0;
3214
3215 /* Decode the required number of characters. */
3216
3217 do {
3218
3219 unsigned char octet = 0;
3220
3221 /* Check for a URI-escaped octet. */
3222
3223 if (!UPDATE(parser, 3)) return 0;
3224
3225 if (!(CHECK(parser, '%') && IS_HEX_AT(parser, 1) && IS_HEX_AT(parser, 2))) {
3226 return yaml_parser_set_scanner_error(parser, directive ?
3227 "while parsing a %TAG directive" : "while parsing a tag",
3228 start_mark, "did not find URI escaped octet");
3229 }
3230
3231 /* Get the octet. */
3232
3233 octet = (AS_HEX_AT(parser, 1) << 4) + AS_HEX_AT(parser, 2);
3234
3235 /* If it is the leading octet, determine the length of the UTF-8 sequence. */
3236
3237 if (!width)
3238 {
3239 width = (octet & 0x80) == 0x00 ? 1 :
3240 (octet & 0xE0) == 0xC0 ? 2 :
3241 (octet & 0xF0) == 0xE0 ? 3 :
3242 (octet & 0xF8) == 0xF0 ? 4 : 0;
3243 if (!width) {
3244 return yaml_parser_set_scanner_error(parser, directive ?
3245 "while parsing a %TAG directive" : "while parsing a tag",
3246 start_mark, "found an incorrect leading UTF-8 octet");
3247 }
3248 }
3249 else
3250 {
3251 /* Check if the trailing octet is correct. */
3252
3253 if ((octet & 0xC0) != 0x80) {
3254 return yaml_parser_set_scanner_error(parser, directive ?
3255 "while parsing a %TAG directive" : "while parsing a tag",
3256 start_mark, "found an incorrect trailing UTF-8 octet");
3257 }
3258 }
3259
3260 /* Copy the octet and move the pointers. */
3261
3262 *(string->pointer++) = octet;
3263 FORWARD(parser);
3264 FORWARD(parser);
3265 FORWARD(parser);
3266
3267 } while (--width);
3268
3269 return 1;
3270}
3271
92d41fe1
KS
3272/*
3273 * Scan a block scalar.
3274 */
3275
3276static yaml_token_t *
3277yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal)
3278{
3279 yaml_mark_t start_mark;
3280 yaml_mark_t end_mark;
3281 yaml_string_t string = yaml_parser_new_string(parser);
21fbedd4
KS
3282 yaml_string_t leading_break = yaml_parser_new_string(parser);
3283 yaml_string_t trailing_breaks = yaml_parser_new_string(parser);
92d41fe1
KS
3284 yaml_token_t *token = NULL;
3285 int chomping = 0;
3286 int increment = 0;
3287 int indent = 0;
3288 int leading_blank = 0;
3289 int trailing_blank = 0;
3290
3291 if (!string.buffer) goto error;
21fbedd4
KS
3292 if (!leading_break.buffer) goto error;
3293 if (!trailing_breaks.buffer) goto error;
92d41fe1
KS
3294
3295 /* Eat the indicator '|' or '>'. */
3296
3297 start_mark = yaml_parser_get_mark(parser);
3298
3299 FORWARD(parser);
3300
3301 /* Scan the additional block scalar indicators. */
3302
3303 if (!UPDATE(parser, 1)) goto error;
3304
3305 /* Check for a chomping indicator. */
3306
3307 if (CHECK(parser, '+') || CHECK(parser, '-'))
3308 {
3309 /* Set the chomping method and eat the indicator. */
3310
3311 chomping = CHECK(parser, '+') ? +1 : -1;
3312
3313 FORWARD(parser);
3314
3315 /* Check for an indentation indicator. */
3316
3317 if (!UPDATE(parser, 1)) goto error;
3318
3319 if (IS_DIGIT(parser))
3320 {
3321 /* Check that the intendation is greater than 0. */
3322
3323 if (CHECK(parser, '0')) {
3324 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3325 start_mark, "found an intendation indicator equal to 0");
3326 goto error;
3327 }
3328
3329 /* Get the intendation level and eat the indicator. */
3330
3331 increment = AS_DIGIT(parser);
3332
3333 FORWARD(parser);
3334 }
3335 }
3336
3337 /* Do the same as above, but in the opposite order. */
3338
3339 else if (IS_DIGIT(parser))
3340 {
3341 if (CHECK(parser, '0')) {
3342 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3343 start_mark, "found an intendation indicator equal to 0");
3344 goto error;
3345 }
3346
3347 increment = AS_DIGIT(parser);
3348
3349 FORWARD(parser);
3350
3351 if (!UPDATE(parser, 1)) goto error;
3352
3353 if (CHECK(parser, '+') || CHECK(parser, '-')) {
3354 chomping = CHECK(parser, '+') ? +1 : -1;
3355 FORWARD(parser);
3356 }
3357 }
3358
3359 /* Eat whitespaces and comments to the end of the line. */
3360
3361 if (!UPDATE(parser, 1)) goto error;
3362
3363 while (IS_BLANK(parser)) {
3364 FORWARD(parser);
3365 if (!UPDATE(parser, 1)) goto error;
3366 }
3367
3368 if (CHECK(parser, '#')) {
3369 while (!IS_BREAKZ(parser)) {
3370 FORWARD(parser);
3371 if (!UPDATE(parser, 1)) goto error;
3372 }
3373 }
3374
3375 /* Check if we are at the end of the line. */
3376
3377 if (!IS_BREAKZ(parser)) {
3378 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3379 start_mark, "did not found expected comment or line break");
3380 goto error;
3381 }
3382
3383 /* Eat a line break. */
3384
3385 if (IS_BREAK(parser)) {
3386 if (!UPDATE(parser, 2)) goto error;
3387 FORWARD_LINE(parser);
3388 }
3389
3390 end_mark = yaml_parser_get_mark(parser);
3391
3392 /* Set the intendation level if it was specified. */
3393
3394 if (increment) {
3395 indent = parser->indent >= 0 ? parser->indent+increment : increment;
3396 }
3397
3398 /* Scan the leading line breaks and determine the indentation level if needed. */
3399
21fbedd4 3400 if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
92d41fe1
KS
3401 start_mark, &end_mark)) goto error;
3402
3403 /* Scan the block scalar content. */
3404
3405 if (!UPDATE(parser, 1)) goto error;
3406
3407 while (parser->column == indent && !IS_Z(parser))
3408 {
3409 /*
3410 * We are at the beginning of a non-empty line.
3411 */
3412
3413 /* Is it a trailing whitespace? */
3414
3415 trailing_blank = IS_BLANK(parser);
3416
3417 /* Check if we need to fold the leading line break. */
3418
21fbedd4 3419 if (!literal && (*leading_break.buffer == '\n')
92d41fe1
KS
3420 && !leading_blank && !trailing_blank)
3421 {
3422 /* Do we need to join the lines by space? */
3423
21fbedd4 3424 if (*trailing_breaks.buffer == '\0') {
92d41fe1
KS
3425 if (!RESIZE(parser, string)) goto error;
3426 *(string.pointer ++) = ' ';
3427 }
3428
21fbedd4 3429 yaml_parser_clear_string(parser, &leading_break);
92d41fe1
KS
3430 }
3431 else {
21fbedd4 3432 if (!JOIN(parser, string, leading_break)) goto error;
92d41fe1
KS
3433 }
3434
3435 /* Append the remaining line breaks. */
3436
21fbedd4 3437 if (!JOIN(parser, string, trailing_breaks)) goto error;
92d41fe1
KS
3438
3439 /* Is it a leading whitespace? */
3440
3441 leading_blank = IS_BLANK(parser);
3442
3443 /* Consume the current line. */
3444
3445 while (!IS_BREAKZ(parser)) {
3446 if (!RESIZE(parser, string)) goto error;
3447 COPY(parser, string);
3448 if (!UPDATE(parser, 1)) goto error;
3449 }
3450
3451 /* Consume the line break. */
3452
3453 if (!UPDATE(parser, 2)) goto error;
3454
21fbedd4 3455 COPY_LINE(parser, leading_break);
92d41fe1
KS
3456
3457 /* Eat the following intendation spaces and line breaks. */
3458
3459 if (!yaml_parser_scan_block_scalar_breaks(parser,
21fbedd4 3460 &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
92d41fe1
KS
3461 }
3462
3463 /* Chomp the tail. */
3464
3465 if (chomping != -1) {
21fbedd4 3466 if (!JOIN(parser, string, leading_break)) goto error;
92d41fe1
KS
3467 }
3468 if (chomping == 1) {
21fbedd4 3469 if (!JOIN(parser, string, trailing_breaks)) goto error;
92d41fe1
KS
3470 }
3471
3472 /* Create a token. */
3473
3474 token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer,
3475 literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
3476 start_mark, end_mark);
3477 if (!token) {
3478 parser->error = YAML_MEMORY_ERROR;
3479 return 0;
3480 }
3481
21fbedd4
KS
3482 yaml_free(leading_break.buffer);
3483 yaml_free(trailing_breaks.buffer);
92d41fe1
KS
3484
3485 return token;
3486
3487error:
3488 yaml_free(string.buffer);
21fbedd4
KS
3489 yaml_free(leading_break.buffer);
3490 yaml_free(trailing_breaks.buffer);
92d41fe1
KS
3491
3492 return NULL;
3493}
3494
3495/*
3496 * Scan intendation spaces and line breaks for a block scalar. Determine the
3497 * intendation level if needed.
3498 */
3499
3500static int
3501yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
3502 int *indent, yaml_string_t *breaks,
3503 yaml_mark_t start_mark, yaml_mark_t *end_mark)
3504{
3505 int max_indent = 0;
3506
3507 *end_mark = yaml_parser_get_mark(parser);
3508
3509 /* Eat the intendation spaces and line breaks. */
3510
3511 while (1)
3512 {
3513 /* Eat the intendation spaces. */
3514
3515 if (!UPDATE(parser, 1)) return 0;
3516
3517 while ((!*indent || parser->column < *indent) && IS_SPACE(parser)) {
3518 FORWARD(parser);
3519 if (!UPDATE(parser, 1)) return 0;
3520 }
3521
3522 if (parser->column > max_indent)
3523 max_indent = parser->column;
3524
3525 /* Check for a tab character messing the intendation. */
3526
3527 if ((!*indent || parser->column < *indent) && IS_TAB(parser)) {
3528 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3529 start_mark, "found a tab character where an intendation space is expected");
3530 }
3531
3532 /* Have we found a non-empty line? */
3533
3534 if (!IS_BREAK(parser)) break;
3535
3536 /* Consume the line break. */
3537
3538 if (!UPDATE(parser, 2)) return 0;
3539 if (!RESIZE(parser, *breaks)) return 0;
3540 COPY_LINE(parser, *breaks);
3541 *end_mark = yaml_parser_get_mark(parser);
3542 }
3543
3544 /* Determine the indentation level if needed. */
3545
3546 if (!*indent) {
3547 *indent = max_indent;
3548 if (*indent < parser->indent + 1)
3549 *indent = parser->indent + 1;
3550 if (*indent < 1)
3551 *indent = 1;
3552 }
3553
3554 return 1;
3555}
3556
21fbedd4
KS
3557/*
3558 * Scan a quoted scalar.
3559 */
3560
3561static yaml_token_t *
3562yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single)
3563{
3564 yaml_mark_t start_mark;
3565 yaml_mark_t end_mark;
3566 yaml_string_t string = yaml_parser_new_string(parser);
3567 yaml_string_t leading_break = yaml_parser_new_string(parser);
3568 yaml_string_t trailing_breaks = yaml_parser_new_string(parser);
3569 yaml_string_t whitespaces = yaml_parser_new_string(parser);
3570 yaml_token_t *token = NULL;
3571 int leading_blanks;
3572
3573 if (!string.buffer) goto error;
3574 if (!leading_break.buffer) goto error;
3575 if (!trailing_breaks.buffer) goto error;
3576 if (!whitespaces.buffer) goto error;
3577
3578 /* Eat the left quote. */
3579
3580 start_mark = yaml_parser_get_mark(parser);
3581
3582 FORWARD(parser);
3583
3584 /* Consume the content of the quoted scalar. */
3585
3586 while (1)
3587 {
3588 /* Check that there are no document indicators at the beginning of the line. */
3589
3590 if (!UPDATE(parser, 4)) goto error;
3591
3592 if (parser->column == 0 &&
3593 ((CHECK_AT(parser, '-', 0) &&
3594 CHECK_AT(parser, '-', 1) &&
3595 CHECK_AT(parser, '-', 2)) ||
3596 (CHECK_AT(parser, '.', 0) &&
3597 CHECK_AT(parser, '.', 1) &&
3598 CHECK_AT(parser, '.', 2))) &&
3599 IS_BLANKZ_AT(parser, 3))
3600 {
3601 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3602 start_mark, "found unexpected document indicator");
3603 goto error;
3604 }
3605
3606 /* Check for EOF. */
3607
3608 if (IS_Z(parser)) {
3609 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3610 start_mark, "found unexpected end of stream");
3611 goto error;
3612 }
3613
3614 /* Consume non-blank characters. */
3615
3616 if (!UPDATE(parser, 2)) goto error;
3617 if (!RESIZE(parser, string)) goto error;
3618
3619 leading_blanks = 0;
3620
3621 while (!IS_BLANKZ(parser))
3622 {
3623 /* Check for an escaped single quote. */
3624
3625 if (single && CHECK_AT(parser, '\'', 0) && CHECK_AT(parser, '\'', 1))
3626 {
3627 *(string.pointer++) = '\'';
3628 FORWARD(parser);
3629 FORWARD(parser);
3630 }
3631
3632 /* Check for the right quote. */
3633
3634 else if (CHECK(parser, single ? '\'' : '"'))
3635 {
3636 break;
3637 }
3638
3639 /* Check for an escaped line break. */
3640
3641 else if (!single && CHECK(parser, '\\') && IS_BREAK_AT(parser, 1))
3642 {
3643 if (!UPDATE(parser, 3)) goto error;
3644 FORWARD(parser);
3645 FORWARD_LINE(parser);
3646 leading_blanks = 1;
3647 break;
3648 }
3649
3650 /* Check for an escape sequence. */
3651
3652 else if (!single && CHECK(parser, '\\'))
3653 {
3654 int code_length = 0;
3655
3656 /* Check the escape character. */
3657
3658 switch (parser->pointer[1])
3659 {
3660 case '0':
3661 *(string.pointer++) = '\0';
3662 break;
3663
3664 case 'a':
3665 *(string.pointer++) = '\x07';
3666 break;
3667
3668 case 'b':
3669 *(string.pointer++) = '\x08';
3670 break;
3671
3672 case 't':
3673 case '\t':
3674 *(string.pointer++) = '\x09';
3675 break;
3676
3677 case 'n':
3678 *(string.pointer++) = '\x0A';
3679 break;
3680
3681 case 'v':
3682 *(string.pointer++) = '\x0B';
3683 break;
3684
3685 case 'f':
3686 *(string.pointer++) = '\x0C';
3687 break;
3688
3689 case 'r':
3690 *(string.pointer++) = '\x0D';
3691 break;
3692
3693 case 'e':
3694 *(string.pointer++) = '\x1B';
3695 break;
3696
3697 case ' ':
3698 *(string.pointer++) = '\x20';
3699 break;
3700
3701 case '"':
3702 *(string.pointer++) = '"';
3703 break;
3704
3705 case '\'':
3706 *(string.pointer++) = '\'';
3707 break;
3708
7e32c194
KS
3709 case '\\':
3710 *(string.pointer++) = '\\';
3711 break;
3712
21fbedd4
KS
3713 case 'N': /* NEL (#x85) */
3714 *(string.pointer++) = '\xC2';
3715 *(string.pointer++) = '\x85';
3716 break;
3717
3718 case '_': /* #xA0 */
3719 *(string.pointer++) = '\xC2';
3720 *(string.pointer++) = '\xA0';
3721 break;
3722
3723 case 'L': /* LS (#x2028) */
3724 *(string.pointer++) = '\xE2';
3725 *(string.pointer++) = '\x80';
3726 *(string.pointer++) = '\xA8';
3727 break;
3728
3729 case 'P': /* PS (#x2029) */
3730 *(string.pointer++) = '\xE2';
3731 *(string.pointer++) = '\x80';
7e32c194 3732 *(string.pointer++) = '\xA9';
21fbedd4
KS
3733 break;
3734
3735 case 'x':
3736 code_length = 2;
3737 break;
3738
3739 case 'u':
3740 code_length = 4;
3741 break;
3742
3743 case 'U':
3744 code_length = 8;
3745 break;
3746
3747 default:
3748 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3749 start_mark, "found unknown escape character");
3750 goto error;
3751 }
3752
3753 FORWARD(parser);
3754 FORWARD(parser);
3755
3756 /* Consume an arbitrary escape code. */
3757
3758 if (code_length)
3759 {
3760 unsigned int value = 0;
3761 int k;
3762
3763 /* Scan the character value. */
3764
3765 if (!UPDATE(parser, code_length)) goto error;
3766
3767 for (k = 0; k < code_length; k ++) {
3768 if (!IS_HEX_AT(parser, k)) {
3769 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3770 start_mark, "did not find expected hexdecimal number");
3771 goto error;
3772 }
3773 value = (value << 4) + AS_HEX_AT(parser, k);
3774 }
3775
3776 /* Check the value and write the character. */
3777
3778 if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3779 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3780 start_mark, "found invalid Unicode character escape code");
3781 goto error;
3782 }
3783
3784 if (value <= 0x7F) {
3785 *(string.pointer++) = value;
3786 }
3787 else if (value <= 0x7FF) {
3788 *(string.pointer++) = 0xC0 + (value >> 6);
3789 *(string.pointer++) = 0x80 + (value & 0x3F);
3790 }
3791 else if (value <= 0xFFFF) {
3792 *(string.pointer++) = 0xE0 + (value >> 12);
3793 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3794 *(string.pointer++) = 0x80 + (value & 0x3F);
3795 }
3796 else {
3797 *(string.pointer++) = 0xF0 + (value >> 18);
3798 *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3799 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3800 *(string.pointer++) = 0x80 + (value & 0x3F);
3801 }
3802
3803 /* Advance the pointer. */
3804
3805 for (k = 0; k < code_length; k ++) {
3806 FORWARD(parser);
3807 }
3808 }
3809 }
3810
3811 else
3812 {
3813 /* It is a non-escaped non-blank character. */
3814
3815 COPY(parser, string);
3816 }
3817
3818 if (!UPDATE(parser, 2)) goto error;
3819 if (!RESIZE(parser, string)) goto error;
3820 }
3821
3822 /* Check if we are at the end of the scalar. */
3823
3824 if (CHECK(parser, single ? '\'' : '"'))
3825 break;
3826
3827 /* Consume blank characters. */
3828
3829 if (!UPDATE(parser, 1)) goto error;
3830
3831 while (IS_BLANK(parser) || IS_BREAK(parser))
3832 {
3833 if (IS_BLANK(parser))
3834 {
3835 /* Consume a space or a tab character. */
3836
3837 if (!leading_blanks) {
3838 if (!RESIZE(parser, whitespaces)) goto error;
3839 COPY(parser, whitespaces);
3840 }
7e32c194
KS
3841 else {
3842 FORWARD(parser);
3843 }
21fbedd4
KS
3844 }
3845 else
3846 {
3847 if (!UPDATE(parser, 2)) goto error;
3848
3849 /* Check if it is a first line break. */
3850
3851 if (!leading_blanks)
3852 {
3853 yaml_parser_clear_string(parser, &whitespaces);
3854 COPY_LINE(parser, leading_break);
3855 leading_blanks = 1;
3856 }
3857 else
3858 {
3859 if (!RESIZE(parser, trailing_breaks)) goto error;
3860 COPY_LINE(parser, trailing_breaks);
3861 }
3862 }
3863 if (!UPDATE(parser, 1)) goto error;
3864 }
3865
3866 /* Join the whitespaces or fold line breaks. */
3867
3868 if (!RESIZE(parser, string)) goto error;
3869
3870 if (leading_blanks)
3871 {
3872 /* Do we need to fold line breaks? */
3873
3874 if (leading_break.buffer[0] == '\n') {
3875 if (trailing_breaks.buffer[0] == '\0') {
3876 *(string.pointer++) = ' ';
3877 }
3878 else {
3879 if (!JOIN(parser, string, trailing_breaks)) goto error;
3880 }
3881 yaml_parser_clear_string(parser, &leading_break);
3882 }
3883 else {
3884 if (!JOIN(parser, string, leading_break)) goto error;
3885 if (!JOIN(parser, string, trailing_breaks)) goto error;
3886 }
3887 }
3888 else
3889 {
3890 if (!JOIN(parser, string, whitespaces)) goto error;
3891 }
3892 }
3893
3894 /* Eat the right quote. */
3895
3896 FORWARD(parser);
3897
3898 end_mark = yaml_parser_get_mark(parser);
3899
3900 /* Create a token. */
3901
3902 token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer,
3903 single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3904 start_mark, end_mark);
3905 if (!token) {
3906 parser->error = YAML_MEMORY_ERROR;
3907 return 0;
3908 }
3909
3910 yaml_free(leading_break.buffer);
3911 yaml_free(trailing_breaks.buffer);
3912 yaml_free(whitespaces.buffer);
3913
3914 return token;
3915
3916error:
3917 yaml_free(string.buffer);
3918 yaml_free(leading_break.buffer);
3919 yaml_free(trailing_breaks.buffer);
3920 yaml_free(whitespaces.buffer);
3921
3922 return NULL;
3923}
3924
3925/*
3926 * Scan a plain scalar.
3927 */
3928
3929static yaml_token_t *
3930yaml_parser_scan_plain_scalar(yaml_parser_t *parser)
3931{
3932 yaml_mark_t start_mark;
3933 yaml_mark_t end_mark;
3934 yaml_string_t string = yaml_parser_new_string(parser);
3935 yaml_string_t leading_break = yaml_parser_new_string(parser);
3936 yaml_string_t trailing_breaks = yaml_parser_new_string(parser);
3937 yaml_string_t whitespaces = yaml_parser_new_string(parser);
3938 yaml_token_t *token = NULL;
3939 int leading_blanks = 0;
3940 int indent = parser->indent+1;
3941
3942 if (!string.buffer) goto error;
3943 if (!leading_break.buffer) goto error;
3944 if (!trailing_breaks.buffer) goto error;
3945 if (!whitespaces.buffer) goto error;
3946
3947 start_mark = yaml_parser_get_mark(parser);
3948
3949 /* Consume the content of the plain scalar. */
3950
3951 while (1)
3952 {
3953 /* Check for a document indicator. */
3954
3955 if (!UPDATE(parser, 4)) goto error;
3956
3957 if (parser->column == 0 &&
3958 ((CHECK_AT(parser, '-', 0) &&
3959 CHECK_AT(parser, '-', 1) &&
3960 CHECK_AT(parser, '-', 2)) ||
3961 (CHECK_AT(parser, '.', 0) &&
3962 CHECK_AT(parser, '.', 1) &&
3963 CHECK_AT(parser, '.', 2))) &&
3964 IS_BLANKZ_AT(parser, 3)) break;
3965
3966 /* Check for a comment. */
3967
3968 if (CHECK(parser, '#'))
3969 break;
3970
3971 /* Consume non-blank characters. */
3972
3973 while (!IS_BLANKZ(parser))
3974 {
7e32c194 3975 /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
21fbedd4
KS
3976
3977 if (parser->flow_level && CHECK(parser, ':') && !IS_BLANKZ_AT(parser, 1)) {
3978 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3979 start_mark, "found unexpected ':'");
3980 goto error;
3981 }
3982
3983 /* Check for indicators that may end a plain scalar. */
3984
3985 if ((CHECK(parser, ':') && IS_BLANKZ_AT(parser, 1)) ||
3986 (parser->flow_level &&
3987 (CHECK(parser, ',') || CHECK(parser, ':') ||
3988 CHECK(parser, '?') || CHECK(parser, '[') ||
3989 CHECK(parser, ']') || CHECK(parser, '{') ||
3990 CHECK(parser, '}'))))
3991 break;
3992
3993 /* Check if we need to join whitespaces and breaks. */
3994
3995 if (leading_blanks || whitespaces.buffer != whitespaces.pointer)
3996 {
3997 if (!RESIZE(parser, string)) goto error;
3998
3999 if (leading_blanks)
4000 {
4001 /* Do we need to fold line breaks? */
4002
4003 if (leading_break.buffer[0] == '\n') {
4004 if (trailing_breaks.buffer[0] == '\0') {
4005 *(string.pointer++) = ' ';
4006 }
4007 else {
4008 if (!JOIN(parser, string, trailing_breaks)) goto error;
4009 }
4010 yaml_parser_clear_string(parser, &leading_break);
4011 }
4012 else {
4013 if (!JOIN(parser, string, leading_break)) goto error;
4014 if (!JOIN(parser, string, trailing_breaks)) goto error;
4015 }
4016
4017 leading_blanks = 0;
4018 }
4019 else
4020 {
4021 if (!JOIN(parser, string, whitespaces)) goto error;
4022 }
4023 }
4024
4025 /* Copy the character. */
4026
4027 if (!RESIZE(parser, string)) goto error;
4028
4029 COPY(parser, string);
4030
4031 end_mark = yaml_parser_get_mark(parser);
4032
4033 if (!UPDATE(parser, 2)) goto error;
4034 }
4035
4036 /* Is it the end? */
4037
4038 if (!(IS_BLANK(parser) || IS_BREAK(parser)))
4039 break;
4040
4041 /* Consume blank characters. */
4042
4043 if (!UPDATE(parser, 1)) goto error;
4044
4045 while (IS_BLANK(parser) || IS_BREAK(parser))
4046 {
4047 if (IS_BLANK(parser))
4048 {
4049 /* Check for tab character that abuse intendation. */
4050
4051 if (leading_blanks && parser->column < indent && IS_TAB(parser)) {
4052 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
4053 start_mark, "found a tab character that violate intendation");
7e32c194 4054 goto error;
21fbedd4
KS
4055 }
4056
4057 /* Consume a space or a tab character. */
4058
4059 if (!leading_blanks) {
4060 if (!RESIZE(parser, whitespaces)) goto error;
4061 COPY(parser, whitespaces);
4062 }
7e32c194
KS
4063 else {
4064 FORWARD(parser);
4065 }
21fbedd4
KS
4066 }
4067 else
4068 {
4069 if (!UPDATE(parser, 2)) goto error;
4070
4071 /* Check if it is a first line break. */
4072
4073 if (!leading_blanks)
4074 {
4075 yaml_parser_clear_string(parser, &whitespaces);
4076 COPY_LINE(parser, leading_break);
4077 leading_blanks = 1;
4078 }
4079 else
4080 {
4081 if (!RESIZE(parser, trailing_breaks)) goto error;
4082 COPY_LINE(parser, trailing_breaks);
4083 }
4084 }
4085 if (!UPDATE(parser, 1)) goto error;
4086 }
4087
4088 /* Check intendation level. */
4089
7e32c194 4090 if (!parser->flow_level && parser->column < indent)
21fbedd4
KS
4091 break;
4092 }
4093
4094 /* Create a token. */
4095
4096 token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer,
4097 YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
4098 if (!token) {
4099 parser->error = YAML_MEMORY_ERROR;
4100 return 0;
4101 }
4102
4103 /* Note that we change the 'simple_key_allowed' flag. */
4104
4105 if (leading_blanks) {
4106 parser->simple_key_allowed = 1;
4107 }
4108
4109 yaml_free(leading_break.buffer);
4110 yaml_free(trailing_breaks.buffer);
4111 yaml_free(whitespaces.buffer);
4112
4113 return token;
4114
4115error:
4116 yaml_free(string.buffer);
4117 yaml_free(leading_break.buffer);
4118 yaml_free(trailing_breaks.buffer);
4119 yaml_free(whitespaces.buffer);
4120
4121 return NULL;
4122}
4123
This page took 2.10173 seconds and 5 git commands to generate.