2 ** Splint - annotation-assisted static program checker
3 ** Copyright (C) 1994-2002 University of Virginia,
4 ** Massachusetts Institute of Technology
6 ** This program is free software; you can redistribute it and/or modify it
7 ** under the terms of the GNU General Public License as published by the
8 ** Free Software Foundation; either version 2 of the License, or (at your
9 ** option) any later version.
11 ** This program is distributed in the hope that it will be useful, but
12 ** WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ** General Public License for more details.
16 ** The GNU General Public License is available from http://www.gnu.org/ or
17 ** the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 ** MA 02111-1307, USA.
20 ** For information on splint: info@splint.org
21 ** To report a bug: splint-bug@splint.org
22 ** For more information: http://www.splint.org
27 ** MODULE DESCRIPTION:
29 ** This module scans one line of Larch C Interface Language (LCL) input at a time.
32 ** The input is source text, line at a time. The output is a sequence
33 ** of tokens, reported by call-out LSLScanFreshToken.
35 ** This organization allows implementation of line-at-a-time incremental
36 ** scanning. The incremental mechanism is in the driving module scan.c.
38 ** The main loop of the scanner keys on the leading character.
39 ** Within the loop are actions which collect the rest of the
40 ** token starting with the character. Various careful hacks
41 ** show up to disambiguate tokens that break the general pattern
42 ** (Examples, \/ and /\). White space is passed and the loop
43 ** goes once again without calling LSLScanFreshToken ().
44 ** The line ends with a null.
48 ** JPW, GAF, Yang Meng Tan
52 # include "splintMacros.nf"
56 # include "scanline.h"
57 # include "lclscanline.h"
58 # include "lcltokentable.h"
59 # include "lclsyntable.h"
61 /*@constant int CHARSIZE;@*/
62 # define CHARSIZE 256 /* on an 8-bit machine */
65 # define LCLMOVECHAR() \
66 do { *bufPtr++ = currentChar; currentChar = *currentLine++; \
67 colNumber++; } while (FALSE)
70 # define LOOKAHEADCHAR() (*currentLine)
73 # define LOOKAHEADTWICECHAR() (*(currentLine + 1))
75 /*@constant static int MAXCHAR;@*/
76 # define MAXCHAR 512 /* storage for a lexeme */
79 ** Printname for the TokenCode NOTTOKEN (also 1st one reserved)
80 ** Printname for the TokenCode BADTOKEN (also last one reserved)
83 /*@constant static observer char *FIRSTRESERVEDNAME;@*/
84 # define FIRSTRESERVEDNAME "?"
87 ** The scanner establishes lexical boundaries by first switching
88 ** on the leading character of the pending lexeme.
93 STARTCNUM, /* First character of a C number. */
94 STARTCNUMDOT, /* "." only starts a C number if digit follows*/
95 STARTCSTR, /* First character of a C string. */
96 STARTCCHAR, /* First character of a C character. */
97 STARTWIDE, /* slash L starts both string and character. */
98 STARTSLASH, /* "/" starts caret, comment, operator */
99 STARTOTHER /* Everything else. */
102 static void ScanCComment (void);
103 static void ScanEscape (void);
104 static void ScanCString (void);
105 static void ScanCChar (void);
106 static void ScanCNumber (void);
107 static void LocalUserError (/*@temp@*/ char *);
110 ** Array to store character class definitions and record end-of-comment
114 static charClassData LCLcharClass[LASTCHAR + 1];
117 ** Data shared between routines LCLScanLine, ScanCString, ScanCChar,
118 ** ScanCNumber. LCLScanLine was getting too big for one routine and
119 ** passing this data was rather cumbersome. Making this data global seemed
120 ** to be the simplest solution.
123 /* evs - sounds bogus to me! */
125 static int colNumber;
127 static char *currentLine;
128 static char currentChar;
129 static ltokenCode tokenCode;
130 static lsymbol tokenSym;
133 static bool inComment;
134 static /*@only@*/ ltoken commentTok;
135 static ltokenCode prevTokenCode; /* to disambiguate ' */
137 static StartCharType startClass[CHARSIZE] =
139 STARTOTHER, /* ^@ 00x */
140 STARTOTHER, /* ^a 01x */
141 STARTOTHER, /* ^b 02x */
142 STARTOTHER, /* ^c 03x */
143 STARTOTHER, /* ^d 04x */
144 STARTOTHER, /* ^e 05x */
145 STARTOTHER, /* ^f 06x */
146 STARTOTHER, /* ^g BELL 07x */
148 STARTOTHER, /* ^h BACKSPACE 08x */
149 STARTOTHER, /* ^i TAB 09x */
150 STARTOTHER, /* ^j NEWLINE 0Ax */
151 STARTOTHER, /* ^k 0Bx */
152 STARTOTHER, /* ^l FORMFEED 0Cx */
153 STARTOTHER, /* ^m RETURN 0Dx */
154 STARTOTHER, /* ^n 0Ex */
155 STARTOTHER, /* ^o 0Fx */
157 STARTOTHER, /* ^p 10x */
158 STARTOTHER, /* ^q 11x */
159 STARTOTHER, /* ^r 12x */
160 STARTOTHER, /* ^s 13x */
161 STARTOTHER, /* ^t 14x */
162 STARTOTHER, /* ^u 15x */
163 STARTOTHER, /* ^v 16x */
164 STARTOTHER, /* ^w 17x */
166 STARTOTHER, /* ^x 18x */
167 STARTOTHER, /* ^y 19x */
168 STARTOTHER, /* ^z 1Ax */
169 STARTOTHER, /* ^[ ESC 1Bx */
170 STARTOTHER, /* ^slash 1Cx */
171 STARTOTHER, /* ^] 1Dx */
172 STARTOTHER, /* ^^ 1Ex */
173 STARTOTHER, /* ^_ 1Fx */
175 STARTOTHER, /* BLANK 20x */
176 STARTOTHER, /* ! 21x */
177 STARTCSTR, /* " 22x */
178 STARTOTHER, /* # 23x */
179 STARTOTHER, /* $ (may be changed in reset) 24x */
180 STARTOTHER, /* % 25x */
181 STARTOTHER, /* & 26x */
182 STARTCCHAR, /* ' 27x */
184 STARTOTHER, /* ( 28x */
185 STARTOTHER, /* ) 29x */
186 STARTOTHER, /* * 2Ax */
187 STARTOTHER, /* + 2Bx */
188 STARTOTHER, /* , 2Cx */
189 STARTOTHER, /* - 2Dx */
190 STARTCNUMDOT, /* . 2Ex */
191 STARTSLASH, /* / 2Fx */
193 STARTCNUM, /* 0 30x */
194 STARTCNUM, /* 1 31x */
195 STARTCNUM, /* 2 32x */
196 STARTCNUM, /* 3 33x */
197 STARTCNUM, /* 4 34x */
198 STARTCNUM, /* 5 35x */
199 STARTCNUM, /* 6 36x */
200 STARTCNUM, /* 7 37x */
202 STARTCNUM, /* 8 38x */
203 STARTCNUM, /* 9 39x */
204 STARTOTHER, /* : 3Ax */
205 STARTOTHER, /* ; 3Bx */
206 STARTOTHER, /* < 3Cx */
207 STARTOTHER, /* = 3Dx */
208 STARTOTHER, /* > 3Ex */
209 STARTOTHER, /* ? 3Fx */
211 STARTOTHER, /* @ 40x */
212 STARTOTHER, /* A 41x */
213 STARTOTHER, /* B 42x */
214 STARTOTHER, /* C 43x */
215 STARTOTHER, /* D 44x */
216 STARTOTHER, /* E 45x */
217 STARTOTHER, /* F 46x */
218 STARTOTHER, /* G 47x */
220 STARTOTHER, /* H 48x */
221 STARTOTHER, /* I 49x */
222 STARTOTHER, /* J 4Ax */
223 STARTOTHER, /* K 4Bx */
224 STARTOTHER, /* L 4Cx */
225 STARTOTHER, /* M 4Dx */
226 STARTOTHER, /* N 4Ex */
227 STARTOTHER, /* O 4Fx */
229 STARTOTHER, /* P 50x */
230 STARTOTHER, /* Q 51x */
231 STARTOTHER, /* R 52x */
232 STARTOTHER, /* S 53x */
233 STARTOTHER, /* T 54x */
234 STARTOTHER, /* U 55x */
235 STARTOTHER, /* V 56x */
236 STARTOTHER, /* W 57x */
238 STARTOTHER, /* X 58x */
239 STARTOTHER, /* Y 59x */
240 STARTOTHER, /* Z 5Ax */
241 STARTOTHER, /* [ 5Bx */
242 STARTWIDE, /* slash 5Cx */
243 STARTOTHER, /* ] 5Dx */
244 STARTOTHER, /* ^ 5Ex */
245 STARTOTHER, /* _ 5Fx */
247 STARTOTHER, /* ` 60x */
248 STARTOTHER, /* a 61x */
249 STARTOTHER, /* b 62x */
250 STARTOTHER, /* c 63x */
251 STARTOTHER, /* d 64x */
252 STARTOTHER, /* e 65x */
253 STARTOTHER, /* f 66x */
254 STARTOTHER, /* g 67x */
256 STARTOTHER, /* h 68x */
257 STARTOTHER, /* i 69x */
258 STARTOTHER, /* j 6Ax */
259 STARTOTHER, /* k 6Bx */
260 STARTOTHER, /* l 6Cx */
261 STARTOTHER, /* m 6Dx */
262 STARTOTHER, /* n 6Ex */
263 STARTOTHER, /* o 6Fx */
265 STARTOTHER, /* p 70x */
266 STARTOTHER, /* q 71x */
267 STARTOTHER, /* r 72x */
268 STARTOTHER, /* s 73x */
269 STARTOTHER, /* t 74x */
270 STARTOTHER, /* u 75x */
271 STARTOTHER, /* v 76x */
272 STARTOTHER, /* w 77x */
274 STARTOTHER, /* x 78x */
275 STARTOTHER, /* y 79x */
276 STARTOTHER, /* z 7Ax */
277 STARTOTHER, /* { 7Bx */
278 STARTOTHER, /* | 7Cx */
279 STARTOTHER, /* } 7Dx */
280 STARTOTHER, /* ~ 7Ex */
282 STARTOTHER /* RUBOUT 7Fx */
286 ** Given a character code, its status as part of a decimal escape sequence
287 ** can be derived from this table. Digits 0-9 allowed.
290 static bool isDigit[CHARSIZE] =
292 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
293 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
294 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
295 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
296 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
297 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
298 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
299 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
300 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
304 * Given a character code, its status as part of an octal escape sequence
305 * can be derived from this table. Digits 0-7 allowed.
308 static bool isOigit[CHARSIZE] =
310 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
311 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
312 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
313 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
314 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
315 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
316 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
317 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
321 * Given a character code, its status as part of a hex escape sequence
322 * can be derived from this table. Digits, a-f, A-F allowed.
325 static bool isXigit[CHARSIZE] =
327 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
328 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
329 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
331 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
332 FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
333 TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
335 FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
336 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
337 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
339 FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
340 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
341 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
346 * Given a character code, its status as part of a C string
347 * can be derived from this table. Everything but the double quote, backslash, and newline is allowed.
351 static bool isStrChar[CHARSIZE] =
353 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
354 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
355 TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
356 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
357 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
358 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE,
359 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
360 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
364 * Given a character code, its status as part of a C Character
365 * can be derived from this table. Everything but the single quote, backslash, and newline is allowed.
369 static bool isCharChar[CHARSIZE] =
371 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
372 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
373 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
374 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
375 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
376 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE,
377 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
378 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
382 ** Given a character code, its status as part of a string or character
383 ** simple escape sequence ('slash'', 'slash"', 'slash?', 'slashslash',
384 ** 'slasha', 'slashb', 'slashf', 'slashn', 'slasht', and 'slashv')
385 ** can be derived from this table. ''', '"', '?', 'slash', 'a',
386 ** 'b', 'f', 'n', 't', and 'v' are allowed.
389 static bool isSimpleEscape[CHARSIZE] =
391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
393 FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
395 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
396 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
397 FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
398 FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
401 static bool reportEOL;
402 static bool reportComments;
403 static lsymbol firstReserved;
405 static char tokenBuffer[MAXCHAR];
407 static const charClassData charClassDef[] =
409 /* Control characters */
411 { SINGLECHAR, FALSE }, /* 0 NULL */
412 { SINGLECHAR, FALSE }, /* 1 CTRL-A */
413 { SINGLECHAR, FALSE }, /* 2 CTRL-B */
414 { SINGLECHAR, FALSE }, /* 3 CTRL-C */
415 { SINGLECHAR, FALSE }, /* 4 CTRL-D */
416 { SINGLECHAR, FALSE }, /* 5 CTRL-E */
417 { SINGLECHAR, FALSE }, /* 6 CTRL-F */
418 { SINGLECHAR, FALSE }, /* 7 CTRL-G */
419 { SINGLECHAR, FALSE }, /* 8 CTRL-H */
421 /* defined formatting characters */
422 { WHITECHAR, FALSE }, /* 9 CTRL-I TAB */
423 { CHC_NULL, TRUE }, /* 10 CTRL-J EOL */
425 /* more control characters */
426 { SINGLECHAR, FALSE }, /* 11 CTRL-K */
427 { WHITECHAR, FALSE }, /* 12 CTRL-L */
428 { SINGLECHAR, FALSE }, /* 13 CTRL-M */
429 { SINGLECHAR, FALSE }, /* 14 CTRL-N */
430 { SINGLECHAR, FALSE }, /* 15 CTRL-O */
431 { SINGLECHAR, FALSE }, /* 16 CTRL-P */
432 { SINGLECHAR, FALSE }, /* 17 CTRL-Q */
433 { SINGLECHAR, FALSE }, /* 18 CTRL-R */
434 { SINGLECHAR, FALSE }, /* 19 CTRL-S */
435 { SINGLECHAR, FALSE }, /* 20 CTRL-T */
436 { SINGLECHAR, FALSE }, /* 21 CTRL-U */
437 { SINGLECHAR, FALSE }, /* 22 CTRL-V */
438 { SINGLECHAR, FALSE }, /* 23 CTRL-W */
439 { SINGLECHAR, FALSE }, /* 24 CTRL-X */
440 { SINGLECHAR, FALSE }, /* 25 CTRL-Y */
441 { SINGLECHAR, FALSE }, /* 26 CTRL-Z */
442 { SINGLECHAR, FALSE }, /* 27 CTRL-[ ESC */
443 { SINGLECHAR, FALSE }, /* 28 CTRL-slash FS */
444 { SINGLECHAR, FALSE }, /* 29 CTRL-] GS */
445 { SINGLECHAR, FALSE }, /* 30 CTRL-^ RS */
446 { SINGLECHAR, FALSE }, /* 31 CTRL-_ US */
448 /* Special printing characters */
449 { WHITECHAR, FALSE }, /* 32 space */
450 { SINGLECHAR, FALSE }, /* 33 ! */
451 { SINGLECHAR, FALSE }, /* 34 " */
452 { SINGLECHAR, FALSE }, /* 35 # */
453 { SINGLECHAR, FALSE }, /* 36 $ */
454 { SINGLECHAR, FALSE }, /* 37 % */
455 { SINGLECHAR, FALSE }, /* 38 & */
456 { SINGLECHAR, FALSE }, /* 39 ' */
458 /* Reserved characters */
459 { PERMCHAR, FALSE }, /* 40 ( */
460 { PERMCHAR, FALSE }, /* 41 ) */
461 { PERMCHAR, FALSE }, /* 42 * */
462 { OPCHAR, FALSE }, /* 43 + */
463 { PERMCHAR, FALSE }, /* 44 , */
464 { OPCHAR, FALSE }, /* 45 - */
465 { OPCHAR, FALSE }, /* 46 . */
466 { OPCHAR, FALSE }, /* 47 / */
469 { IDCHAR, FALSE }, /* 48 0 */
470 { IDCHAR, FALSE }, /* 49 1 */
471 { IDCHAR, FALSE }, /* 50 2 */
472 { IDCHAR, FALSE }, /* 51 3 */
473 { IDCHAR, FALSE }, /* 52 4 */
474 { IDCHAR, FALSE }, /* 53 5 */
475 { IDCHAR, FALSE }, /* 54 6 */
476 { IDCHAR, FALSE }, /* 55 7 */
477 { IDCHAR, FALSE }, /* 56 8 */
478 { IDCHAR, FALSE }, /* 57 9 */
480 /* More reserved and special printing characters */
481 { PERMCHAR, FALSE }, /* 58 : */
482 { PERMCHAR, FALSE }, /* 59; */
483 { OPCHAR, FALSE }, /* 60 < */
484 { OPCHAR, FALSE }, /* 61 = */
485 { OPCHAR, FALSE }, /* 62 > */
486 { SINGLECHAR, FALSE }, /* 63 ? */
487 { SINGLECHAR, FALSE }, /* 64 @ */
489 /* Uppercase Alphabetics */
490 { IDCHAR, FALSE }, /* 65 A */
491 { IDCHAR, FALSE }, /* 66 B */
492 { IDCHAR, FALSE }, /* 67 C */
493 { IDCHAR, FALSE }, /* 68 D */
494 { IDCHAR, FALSE }, /* 69 E */
495 { IDCHAR, FALSE }, /* 70 F */
496 { IDCHAR, FALSE }, /* 71 G */
497 { IDCHAR, FALSE }, /* 72 H */
498 { IDCHAR, FALSE }, /* 73 I */
499 { IDCHAR, FALSE }, /* 74 J */
500 { IDCHAR, FALSE }, /* 75 K */
501 { IDCHAR, FALSE }, /* 76 L */
502 { IDCHAR, FALSE }, /* 77 M */
503 { IDCHAR, FALSE }, /* 78 N */
504 { IDCHAR, FALSE }, /* 79 O */
505 { IDCHAR, FALSE }, /* 80 P */
506 { IDCHAR, FALSE }, /* 81 Q */
507 { IDCHAR, FALSE }, /* 82 R */
508 { IDCHAR, FALSE }, /* 83 S */
509 { IDCHAR, FALSE }, /* 84 T */
510 { IDCHAR, FALSE }, /* 85 U */
511 { IDCHAR, FALSE }, /* 86 V */
512 { IDCHAR, FALSE }, /* 87 W */
513 { IDCHAR, FALSE }, /* 88 X */
514 { IDCHAR, FALSE }, /* 89 Y */
515 { IDCHAR, FALSE }, /* 90 Z */
517 /* Still more reserved and special printing characters */
518 { PERMCHAR, FALSE }, /* 91 [ */
519 { CHC_EXTENSION, FALSE }, /* 92 slash */
520 { PERMCHAR, FALSE }, /* 93 ] */
521 { SINGLECHAR, FALSE }, /* 94 ^ */
522 { IDCHAR, FALSE }, /* 95 _ */
523 { SINGLECHAR, FALSE }, /* 96 ` */
525 /* Lowercase alphabetics */
526 { IDCHAR, FALSE }, /* 97 a */
527 { IDCHAR, FALSE }, /* 98 b */
528 { IDCHAR, FALSE }, /* 99 c */
529 { IDCHAR, FALSE }, /* 100 d */
530 { IDCHAR, FALSE }, /* 101 e */
531 { IDCHAR, FALSE }, /* 102 f */
532 { IDCHAR, FALSE }, /* 103 g */
533 { IDCHAR, FALSE }, /* 104 h */
534 { IDCHAR, FALSE }, /* 105 i */
535 { IDCHAR, FALSE }, /* 106 j */
536 { IDCHAR, FALSE }, /* 107 k */
537 { IDCHAR, FALSE }, /* 108 l */
538 { IDCHAR, FALSE }, /* 109 m */
539 { IDCHAR, FALSE }, /* 110 n */
540 { IDCHAR, FALSE }, /* 111 o */
541 { IDCHAR, FALSE }, /* 112 p */
542 { IDCHAR, FALSE }, /* 113 q */
543 { IDCHAR, FALSE }, /* 114 r */
544 { IDCHAR, FALSE }, /* 115 s */
545 { IDCHAR, FALSE }, /* 116 t */
546 { IDCHAR, FALSE }, /* 117 u */
547 { IDCHAR, FALSE }, /* 118 v */
548 { IDCHAR, FALSE }, /* 119 w */
549 { IDCHAR, FALSE }, /* 120 x */
550 { IDCHAR, FALSE }, /* 121 y */
551 { IDCHAR, FALSE }, /* 122 z */
553 { SINGLECHAR, FALSE }, /* 123 { */
554 { SINGLECHAR, FALSE }, /* 124 | */
555 { SINGLECHAR, FALSE }, /* 125 } */
556 { SINGLECHAR, FALSE }, /* 126 ~ */
557 { SINGLECHAR, FALSE }, /* 127 DEL */
559 /* MCS - unused in English */
560 { SINGLECHAR, FALSE }, /* 128 */
561 { SINGLECHAR, FALSE }, /* 129 */
562 { SINGLECHAR, FALSE }, /* 130 */
563 { SINGLECHAR, FALSE }, /* 131 */
564 { SINGLECHAR, FALSE }, /* 132 */
565 { SINGLECHAR, FALSE }, /* 133 */
566 { SINGLECHAR, FALSE }, /* 134 */
567 { SINGLECHAR, FALSE }, /* 135 */
568 { SINGLECHAR, FALSE }, /* 136 */
569 { SINGLECHAR, FALSE }, /* 137 */
570 { SINGLECHAR, FALSE }, /* 138 */
571 { SINGLECHAR, FALSE }, /* 139 */
572 { SINGLECHAR, FALSE }, /* 140 */
573 { SINGLECHAR, FALSE }, /* 141 */
574 { SINGLECHAR, FALSE }, /* 142 */
575 { SINGLECHAR, FALSE }, /* 143 */
576 { SINGLECHAR, FALSE }, /* 144 */
577 { SINGLECHAR, FALSE }, /* 145 */
578 { SINGLECHAR, FALSE }, /* 146 */
579 { SINGLECHAR, FALSE }, /* 147 */
580 { SINGLECHAR, FALSE }, /* 148 */
581 { SINGLECHAR, FALSE }, /* 149 */
582 { SINGLECHAR, FALSE }, /* 150 */
583 { SINGLECHAR, FALSE }, /* 151 */
584 { SINGLECHAR, FALSE }, /* 152 */
585 { SINGLECHAR, FALSE }, /* 153 */
586 { SINGLECHAR, FALSE }, /* 154 */
587 { SINGLECHAR, FALSE }, /* 155 */
588 { SINGLECHAR, FALSE }, /* 156 */
589 { SINGLECHAR, FALSE }, /* 157 */
590 { SINGLECHAR, FALSE }, /* 158 */
591 { SINGLECHAR, FALSE }, /* 159 */
592 { SINGLECHAR, FALSE }, /* 160 */
593 { SINGLECHAR, FALSE }, /* 161 */
594 { SINGLECHAR, FALSE }, /* 162 */
595 { SINGLECHAR, FALSE }, /* 163 */
596 { SINGLECHAR, FALSE }, /* 164 */
597 { SINGLECHAR, FALSE }, /* 165 */
598 { SINGLECHAR, FALSE }, /* 166 */
599 { SINGLECHAR, FALSE }, /* 167 */
600 { SINGLECHAR, FALSE }, /* 168 */
601 { SINGLECHAR, FALSE }, /* 169 */
602 { SINGLECHAR, FALSE }, /* 170 */
603 { SINGLECHAR, FALSE }, /* 171 */
604 { SINGLECHAR, FALSE }, /* 172 */
605 { SINGLECHAR, FALSE }, /* 173 */
606 { SINGLECHAR, FALSE }, /* 174 */
607 { SINGLECHAR, FALSE }, /* 175 */
608 { SINGLECHAR, FALSE }, /* 176 */
609 { SINGLECHAR, FALSE }, /* 177 */
610 { SINGLECHAR, FALSE }, /* 178 */
611 { SINGLECHAR, FALSE }, /* 179 */
612 { SINGLECHAR, FALSE }, /* 180 */
613 { SINGLECHAR, FALSE }, /* 181 */
614 { SINGLECHAR, FALSE }, /* 182 */
615 { SINGLECHAR, FALSE }, /* 183 */
616 { SINGLECHAR, FALSE }, /* 184 */
617 { SINGLECHAR, FALSE }, /* 185 */
618 { SINGLECHAR, FALSE }, /* 186 */
619 { SINGLECHAR, FALSE }, /* 187 */
620 { SINGLECHAR, FALSE }, /* 188 */
621 { SINGLECHAR, FALSE }, /* 189 */
622 { SINGLECHAR, FALSE }, /* 190 */
623 { SINGLECHAR, FALSE }, /* 191 */
624 { SINGLECHAR, FALSE }, /* 192 */
625 { SINGLECHAR, FALSE }, /* 193 */
626 { SINGLECHAR, FALSE }, /* 194 */
627 { SINGLECHAR, FALSE }, /* 195 */
628 { SINGLECHAR, FALSE }, /* 196 */
629 { SINGLECHAR, FALSE }, /* 197 */
630 { SINGLECHAR, FALSE }, /* 198 */
631 { SINGLECHAR, FALSE }, /* 199 */
632 { SINGLECHAR, FALSE }, /* 200 */
633 { SINGLECHAR, FALSE }, /* 201 */
634 { SINGLECHAR, FALSE }, /* 202 */
635 { SINGLECHAR, FALSE }, /* 203 */
636 { SINGLECHAR, FALSE }, /* 204 */
637 { SINGLECHAR, FALSE }, /* 205 */
638 { SINGLECHAR, FALSE }, /* 206 */
639 { SINGLECHAR, FALSE }, /* 207 */
640 { SINGLECHAR, FALSE }, /* 208 */
641 { SINGLECHAR, FALSE }, /* 209 */
642 { SINGLECHAR, FALSE }, /* 210 */
643 { SINGLECHAR, FALSE }, /* 211 */
644 { SINGLECHAR, FALSE }, /* 212 */
645 { SINGLECHAR, FALSE }, /* 213 */
646 { SINGLECHAR, FALSE }, /* 214 */
647 { SINGLECHAR, FALSE }, /* 215 */
648 { SINGLECHAR, FALSE }, /* 216 */
649 { SINGLECHAR, FALSE }, /* 217 */
650 { SINGLECHAR, FALSE }, /* 218 */
651 { SINGLECHAR, FALSE }, /* 219 */
652 { SINGLECHAR, FALSE }, /* 220 */
653 { SINGLECHAR, FALSE }, /* 221 */
654 { SINGLECHAR, FALSE }, /* 222 */
655 { SINGLECHAR, FALSE }, /* 223 */
656 { SINGLECHAR, FALSE }, /* 224 */
657 { SINGLECHAR, FALSE }, /* 225 */
658 { SINGLECHAR, FALSE }, /* 226 */
659 { SINGLECHAR, FALSE }, /* 227 */
660 { SINGLECHAR, FALSE }, /* 228 */
661 { SINGLECHAR, FALSE }, /* 229 */
662 { SINGLECHAR, FALSE }, /* 230 */
663 { SINGLECHAR, FALSE }, /* 231 */
664 { SINGLECHAR, FALSE }, /* 232 */
665 { SINGLECHAR, FALSE }, /* 233 */
666 { SINGLECHAR, FALSE }, /* 234 */
667 { SINGLECHAR, FALSE }, /* 235 */
668 { SINGLECHAR, FALSE }, /* 236 */
669 { SINGLECHAR, FALSE }, /* 237 */
670 { SINGLECHAR, FALSE }, /* 238 */
671 { SINGLECHAR, FALSE }, /* 239 */
672 { SINGLECHAR, FALSE }, /* 240 */
673 { SINGLECHAR, FALSE }, /* 241 */
674 { SINGLECHAR, FALSE }, /* 242 */
675 { SINGLECHAR, FALSE }, /* 243 */
676 { SINGLECHAR, FALSE }, /* 244 */
677 { SINGLECHAR, FALSE }, /* 245 */
678 { SINGLECHAR, FALSE }, /* 246 */
679 { SINGLECHAR, FALSE }, /* 247 */
680 { SINGLECHAR, FALSE }, /* 248 */
681 { SINGLECHAR, FALSE }, /* 249 */
682 { SINGLECHAR, FALSE }, /* 250 */
683 { SINGLECHAR, FALSE }, /* 251 */
684 { SINGLECHAR, FALSE }, /* 252 */
685 { SINGLECHAR, FALSE }, /* 253 */
686 { SINGLECHAR, FALSE }, /* 254 */
687 { SINGLECHAR, FALSE } /* 255 */
700 if (currentChar == '/')
706 /*@switchbreak@*/ break;
718 if (isSimpleEscape[(int)currentChar])
720 LCLMOVECHAR (); /* discard simple escape character. */
722 else if (currentChar == 'x')
724 LCLMOVECHAR (); /* discard 'x'. */
725 if (!isXigit[(int)currentChar])
727 LocalUserError ("at least one hex digit must follow '\\x'");
729 while (isXigit[(int)currentChar])
731 LCLMOVECHAR (); /* discard hex digits. */
734 else if (isOigit[(int)currentChar])
736 LCLMOVECHAR (); /* discard first octal digit. */
737 if (isOigit[(int)currentChar])
739 LCLMOVECHAR (); /* discard second octal digit. */
741 if (isOigit[(int)currentChar])
743 LCLMOVECHAR (); /* discard third octal digit. */
748 LocalUserError ("invalid escape sequence in a C string or character");
755 if (currentChar == '\\' && LOOKAHEADCHAR () == 'L')
757 LCLMOVECHAR (); /* discard slash */
758 LCLMOVECHAR (); /* discard 'L'. */
761 if (currentChar == '\"')
763 LCLMOVECHAR (); /* discard opening quote. */
765 while (currentChar != '\"')
767 if (isStrChar[(int)currentChar])
769 LCLMOVECHAR (); /* discard string character. */
771 else if (currentChar == '\\')
773 LCLMOVECHAR (); /* discard slash */
776 else if (currentChar == '\n')
778 LocalUserError ("Unterminated C string");
782 LocalUserError ("Invalid character in C string");
785 LCLMOVECHAR (); /* discard closing quote */
790 LocalUserError ("C string must start with '\"'");
794 *bufPtr = '\0'; /* null terminate in buffer */
795 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
796 tokenCode = LLT_LCSTRING;
802 if (currentChar == '\\' && LOOKAHEADCHAR () == 'L')
804 LCLMOVECHAR (); /* discard slash */
805 LCLMOVECHAR (); /* discard 'L'. */
808 if (currentChar == '\'')
810 LCLMOVECHAR (); /* discard opening quote */
812 while (currentChar != '\'')
814 if (isCharChar[(int)currentChar])
816 LCLMOVECHAR (); /* discard string character. */
818 else if (currentChar == '\\')
820 LCLMOVECHAR (); /* discard slash */
823 else if (currentChar == '\n')
825 LocalUserError ("Unterminated C character constant");
829 LocalUserError ("Invalid character in C character");
832 LCLMOVECHAR (); /* discard closing quote */
837 LocalUserError ("Invalid C character");
841 *bufPtr = '\0'; /* null terminate in buffer */
842 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
843 tokenCode = LLT_CCHAR;
849 tokenCode = LLT_CINTEGER;
855 tokenCode = LLT_CFLOAT;
856 if (!isDigit[(int)currentChar])
858 LocalUserError ("at least one digit must follow '.'");
860 while (isDigit[(int)currentChar])
864 if (currentChar == 'e' || currentChar == 'E')
866 LCLMOVECHAR (); /* discard 'e' or 'E'. */
867 if (currentChar == '+' || currentChar == '-')
871 if (!isDigit[(int)currentChar])
873 LocalUserError ("digit must follow exponent");
875 while (isDigit[(int)currentChar])
880 if (currentChar == 'f' || currentChar == 'l' ||
881 currentChar == 'F' || currentChar == 'L')
888 LCLMOVECHAR (); /* discard '0'. */
894 if (!isXigit[(int)currentChar])
896 LocalUserError ("hex digit must follow 'x' or 'X'");
898 while (isXigit[(int)currentChar])
902 /*@switchbreak@*/ break;
906 ** Could either be an octal number or a floating point
907 ** number. Scan decimal digits so we don't run into
908 ** problems if it turns out to be an fp
909 ** number. Let converter/parser catch bad octal
910 ** numbers. e.g. 018 not caught by scanner.
913 while (isDigit[(int)currentChar])
920 LCLMOVECHAR (); /* discard '.'. */
921 tokenCode = LLT_CFLOAT;
922 while (isDigit[(int)currentChar])
926 if (currentChar == 'e' || currentChar == 'E')
928 LCLMOVECHAR (); /* discard 'e' or 'E'. */
929 if (currentChar == '+' || currentChar == '-')
933 if (!isDigit[(int)currentChar])
935 LocalUserError ("digit must follow exponent");
937 while (isDigit[(int)currentChar])
942 if (currentChar == 'f' ||
943 currentChar == 'l' ||
944 currentChar == 'F' ||
949 /*@switchbreak@*/ break;
954 tokenCode = LLT_CFLOAT;
955 if (currentChar == '+' || currentChar == '-')
959 if (!isDigit[(int)currentChar])
961 LocalUserError ("digit must follow exponent");
963 while (isDigit[(int)currentChar])
967 if (currentChar == 'f' ||
968 currentChar == 'l' ||
969 currentChar == 'F' ||
974 /*@switchbreak@*/ break;
977 /* Scan integer suffix. */
983 if (currentChar == 'l' || currentChar == 'L')
987 /*@switchbreak@*/ break;
991 if (currentChar == 'u' || currentChar == 'U')
996 /*@switchbreak@*/ break;
998 /*@switchbreak@*/ break;
1002 /* Scan integer suffix. */
1003 switch (currentChar)
1008 if (currentChar == 'l' || currentChar == 'L')
1012 /*@switchbreak@*/ break;
1016 if (currentChar == 'u' || currentChar == 'U')
1020 /*@switchbreak@*/ break;
1025 if (isDigit[(int)currentChar])
1027 while (isDigit[(int)currentChar])
1031 switch (currentChar)
1034 LCLMOVECHAR (); /* discard '.'. */
1035 tokenCode = LLT_CFLOAT;
1036 while (isDigit[(int)currentChar])
1040 if (currentChar == 'e' || currentChar == 'E')
1043 if (currentChar == '+' || currentChar == '-')
1047 if (!isDigit[(int)currentChar])
1049 LocalUserError ("digit must follow exponent");
1051 while (isDigit[(int)currentChar])
1056 if (currentChar == 'f' ||
1057 currentChar == 'l' ||
1058 currentChar == 'F' ||
1063 /*@switchbreak@*/ break;
1068 tokenCode = LLT_CFLOAT;
1069 if (currentChar == '+' || currentChar == '-')
1073 if (!isDigit[(int)currentChar])
1075 LocalUserError ("digit must follow exponent");
1077 while (isDigit[(int)currentChar])
1081 if (currentChar == 'f' ||
1082 currentChar == 'l' ||
1083 currentChar == 'F' ||
1089 /*@switchbreak@*/ break;
1091 switch (currentChar)
1096 if (currentChar == 'l' || currentChar == 'L')
1100 /*@switchbreak@*/ break;
1104 if (currentChar == 'u' || currentChar == 'U')
1108 /*@switchbreak@*/ break;
1110 /*@switchbreak@*/ break;
1115 LocalUserError ("invalid C number");
1123 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
/*
** ScanOther: scans one token starting at currentChar, choosing the action
** from the character class reported by LCLScanCharClass.  Every visible
** branch finishes by storing a symbol in tokenSym and a code in tokenCode.
**
** NOTE(review): the case labels, braces and buffer-advancing statements of
** this function are not visible in this excerpt; the comments below describe
** only what the visible lines establish.
*/
1126 static void ScanOther (void)
1128 switch (LCLScanCharClass (currentChar))
/* End of line: the token is the reserved symbol "E O L" with code LLT_EOL. */
1131 tokenSym = lsymbol_fromChars ("E O L");
1132 tokenCode = LLT_EOL;
/* Identifier: consume characters for as long as they are IDCHARs. */
1139 while (LCLScanCharClass (currentChar) == IDCHAR)
1140 { /* identifier: find end */
1144 *bufPtr = '\0'; /* null terminate in buffer */
1145 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1146 tokenCode = simpleId;
1149 /* one-character tokens */
1155 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1156 tokenCode = simpleOp;
1159 /* operator symbols */
/* Three consecutive dots are recognised as the single token "...". */
1163 if (currentChar == '.' && LOOKAHEADCHAR () == '.' &&
1164 LOOKAHEADTWICECHAR () == '.')
1170 tokenSym = lsymbol_fromChars ("...");
1171 tokenCode = LLT_TELIPSIS;
/* Special check for a slash immediately followed by a backslash. */
1175 if (currentChar == '/' && LOOKAHEADCHAR () == '\\')
/* General operator: runs for as long as the current character is an OPCHAR. */
1182 while (LCLScanCharClass (currentChar) == OPCHAR)
1189 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1190 tokenCode = simpleOp;
1196 /*@-loopswitchbreak@*/
1197 /*@-switchswitchbreak@*/
/* White space: a nested switch on the character adjusts column accounting. */
1198 switch (currentChar)
1201 LCLMOVECHAR (); /* tabs only count as one character */
1207 colNumber--; /* does not change column */
1214 /*@=switchswitchbreak@*/
1217 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1218 tokenCode = LLT_WHITESPACE;
1226 /*@-switchswitchbreak@*/
/*
** Nested switch on the current character; each visible branch below gathers
** trailing IDCHARs and then classifies the collected text.
*/
1227 switch (currentChar)
1229 /* open and close */
1232 while (LCLScanCharClass (currentChar) == IDCHAR)
1237 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1238 tokenCode = openSym;
1243 while (LCLScanCharClass (currentChar) == IDCHAR)
1248 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1249 tokenCode = closeSym;
/* NOTE(review): the tokenCode assignment for this branch is not visible here. */
1255 while (LCLScanCharClass (currentChar) == IDCHAR)
1260 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1267 while (LCLScanCharClass (currentChar) == IDCHAR)
1272 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1273 tokenCode = simpleId;
/* Name made of IDCHARs: collected by a loop ending in the while below. */
1277 if (LCLScanCharClass (currentChar) == IDCHAR)
1283 while (LCLScanCharClass (currentChar) == IDCHAR);
1285 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1286 tokenCode = simpleOp;
1291 ** Meets none of the above. Take the extension
1292 ** character and the character following and treat
1293 ** together as a SINGLECHAR. SINGLECHARs tranlate into
1299 tokenSym = lsymbol_fromChars (&tokenBuffer[0]);
1300 tokenCode = simpleOp;
1303 /*@=switchswitchbreak@*/
/* No branch applies to this character: report it via LocalUserError. */
1308 LocalUserError ("unexpected character in input");
1311 /*@=loopswitchbreak@*/
/*
** nextCanBeCharLiteral: classifies a token code c by what a following '
** character would mean after it, as laid out by the group comments below
** (start of a C character literal, "post", neither, or a token that should
** never reach here).  LCLScanLine uses the result as a condition, passing
** the code of the previously scanned token.
**
** NOTE(review): the return statements and most case labels are not visible
** in this excerpt.
*/
1315 nextCanBeCharLiteral (ltokenCode c)
1319 /* A ' following these tokens starts a C character literal. */
1336 case LLT_CONSTRAINT:
1343 /* A ' following these tokens means post */
1357 /* Neither a C character literal nor post should follow these tokens */
1363 case LLT_VERTICALBAR:
1387 case LLT_TAGGEDUNION:
1397 case LLT_TYPEDEF_NAME:
1409 /* These tokens should have been ignored */
1412 case LLT_WHITESPACE:
/* Reaching this group is reported as an internal inconsistency. */
1415 llcontbuglit ("scanline: nextCanBeChar");
/*
** LCLScanLine: scans one line of LCL input and passes the resulting tokens
** to LCLScanFreshToken.
**
** line is modified while scanning: the byte at index strlen (line), i.e. the
** terminating NUL, is overwritten with '\n', and a '\0' is stored back at
** the same index further down.
**
** NOTE(review): many statements, braces and case labels of this function are
** not visible in this excerpt; the comments describe only the visible lines.
*/
1424 LCLScanLine (char *line)
1427 lsymbol CCommentSym = lsymbol_fromChars ("/*");
1428 size_t linelength = strlen (line);
/* static: whether we are inside a spec comment persists across calls. */
1429 static bool inSpecComment = FALSE;
/* Replace the terminator with a newline for the duration of the scan. */
1431 line[(int)linelength] = '\n';
1434 currentChar = *currentLine++;
1435 context_processedSpecLine ();
/* Text collected in tokenBuffer is forwarded as a raw simpleId token. */
1447 newToken = ltoken_createRaw (simpleId, lsymbol_fromChars (&tokenBuffer[0]));
1448 LCLScanFreshToken (newToken);
/* A star followed by a slash closes the spec comment. */
1454 if (currentChar == '*' &&
1455 LOOKAHEADCHAR () == '/')
1459 inSpecComment = FALSE;
1466 if (inSpecComment && currentChar == '*' && LOOKAHEADCHAR () == '/')
1470 inSpecComment = FALSE;
/* Start a fresh token: rewind the buffer and remember its starting column. */
1473 bufPtr = &tokenBuffer[0];
1474 startCol = colNumber;
1477 /*@-loopswitchbreak@*/
/*
** Dispatch on the class of the leading character.
** NOTE(review): currentChar and LOOKAHEADCHAR () are cast straight to int
** for the table lookups below; if they have plain char type and char is
** signed, bytes >= 0x80 would give negative indices -- confirm the types.
*/
1478 switch (startClass[(int)currentChar])
1485 if (isDigit[(int) LOOKAHEADCHAR ()])
/* Whether ' opens a character literal depends on the previous token's code. */
1500 if (nextCanBeCharLiteral (prevTokenCode))
/* An L directly before a double or single quote is checked for here. */
1511 if (LOOKAHEADCHAR () == 'L' && LOOKAHEADTWICECHAR () == '\"')
1515 else if (LOOKAHEADCHAR () == 'L' && LOOKAHEADTWICECHAR () == '\'')
1526 if (LOOKAHEADCHAR () == '*')
1531 if (currentChar == '@')
/* Collect the word after '@' up to NUL, blank, '*', tab or newline. */
1533 char *s = mstring_createEmpty ();
1537 while (currentChar != '\0' && currentChar != ' '
1538 && currentChar != '*' && currentChar != '\t' &&
1539 currentChar != '\n')
1541 s = mstring_append (s, currentChar);
/* The word "alt" yields a "|" token and marks the start of a spec comment. */
1545 if (mstring_equal (s, "alt"))
1547 tokenCode = LLT_VERTICALBAR;
1548 tokenSym = lsymbol_fromChars ("|");
1549 inSpecComment = TRUE;
/* Otherwise the token is classified as a comment tagged with CCommentSym. */
1554 tokenCode = commentSym;
1555 tokenSym = CCommentSym;
1564 tokenCode = commentSym;
1565 tokenSym = CCommentSym;
1579 llcontbuglit ("LCLScanLine: bad case");
1583 /*@=loopswitchbreak@*/
1586 ** Above code only "guessed" at token type. Insert it into the
1587 ** TokenTable. If the token already exists, it is returned as
1588 ** previously defined. If it does not exist, it is inserted as the
1589 ** token code computed above.
1592 newToken = LCLInsertToken (tokenCode, tokenSym, lsymbol_undefined, FALSE);
1595 if (LCLIsSyn (ltoken_getText (newToken)))
1598 ** Token is a synonym. Get the actual token and set the raw
1599 ** text to the synonym name.
1602 newToken = ltoken_copy (LCLGetTokenForSyn (ltoken_getText (newToken)));
1604 ltoken_setRawText (newToken, tokenSym);
1608 newToken = ltoken_copy (newToken);
/* Stamp the copied token with its column, line number and file name. */
1611 ltoken_setCol (newToken, startCol);
1612 ltoken_setLine (newToken, inputStream_thisLineNumber (LCLScanSource ()));
1613 ltoken_setFileName (newToken, inputStream_fileName (LCLScanSource ()));
/* Comment tokens are handled separately from ordinary tokens. */
1615 if (ltoken_getCode (newToken) == commentSym)
1617 if (tokenSym == CCommentSym)
1618 { /* C-style comment */
/* commentTok is replaced by a copy of this comment token. */
1619 ltoken_free (commentTok);
1620 commentTok = ltoken_copy (newToken);
/* Forwarding is gated on not being in a comment and reportComments. */
1622 if (!inComment && reportComments)
1625 ltoken_setRawText (newToken,
1626 lsymbol_fromChars (&tokenBuffer[0]));
1627 LCLScanFreshToken (newToken);
1631 ltoken_free (newToken);
1635 { /* LSL-style comment */
1636 bufPtr = &tokenBuffer[0];
/* Loop until a character flagged as an end-of-comment character. */
1637 while (!LCLIsEndComment (currentChar))
1641 if (LCLScanCharClass (currentChar) != CHC_NULL)
1643 /* Not EOL character. Toss it out. */
1650 ltoken_setRawText (newToken,
1651 lsymbol_fromChars (&tokenBuffer[0]));
1652 LCLScanFreshToken (newToken);
1656 ltoken_free (newToken);
1660 else if (ltoken_getCode (newToken) == LLT_EOL)
1664 LCLScanFreshToken (newToken);
1668 ltoken_free (newToken);
/* Put the NUL terminator back where the '\n' was written on entry. */
1671 line[(int) linelength] = '\0';
/* Non-whitespace tokens are remembered in prevTokenCode and forwarded. */
1674 else if (ltoken_getCode (newToken) != LLT_WHITESPACE)
1676 prevTokenCode = ltoken_getCode (newToken);
1677 LCLScanFreshToken (newToken);
1681 ltoken_free (newToken);
/*
** LCLScanEofToken: obtains the end-of-file token (code LEOFTOKEN, text
** "E O F") from LCLInsertToken and stamps it with the current column, line
** number and file name of the scan source.
**
** NOTE(review): the return statement and the condition guarding the
** "Unterminated comment" report are not visible in this excerpt.
*/
1686 /*@exposed@*/ ltoken
1687 LCLScanEofToken (void)
1689 ltoken t = LCLInsertToken (LEOFTOKEN, lsymbol_fromChars ("E O F"), 0, TRUE);
/* The error is reported against commentTok, the last recorded comment token. */
1693 lclerror (commentTok, cstring_makeLiteral ("Unterminated comment"));
1696 ltoken_setCol (t, colNumber);
1697 ltoken_setLine (t, inputStream_thisLineNumber (LCLScanSource ()));
1698 ltoken_setFileName (t, inputStream_fileName (LCLScanSource ()));
/*
** LCLReportEolTokens: stores setting in the module flag reportEOL.
*/
1704 LCLReportEolTokens (bool setting)
1706 reportEOL = setting;
/*
** LocalUserError: reports msg through llfatalerror, prefixed with the file
** name and line number of the current scan source.
**
** NOTE(review): the format string has four conversions but only three
** arguments are visible here; the argument for the second %d (presumably the
** column) is on a line not shown in this excerpt.
*/
1710 LocalUserError (char *msg)
1712 inputStream s = LCLScanSource ();
1713 llfatalerror (message ("%s:%d,%d: %s",
1714 inputStream_fileName (s),
1715 inputStream_thisLineNumber (s),
1717 cstring_fromChars (msg)));
/*
** LCLScanLineInit: initialises the LCL line scanner.  It clears
** reportComments, loads LCLcharClass from charClassDef, reserves the first
** symbol name, and then reserves the predefined LSL tokens, punctuation,
** white space, LCL keywords and C keywords through LCLReserveToken.
**
** NOTE(review): local declarations and some statements are not visible in
** this excerpt.
*/
1721 LCLScanLineInit (void)
1727 reportComments = FALSE;
/* Copy the default class of every character code 0..LASTCHAR. */
1729 for (i = 0; i <= LASTCHAR; i++)
1731 LCLcharClass[i] = charClassDef[i];
1737 ** Make sure first postion is never used because use the 0th index to
1741 firstReserved = lsymbol_fromChars (FIRSTRESERVEDNAME);
1744 /* Predefined LSL Tokens */
/* The returned tokens are kept in module-level ltoken_* variables. */
1746 ltoken_forall = LCLReserveToken (quantifierSym, "\\forall");
1748 ltoken_exists = LCLReserveToken (quantifierSym, "\\exists");
1749 ltoken_implies = LCLReserveToken (logicalOp, "\\implies");
1750 ltoken_eqsep = LCLReserveToken (eqSepSym, "\\eqsep");
1751 ltoken_select = LCLReserveToken (selectSym, "\\select");
1752 ltoken_open = LCLReserveToken (openSym, "\\open");
1753 ltoken_sep = LCLReserveToken (sepSym, "\\,");
1754 ltoken_close = LCLReserveToken (closeSym, "\\close");
1755 ltoken_id = LCLReserveToken (simpleId, "\\:");
1756 ltoken_arrow = LCLReserveToken (mapSym, "\\arrow");
1757 ltoken_marker = LCLReserveToken (markerSym, "\\marker");
1758 ltoken_pre = LCLReserveToken (preSym, "\\pre");
1759 ltoken_post = LCLReserveToken (postSym, "\\post");
1760 ltoken_comment = LCLReserveToken (commentSym, "\\comment");
1761 ltoken_any = LCLReserveToken (anySym, "\\any");
1763 ltoken_result = LCLReserveToken (LLT_RESULT, "result");
/* The TYPEDEF_NAME token is additionally given the id type SID_TYPE. */
1764 ltoken_typename = LCLReserveToken (LLT_TYPEDEF_NAME, "TYPEDEF_NAME");
1765 ltoken_setIdType (ltoken_typename, SID_TYPE);
1768 ** Not context_getBoolName () --- "bool" is built in to LCL.
1769 ** This is bogus, but necessary for a lot of old lcl files.
1772 ltoken_bool = LCLReserveToken (LLT_TYPEDEF_NAME, "bool");
/*
** NOTE(review): the variable is spelled ltoken_lbracked while its partner is
** ltoken_rbracket -- confirm this matches the declaration.
*/
1774 ltoken_lbracked = LCLReserveToken (LLT_LBRACKET, "[");
1775 ltoken_rbracket = LCLReserveToken (LLT_RBRACKET, "]");
/* Punctuation: the returned tokens are deliberately discarded. */
1777 (void) LCLReserveToken (LLT_COLON, ":");
1778 (void) LCLReserveToken (LLT_COMMA, ",");
1780 (void) LCLReserveToken (LLT_EQUALS, "=");
1781 (void) LCLReserveToken (LLT_LBRACE, "{");
1782 (void) LCLReserveToken (LLT_LPAR, "(");
1783 (void) LCLReserveToken (LLT_RBRACE, "}");
1784 (void) LCLReserveToken (LLT_RPAR, ")");
1785 (void) LCLReserveToken (LLT_SEMI, ";");
1786 (void) LCLReserveToken (LLT_VERTICALBAR, "|");
1788 (void) LCLReserveToken (LLT_MULOP, "*");
/* Blank, tab, form feed and newline all map to LLT_WHITESPACE. */
1790 (void) LCLReserveToken (LLT_WHITESPACE, " ");
1791 (void) LCLReserveToken (LLT_WHITESPACE, "\t");
1792 (void) LCLReserveToken (LLT_WHITESPACE, "\f");
1793 (void) LCLReserveToken (LLT_WHITESPACE, "\n");
/* Texts used for the end-of-file and end-of-line tokens. */
1795 (void) LCLReserveToken (LEOFTOKEN, "E O F");
1796 (void) LCLReserveToken (LLT_EOL, "E O L");
/* Logical and equality operators, plus the identifiers true and false. */
1799 ltoken_and = LCLReserveToken (logicalOp, "\\and");
1800 ltoken_or = LCLReserveToken (logicalOp, "\\or");
1802 ltoken_equals = LCLReserveToken (equationSym, "\\equals");
1804 ltoken_eq = LCLReserveToken (eqOp, "\\eq");
1805 ltoken_neq = LCLReserveToken (eqOp, "\\neq");
1807 ltoken_not = LCLReserveToken (simpleOp, "\\not");
1808 ltoken_true = LCLReserveToken (simpleId, "true");
1809 ltoken_false = LCLReserveToken (simpleId, "false");
/* Keywords: each word is reserved with its own LLT_ code. */
1812 (void) LCLReserveToken (LLT_ALL, "all");
1813 (void) LCLReserveToken (LLT_ANYTHING, "anything");
1814 (void) LCLReserveToken (LLT_BE, "be");
1815 (void) LCLReserveToken (LLT_CONSTANT, "constant");
1816 (void) LCLReserveToken (LLT_CHECKS, "checks");
1817 (void) LCLReserveToken (LLT_ELSE, "else");
1818 (void) LCLReserveToken (LLT_ENSURES, "ensures");
1819 (void) LCLReserveToken (LLT_FOR, "for");
1820 (void) LCLReserveToken (LLT_IF, "if");
1821 (void) LCLReserveToken (LLT_IMMUTABLE, "immutable");
1822 (void) LCLReserveToken (LLT_OBJ, "obj");
1823 (void) LCLReserveToken (LLT_OUT, "out");
1824 (void) LCLReserveToken (LLT_ITER, "iter");
1825 (void) LCLReserveToken (LLT_YIELD, "yield");
1826 (void) LCLReserveToken (LLT_PARTIAL, "partial");
1827 (void) LCLReserveToken (LLT_ONLY, "only");
1828 (void) LCLReserveToken (LLT_UNDEF, "undef");
1829 (void) LCLReserveToken (LLT_KILLED, "killed");
1830 (void) LCLReserveToken (LLT_OWNED, "owned");
1831 (void) LCLReserveToken (LLT_DEPENDENT, "dependent");
/* NOTE(review): "partial" is already reserved above with the same code. */
1832 (void) LCLReserveToken (LLT_PARTIAL, "partial");
1833 (void) LCLReserveToken (LLT_RELDEF, "reldef");
1834 (void) LCLReserveToken (LLT_KEEP, "keep");
1835 (void) LCLReserveToken (LLT_KEPT, "kept");
1836 (void) LCLReserveToken (LLT_TEMP, "temp");
1837 (void) LCLReserveToken (LLT_SHARED, "shared");
1838 (void) LCLReserveToken (LLT_RELNULL, "relnull");
/* NOTE(review): "reldef" is already reserved above with the same code. */
1839 (void) LCLReserveToken (LLT_RELDEF, "reldef");
1840 (void) LCLReserveToken (LLT_CHECKED, "checked");
1841 (void) LCLReserveToken (LLT_UNCHECKED, "unchecked");
1842 (void) LCLReserveToken (LLT_CHECKEDSTRICT, "checkedstrict");
1843 (void) LCLReserveToken (LLT_CHECKMOD, "checkmod");
1844 (void) LCLReserveToken (LLT_TRUENULL, "truenull");
1845 (void) LCLReserveToken (LLT_FALSENULL, "falsenull");
1846 (void) LCLReserveToken (LLT_LNULL, "null");
1847 (void) LCLReserveToken (LLT_LNOTNULL, "notnull");
1848 (void) LCLReserveToken (LLT_RETURNED, "returned");
1849 (void) LCLReserveToken (LLT_OBSERVER, "observer");
1850 (void) LCLReserveToken (LLT_EXPOSED, "exposed");
1851 (void) LCLReserveToken (LLT_REFCOUNTED, "refcounted");
1852 (void) LCLReserveToken (LLT_REFS, "refs");
1853 (void) LCLReserveToken (LLT_NEWREF, "newref");
1854 (void) LCLReserveToken (LLT_TEMPREF, "tempref");
1855 (void) LCLReserveToken (LLT_KILLREF, "killref");
1856 (void) LCLReserveToken (LLT_NULLTERMINATED, "nullterminated");
1857 (void) LCLReserveToken (LLT_EXITS, "exits");
1858 (void) LCLReserveToken (LLT_MAYEXIT, "mayexit");
1859 (void) LCLReserveToken (LLT_TRUEEXIT, "trueexit");
1860 (void) LCLReserveToken (LLT_FALSEEXIT, "falseexit");
1861 (void) LCLReserveToken (LLT_NEVEREXIT, "neverexit");
1862 (void) LCLReserveToken (LLT_SEF, "sef");
1863 (void) LCLReserveToken (LLT_UNUSED, "unused");
1864 (void) LCLReserveToken (LLT_UNIQUE, "unique");
1865 (void) LCLReserveToken (LLT_IMPORTS, "imports");
1866 (void) LCLReserveToken (LLT_CONSTRAINT, "constraint");
1867 (void) LCLReserveToken (LLT_LET, "let");
1868 (void) LCLReserveToken (LLT_MODIFIES, "modifies");
1869 (void) LCLReserveToken (LLT_CLAIMS, "claims");
1870 (void) LCLReserveToken (LLT_BODY, "body");
1871 (void) LCLReserveToken (LLT_MUTABLE, "mutable");
1872 (void) LCLReserveToken (LLT_FRESH, "fresh");
1873 (void) LCLReserveToken (LLT_NOTHING, "nothing");
1874 (void) LCLReserveToken (LLT_INTERNAL, "internalState");
1875 (void) LCLReserveToken (LLT_FILESYS, "fileSystem");
1876 (void) LCLReserveToken (LLT_PRIVATE, "private");
1877 (void) LCLReserveToken (LLT_SPEC, "spec");
1878 (void) LCLReserveToken (LLT_REQUIRES, "requires");
1879 (void) LCLReserveToken (LLT_SIZEOF, "sizeof");
1880 (void) LCLReserveToken (LLT_TAGGEDUNION, "taggedunion");
1881 (void) LCLReserveToken (LLT_THEN, "then");
1882 (void) LCLReserveToken (LLT_TYPE, "type");
1883 (void) LCLReserveToken (LLT_TYPEDEF, "typedef");
1884 (void) LCLReserveToken (LLT_UNCHANGED, "unchanged");
1885 (void) LCLReserveToken (LLT_USES, "uses");
1886 (void) LCLReserveToken (LLT_PRINTFLIKE, "printflike");
1887 (void) LCLReserveToken (LLT_SCANFLIKE, "scanflike");
1888 (void) LCLReserveToken (LLT_MESSAGELIKE, "messagelike");
1890 /* LCL C Keywords */
1891 (void) LCLReserveToken (LLT_CHAR, "char");
1892 (void) LCLReserveToken (LLT_CONST, "const");
1893 (void) LCLReserveToken (LLT_DOUBLE, "double");
1894 (void) LCLReserveToken (LLT_ENUM, "enum");
1896 /* comment out so we can add in lclinit.lci: synonym double float */
1897 /* LCLReserveToken (LLT_FLOAT, "float"); */
1898 /* But we need to make the scanner parse "float" not as a simpleId, but
1899 as a TYPEDEF_NAME. This is done later in abstract_init */
1901 (void) LCLReserveToken (LLT_INT, "int");
1902 (void) LCLReserveToken (LLT_LONG, "long");
1903 (void) LCLReserveToken (LLT_SHORT, "short");
1904 (void) LCLReserveToken (LLT_STRUCT, "struct");
1905 (void) LCLReserveToken (LLT_SIGNED, "signed");
1906 (void) LCLReserveToken (LLT_UNION, "union");
1907 (void) LCLReserveToken (LLT_UNKNOWN, "__unknown");
1908 (void) LCLReserveToken (LLT_UNSIGNED, "unsigned");
1909 (void) LCLReserveToken (LLT_VOID, "void");
1910 (void) LCLReserveToken (LLT_VOLATILE, "volatile");
/*
** LCLScanLineReset: resets per-scan state.  The visible statement seeds
** prevTokenCode with LLT_LPAR so that a leading ' is taken as the start of
** a character literal.
**
** NOTE(review): at least one body line is not visible in this excerpt.
*/
1915 LCLScanLineReset (void)
1918 prevTokenCode = LLT_LPAR; /* Presume first ' starts literal */
/*
** LCLScanLineCleanup: cleanup entry point of the LCL line scanner.
** NOTE(review): the body is not visible in this excerpt.
*/
1922 LCLScanLineCleanup (void)
1926 bool LCLIsEndComment (char c)
1928 return LCLcharClass[(int)(c)].endCommentChar;
1931 charCode LCLScanCharClass (char c)
1933 return LCLcharClass[(int)(c)].code;
1936 void LCLSetCharClass (char c, charCode cod)
1938 LCLcharClass[(int)(c)].code = (cod);
1941 void LCLSetEndCommentChar (char c, bool flag)
1943 LCLcharClass[(int)(c)].endCommentChar = flag;