]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ** Splint - annotation-assisted static program checker | |
3 | ** Copyright (C) 1994-2002 University of Virginia, | |
4 | ** Massachusetts Institute of Technology | |
5 | ** | |
6 | ** This program is free software; you can redistribute it and/or modify it | |
7 | ** under the terms of the GNU General Public License as published by the | |
8 | ** Free Software Foundation; either version 2 of the License, or (at your | |
9 | ** option) any later version. | |
10 | ** | |
11 | ** This program is distributed in the hope that it will be useful, but | |
12 | ** WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ** General Public License for more details. | |
15 | ** | |
16 | ** The GNU General Public License is available from http://www.gnu.org/ or | |
17 | ** the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
18 | ** MA 02111-1307, USA. | |
19 | ** | |
20 | ** For information on splint: info@splint.org | |
21 | ** To report a bug: splint-bug@splint.org | |
22 | ** For more information: http://www.splint.org | |
23 | */ | |
24 | /* | |
25 | ** scanline.c | |
26 | ** | |
27 | ** Scan one line of Larch SL input at a time. | |
28 | ** | |
29 | ** The input is source text, line at a time. The output is a sequence | |
30 | ** of tokens, reported by call-out LSLScanFreshToken. | |
31 | ** | |
32 | ** This organization allows implementation of line-at-a-time | |
33 | ** incremental scanning. The incremental mechanism is in the | |
34 | ** driving module scan.c, which can save and replay tokens for | |
35 | ** unchanged lines. This module works either way. | |
36 | ** | |
37 | ** The main loop of the scanner keys on the leading character. | |
38 | ** Actions within the loop collect the token starting with that | |
39 | ** character and determine its kind. | |
40 | */ | |
41 | ||
42 | # include "splintMacros.nf" | |
43 | # include "llbasic.h" | |
44 | # include "signature.h" | |
45 | # include "signature2.h" | |
46 | # include "scan.h" | |
47 | # include "scanline.h" | |
48 | # include "tokentable.h" | |
49 | # include "syntable.h" | |
50 | ||
/*
** MOVECHAR appends the current character c to the token buffer, then
** advances c to the next character of currentLine and bumps colNumber.
** It expands in place and relies on the caller's locals c, bufptr and
** currentLine.
**
** The store is skipped once only the terminator slot of tokenBuffer is
** left, so an over-long token or comment is truncated to MAXCHAR - 1
** characters instead of overrunning the buffer.  Input is still consumed
** and the column is still counted for the characters that are dropped.
*/
/*@notfunction@*/
# define MOVECHAR() \
  do { \
    if (bufptr < &tokenBuffer[MAXCHAR - 1]) \
      { \
        *bufptr++ = c; \
      } \
    c = *currentLine++; \
    colNumber++; \
  } while (FALSE)
53 | ||
/* Number of characters in the token buffer (it is declared with this size). */
/*@constant static int MAXCHAR;@*/
# define MAXCHAR 512

/* Distance between tab stops; used to advance the column past a tab. */
/*@constant static int TABSIZE;@*/
# define TABSIZE 8
59 | ||
60 | static void LocalUserError (/*@temp@*/ char *p_msg); | |
61 | ||
62 | static charClassData charClass[LASTCHAR + 1]; | |
63 | ||
64 | static int colNumber; | |
65 | static int startCol; | |
66 | static bool reportEOL; | |
67 | static bool reportComments; | |
68 | ||
69 | static char tokenBuffer[MAXCHAR]; | |
70 | ||
71 | static const charClassData charClassDef[] = | |
72 | { | |
73 | /* Control characters */ | |
74 | ||
75 | { CHC_NULL, TRUE }, /* 0 NULL */ | |
76 | { SINGLECHAR, FALSE }, /* 1 CTRL-A */ | |
77 | { SINGLECHAR, FALSE }, /* 2 CTRL-B */ | |
78 | { SINGLECHAR, FALSE }, /* 3 CTRL-C */ | |
79 | { SINGLECHAR, FALSE }, /* 4 CTRL-D */ | |
80 | { SINGLECHAR, FALSE }, /* 5 CTRL-E */ | |
81 | { SINGLECHAR, FALSE }, /* 6 CTRL-F */ | |
82 | { SINGLECHAR, FALSE }, /* 7 CTRL-G */ | |
83 | { SINGLECHAR, FALSE }, /* 8 CTRL-H */ | |
84 | ||
85 | /* defined formatting characters */ | |
86 | ||
87 | { WHITECHAR, FALSE }, /* 9 CTRL-I TAB */ | |
88 | { WHITECHAR, TRUE }, /* 10 CTRL-J EOL */ | |
89 | ||
90 | /* more control characters */ | |
91 | ||
92 | { SINGLECHAR, FALSE }, /* 11 CTRL-K */ | |
93 | { SINGLECHAR, FALSE }, /* 12 CTRL-L */ | |
94 | { SINGLECHAR, FALSE }, /* 13 CTRL-M */ | |
95 | { SINGLECHAR, FALSE }, /* 14 CTRL-N */ | |
96 | { SINGLECHAR, FALSE }, /* 15 CTRL-O */ | |
97 | { SINGLECHAR, FALSE }, /* 16 CTRL-P */ | |
98 | { SINGLECHAR, FALSE }, /* 17 CTRL-Q */ | |
99 | { SINGLECHAR, FALSE }, /* 18 CTRL-R */ | |
100 | { SINGLECHAR, FALSE }, /* 19 CTRL-S */ | |
101 | { SINGLECHAR, FALSE }, /* 20 CTRL-T */ | |
102 | { SINGLECHAR, FALSE }, /* 21 CTRL-U */ | |
103 | { SINGLECHAR, FALSE }, /* 22 CTRL-V */ | |
104 | { SINGLECHAR, FALSE }, /* 23 CTRL-W */ | |
105 | { SINGLECHAR, FALSE }, /* 24 CTRL-X */ | |
106 | { SINGLECHAR, FALSE }, /* 25 CTRL-Y */ | |
107 | { SINGLECHAR, FALSE }, /* 26 CTRL-Z */ | |
108 | { SINGLECHAR, FALSE }, /* 27 CTRL-[ ESC */ | |
109 | { SINGLECHAR, FALSE }, /* 28 CTRL-slash */ | |
110 | { SINGLECHAR, FALSE }, /* 29 CTRL-] GS */ | |
111 | { SINGLECHAR, FALSE }, /* 30 CTRL-^ RS */ | |
112 | { SINGLECHAR, FALSE }, /* 31 CTRL-_ US */ | |
113 | ||
114 | /* Special printing characters */ | |
115 | ||
116 | { WHITECHAR, FALSE }, /* 32 space */ | |
117 | { SINGLECHAR, FALSE }, /* 33 ! */ | |
118 | { SINGLECHAR, FALSE }, /* 34 " */ | |
119 | { SINGLECHAR, FALSE }, /* 35 # */ | |
120 | { SINGLECHAR, FALSE }, /* 36 $ */ | |
121 | { SINGLECHAR, FALSE }, /* 37 % */ | |
122 | { SINGLECHAR, FALSE }, /* 38 & */ | |
123 | { SINGLECHAR, FALSE }, /* 39 ' */ | |
124 | ||
125 | /* Reserved characters */ | |
126 | ||
127 | { PERMCHAR, FALSE }, /* 40 ( */ | |
128 | { PERMCHAR, FALSE }, /* 41 ) */ | |
129 | { OPCHAR, FALSE }, /* 42 * */ | |
130 | { OPCHAR, FALSE }, /* 43 + */ | |
131 | { PERMCHAR, FALSE }, /* 44 , */ | |
132 | { OPCHAR, FALSE }, /* 45 - */ | |
133 | { OPCHAR, FALSE }, /* 46 . */ | |
134 | { SLASHCHAR, FALSE }, /* 47 / */ | |
135 | ||
136 | /* Numbers */ | |
137 | ||
138 | { IDCHAR, FALSE }, /* 48 0 */ | |
139 | { IDCHAR, FALSE }, /* 49 1 */ | |
140 | { IDCHAR, FALSE }, /* 50 2 */ | |
141 | { IDCHAR, FALSE }, /* 51 3 */ | |
142 | { IDCHAR, FALSE }, /* 52 4 */ | |
143 | { IDCHAR, FALSE }, /* 53 5 */ | |
144 | { IDCHAR, FALSE }, /* 54 6 */ | |
145 | { IDCHAR, FALSE }, /* 55 7 */ | |
146 | { IDCHAR, FALSE }, /* 56 8 */ | |
147 | { IDCHAR, FALSE }, /* 57 9 */ | |
148 | ||
149 | /* More reserved and special printing characters */ | |
150 | ||
151 | { PERMCHAR, FALSE }, /* 58 : */ | |
152 | { SINGLECHAR, FALSE }, /* 59; */ | |
153 | { OPCHAR, FALSE }, /* 60 < */ | |
154 | { OPCHAR, FALSE }, /* 61 = */ | |
155 | { OPCHAR, FALSE }, /* 62 > */ | |
156 | { SINGLECHAR, FALSE }, /* 63 ? */ | |
157 | { SINGLECHAR, FALSE }, /* 64 @ */ | |
158 | ||
159 | /* Uppercase Alphabetics */ | |
160 | ||
161 | { IDCHAR, FALSE }, /* 65 A */ | |
162 | { IDCHAR, FALSE }, /* 66 B */ | |
163 | { IDCHAR, FALSE }, /* 67 C */ | |
164 | { IDCHAR, FALSE }, /* 68 D */ | |
165 | { IDCHAR, FALSE }, /* 69 E */ | |
166 | { IDCHAR, FALSE }, /* 70 F */ | |
167 | { IDCHAR, FALSE }, /* 71 G */ | |
168 | { IDCHAR, FALSE }, /* 72 H */ | |
169 | { IDCHAR, FALSE }, /* 73 I */ | |
170 | { IDCHAR, FALSE }, /* 74 J */ | |
171 | { IDCHAR, FALSE }, /* 75 K */ | |
172 | { IDCHAR, FALSE }, /* 76 L */ | |
173 | { IDCHAR, FALSE }, /* 77 M */ | |
174 | { IDCHAR, FALSE }, /* 78 N */ | |
175 | { IDCHAR, FALSE }, /* 79 O */ | |
176 | { IDCHAR, FALSE }, /* 80 P */ | |
177 | { IDCHAR, FALSE }, /* 81 Q */ | |
178 | { IDCHAR, FALSE }, /* 82 R */ | |
179 | { IDCHAR, FALSE }, /* 83 S */ | |
180 | { IDCHAR, FALSE }, /* 84 T */ | |
181 | { IDCHAR, FALSE }, /* 85 U */ | |
182 | { IDCHAR, FALSE }, /* 86 V */ | |
183 | { IDCHAR, FALSE }, /* 87 W */ | |
184 | { IDCHAR, FALSE }, /* 88 X */ | |
185 | { IDCHAR, FALSE }, /* 89 Y */ | |
186 | { IDCHAR, FALSE }, /* 90 Z */ | |
187 | ||
188 | /* Still more reserved and special printing characters */ | |
189 | ||
190 | { SINGLECHAR, FALSE }, /* 91 [ */ | |
191 | { CHC_EXTENSION, FALSE }, /* 92 slash */ | |
192 | { SINGLECHAR, FALSE }, /* 93 ] */ | |
193 | { SINGLECHAR, FALSE }, /* 94 ^ */ | |
194 | { IDCHAR, FALSE }, /* 95 _ */ | |
195 | { SINGLECHAR, FALSE }, /* 96 ` */ | |
196 | ||
197 | /* Lowercase alphabetics */ | |
198 | ||
199 | { IDCHAR, FALSE }, /* 97 a */ | |
200 | { IDCHAR, FALSE }, /* 98 b */ | |
201 | { IDCHAR, FALSE }, /* 99 c */ | |
202 | { IDCHAR, FALSE }, /* 100 d */ | |
203 | { IDCHAR, FALSE }, /* 101 e */ | |
204 | { IDCHAR, FALSE }, /* 102 f */ | |
205 | { IDCHAR, FALSE }, /* 103 g */ | |
206 | { IDCHAR, FALSE }, /* 104 h */ | |
207 | { IDCHAR, FALSE }, /* 105 i */ | |
208 | { IDCHAR, FALSE }, /* 106 j */ | |
209 | { IDCHAR, FALSE }, /* 107 k */ | |
210 | { IDCHAR, FALSE }, /* 108 l */ | |
211 | { IDCHAR, FALSE }, /* 109 m */ | |
212 | { IDCHAR, FALSE }, /* 110 n */ | |
213 | { IDCHAR, FALSE }, /* 111 o */ | |
214 | { IDCHAR, FALSE }, /* 112 p */ | |
215 | { IDCHAR, FALSE }, /* 113 q */ | |
216 | { IDCHAR, FALSE }, /* 114 r */ | |
217 | { IDCHAR, FALSE }, /* 115 s */ | |
218 | { IDCHAR, FALSE }, /* 116 t */ | |
219 | { IDCHAR, FALSE }, /* 117 u */ | |
220 | { IDCHAR, FALSE }, /* 118 v */ | |
221 | { IDCHAR, FALSE }, /* 119 w */ | |
222 | { IDCHAR, FALSE }, /* 120 x */ | |
223 | { IDCHAR, FALSE }, /* 121 y */ | |
224 | { IDCHAR, FALSE }, /* 122 z */ | |
225 | ||
226 | { SINGLECHAR, FALSE }, /* 123 { */ | |
227 | { SINGLECHAR, FALSE }, /* 124 | */ | |
228 | { SINGLECHAR, FALSE }, /* 125 } */ | |
229 | { SINGLECHAR, FALSE }, /* 126 ~ */ | |
230 | { SINGLECHAR, FALSE }, /* 127 DEL */ | |
231 | ||
232 | /* MCS - unused in English */ | |
233 | ||
234 | { SINGLECHAR, FALSE }, /* 128 */ | |
235 | { SINGLECHAR, FALSE }, /* 129 */ | |
236 | { SINGLECHAR, FALSE }, /* 130 */ | |
237 | { SINGLECHAR, FALSE }, /* 131 */ | |
238 | { SINGLECHAR, FALSE }, /* 132 */ | |
239 | { SINGLECHAR, FALSE }, /* 133 */ | |
240 | { SINGLECHAR, FALSE }, /* 134 */ | |
241 | { SINGLECHAR, FALSE }, /* 135 */ | |
242 | { SINGLECHAR, FALSE }, /* 136 */ | |
243 | { SINGLECHAR, FALSE }, /* 137 */ | |
244 | { SINGLECHAR, FALSE }, /* 138 */ | |
245 | { SINGLECHAR, FALSE }, /* 139 */ | |
246 | { SINGLECHAR, FALSE }, /* 140 */ | |
247 | { SINGLECHAR, FALSE }, /* 141 */ | |
248 | { SINGLECHAR, FALSE }, /* 142 */ | |
249 | { SINGLECHAR, FALSE }, /* 143 */ | |
250 | { SINGLECHAR, FALSE }, /* 144 */ | |
251 | { SINGLECHAR, FALSE }, /* 145 */ | |
252 | { SINGLECHAR, FALSE }, /* 146 */ | |
253 | { SINGLECHAR, FALSE }, /* 147 */ | |
254 | { SINGLECHAR, FALSE }, /* 148 */ | |
255 | { SINGLECHAR, FALSE }, /* 149 */ | |
256 | { SINGLECHAR, FALSE }, /* 150 */ | |
257 | { SINGLECHAR, FALSE }, /* 151 */ | |
258 | { SINGLECHAR, FALSE }, /* 152 */ | |
259 | { SINGLECHAR, FALSE }, /* 153 */ | |
260 | { SINGLECHAR, FALSE }, /* 154 */ | |
261 | { SINGLECHAR, FALSE }, /* 155 */ | |
262 | { SINGLECHAR, FALSE }, /* 156 */ | |
263 | { SINGLECHAR, FALSE }, /* 157 */ | |
264 | { SINGLECHAR, FALSE }, /* 158 */ | |
265 | { SINGLECHAR, FALSE }, /* 159 */ | |
266 | { SINGLECHAR, FALSE }, /* 160 */ | |
267 | { SINGLECHAR, FALSE }, /* 161 */ | |
268 | { SINGLECHAR, FALSE }, /* 162 */ | |
269 | { SINGLECHAR, FALSE }, /* 163 */ | |
270 | { SINGLECHAR, FALSE }, /* 164 */ | |
271 | { SINGLECHAR, FALSE }, /* 165 */ | |
272 | { SINGLECHAR, FALSE }, /* 166 */ | |
273 | { SINGLECHAR, FALSE }, /* 167 */ | |
274 | { SINGLECHAR, FALSE }, /* 168 */ | |
275 | { SINGLECHAR, FALSE }, /* 169 */ | |
276 | { SINGLECHAR, FALSE }, /* 170 */ | |
277 | { SINGLECHAR, FALSE }, /* 171 */ | |
278 | { SINGLECHAR, FALSE }, /* 172 */ | |
279 | { SINGLECHAR, FALSE }, /* 173 */ | |
280 | { SINGLECHAR, FALSE }, /* 174 */ | |
281 | { SINGLECHAR, FALSE }, /* 175 */ | |
282 | { SINGLECHAR, FALSE }, /* 176 */ | |
283 | { SINGLECHAR, FALSE }, /* 177 */ | |
284 | { SINGLECHAR, FALSE }, /* 178 */ | |
285 | { SINGLECHAR, FALSE }, /* 179 */ | |
286 | { SINGLECHAR, FALSE }, /* 180 */ | |
287 | { SINGLECHAR, FALSE }, /* 181 */ | |
288 | { SINGLECHAR, FALSE }, /* 182 */ | |
289 | { SINGLECHAR, FALSE }, /* 183 */ | |
290 | { SINGLECHAR, FALSE }, /* 184 */ | |
291 | { SINGLECHAR, FALSE }, /* 185 */ | |
292 | { SINGLECHAR, FALSE }, /* 186 */ | |
293 | { SINGLECHAR, FALSE }, /* 187 */ | |
294 | { SINGLECHAR, FALSE }, /* 188 */ | |
295 | { SINGLECHAR, FALSE }, /* 189 */ | |
296 | { SINGLECHAR, FALSE }, /* 190 */ | |
297 | { SINGLECHAR, FALSE }, /* 191 */ | |
298 | { SINGLECHAR, FALSE }, /* 192 */ | |
299 | { SINGLECHAR, FALSE }, /* 193 */ | |
300 | { SINGLECHAR, FALSE }, /* 194 */ | |
301 | { SINGLECHAR, FALSE }, /* 195 */ | |
302 | { SINGLECHAR, FALSE }, /* 196 */ | |
303 | { SINGLECHAR, FALSE }, /* 197 */ | |
304 | { SINGLECHAR, FALSE }, /* 198 */ | |
305 | { SINGLECHAR, FALSE }, /* 199 */ | |
306 | { SINGLECHAR, FALSE }, /* 200 */ | |
307 | { SINGLECHAR, FALSE }, /* 201 */ | |
308 | { SINGLECHAR, FALSE }, /* 202 */ | |
309 | { SINGLECHAR, FALSE }, /* 203 */ | |
310 | { SINGLECHAR, FALSE }, /* 204 */ | |
311 | { SINGLECHAR, FALSE }, /* 205 */ | |
312 | { SINGLECHAR, FALSE }, /* 206 */ | |
313 | { SINGLECHAR, FALSE }, /* 207 */ | |
314 | { SINGLECHAR, FALSE }, /* 208 */ | |
315 | { SINGLECHAR, FALSE }, /* 209 */ | |
316 | { SINGLECHAR, FALSE }, /* 210 */ | |
317 | { SINGLECHAR, FALSE }, /* 211 */ | |
318 | { SINGLECHAR, FALSE }, /* 212 */ | |
319 | { SINGLECHAR, FALSE }, /* 213 */ | |
320 | { SINGLECHAR, FALSE }, /* 214 */ | |
321 | { SINGLECHAR, FALSE }, /* 215 */ | |
322 | { SINGLECHAR, FALSE }, /* 216 */ | |
323 | { SINGLECHAR, FALSE }, /* 217 */ | |
324 | { SINGLECHAR, FALSE }, /* 218 */ | |
325 | { SINGLECHAR, FALSE }, /* 219 */ | |
326 | { SINGLECHAR, FALSE }, /* 220 */ | |
327 | { SINGLECHAR, FALSE }, /* 221 */ | |
328 | { SINGLECHAR, FALSE }, /* 222 */ | |
329 | { SINGLECHAR, FALSE }, /* 223 */ | |
330 | { SINGLECHAR, FALSE }, /* 224 */ | |
331 | { SINGLECHAR, FALSE }, /* 225 */ | |
332 | { SINGLECHAR, FALSE }, /* 226 */ | |
333 | { SINGLECHAR, FALSE }, /* 227 */ | |
334 | { SINGLECHAR, FALSE }, /* 228 */ | |
335 | { SINGLECHAR, FALSE }, /* 229 */ | |
336 | { SINGLECHAR, FALSE }, /* 230 */ | |
337 | { SINGLECHAR, FALSE }, /* 231 */ | |
338 | { SINGLECHAR, FALSE }, /* 232 */ | |
339 | { SINGLECHAR, FALSE }, /* 233 */ | |
340 | { SINGLECHAR, FALSE }, /* 234 */ | |
341 | { SINGLECHAR, FALSE }, /* 235 */ | |
342 | { SINGLECHAR, FALSE }, /* 236 */ | |
343 | { SINGLECHAR, FALSE }, /* 237 */ | |
344 | { SINGLECHAR, FALSE }, /* 238 */ | |
345 | { SINGLECHAR, FALSE }, /* 239 */ | |
346 | { SINGLECHAR, FALSE }, /* 240 */ | |
347 | { SINGLECHAR, FALSE }, /* 241 */ | |
348 | { SINGLECHAR, FALSE }, /* 242 */ | |
349 | { SINGLECHAR, FALSE }, /* 243 */ | |
350 | { SINGLECHAR, FALSE }, /* 244 */ | |
351 | { SINGLECHAR, FALSE }, /* 245 */ | |
352 | { SINGLECHAR, FALSE }, /* 246 */ | |
353 | { SINGLECHAR, FALSE }, /* 247 */ | |
354 | { SINGLECHAR, FALSE }, /* 248 */ | |
355 | { SINGLECHAR, FALSE }, /* 249 */ | |
356 | { SINGLECHAR, FALSE }, /* 250 */ | |
357 | { SINGLECHAR, FALSE }, /* 251 */ | |
358 | { SINGLECHAR, FALSE }, /* 252 */ | |
359 | { SINGLECHAR, FALSE }, /* 253 */ | |
360 | { SINGLECHAR, FALSE }, /* 254 */ | |
361 | { SINGLECHAR, FALSE } /* 255 */ | |
362 | }; | |
363 | ||
364 | /* | |
365 | **++ | |
366 | ** FUNCTION NAME: | |
367 | ** | |
368 | ** lscanline () | |
369 | ** | |
370 | ** FORMAL PARAMETERS: | |
371 | ** | |
372 | ** None | |
373 | ** | |
374 | ** RETURN VALUE: | |
375 | ** | |
376 | ** None | |
377 | ** | |
378 | ** INVARIANTS: | |
379 | ** | |
380 | ** [@description or none@] | |
381 | ** | |
382 | ** DESCRIPTION: | |
383 | ** | |
384 | ** One line of text is processed. | |
385 | ** Tokens are delivered via the call LSLScanFreshToken (). | |
386 | ** | |
387 | ** EXCEPTIONS: | |
388 | ** | |
389 | **-- | |
390 | */ | |
391 | ||
392 | void | |
393 | lscanLine (char *currentLine) | |
394 | { | |
395 | ltokenCode cod; | |
396 | lsymbol sym; | |
397 | register char c; | |
398 | register char *bufptr; | |
399 | ltoken newToken; | |
400 | ||
401 | c = *currentLine++; | |
402 | colNumber = 0; | |
403 | ||
404 | for (;;) | |
405 | { | |
406 | bufptr = &tokenBuffer[0]; | |
407 | startCol = colNumber; | |
408 | ||
409 | /*@-loopswitchbreak@*/ | |
410 | switch (lscanCharClass (c)) | |
411 | { | |
412 | ||
413 | case CHC_NULL: | |
414 | sym = lsymbol_fromChars ("E O L"); | |
415 | cod = LST_EOL; | |
416 | break; | |
417 | ||
418 | /* Identifiers */ | |
419 | ||
420 | case IDCHAR: | |
421 | ||
422 | while (lscanCharClass (c) == IDCHAR) | |
423 | { | |
424 | MOVECHAR (); | |
425 | } | |
426 | ||
427 | *bufptr = '\0'; | |
428 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
429 | cod = LST_SIMPLEID; | |
430 | break; | |
431 | ||
432 | /* One-character tokens */ | |
433 | ||
434 | case SINGLECHAR: | |
435 | case PERMCHAR: | |
436 | MOVECHAR (); | |
437 | *bufptr = '\0'; | |
438 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
439 | cod = LST_SIMPLEOP; | |
440 | break; | |
441 | ||
442 | case SLASHCHAR: | |
443 | if (*currentLine == '\\') | |
444 | { | |
445 | MOVECHAR (); | |
446 | MOVECHAR (); | |
447 | *bufptr = '\0'; | |
448 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
449 | cod = LST_SIMPLEOP; | |
450 | break; | |
451 | } | |
452 | MOVECHAR (); | |
453 | /* We fall through to next case if we have / followed */ | |
454 | /* by anything else. */ | |
455 | /*@fallthrough@*/ | |
456 | case OPCHAR: | |
457 | ||
458 | /* Operator symbols */ | |
459 | ||
460 | /* possible multi character */ | |
461 | while (lscanCharClass (c) == OPCHAR) | |
462 | { | |
463 | MOVECHAR (); | |
464 | } | |
465 | ||
466 | *bufptr = '\0'; /* null terminate in buffer */ | |
467 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
468 | cod = LST_SIMPLEOP; | |
469 | break; | |
470 | ||
471 | /* White space */ | |
472 | case WHITECHAR: | |
473 | /*@-switchswitchbreak@*/ | |
474 | switch (c) | |
475 | { | |
476 | case '\t': | |
477 | MOVECHAR (); | |
478 | colNumber--; | |
479 | colNumber += TABSIZE; | |
480 | colNumber -= (colNumber % TABSIZE); | |
481 | break; | |
482 | ||
483 | case '\v': | |
484 | case '\f': | |
485 | MOVECHAR (); | |
486 | colNumber--; | |
487 | break; | |
488 | ||
489 | default: | |
490 | MOVECHAR (); | |
491 | break; | |
492 | } | |
493 | *bufptr = '\0'; | |
494 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
495 | cod = LST_WHITESPACE; | |
496 | break; | |
497 | ||
498 | case CHC_EXTENSION: | |
499 | MOVECHAR (); | |
500 | ||
501 | switch (c) | |
502 | { | |
503 | ||
504 | /* open and close */ | |
505 | case '(': | |
506 | MOVECHAR (); | |
507 | while (lscanCharClass (c) == IDCHAR) | |
508 | { | |
509 | MOVECHAR (); | |
510 | } | |
511 | *bufptr = '\0'; | |
512 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
513 | cod = LST_OPENSYM; | |
514 | break; | |
515 | ||
516 | case ')': | |
517 | MOVECHAR (); | |
518 | while (lscanCharClass (c) == IDCHAR) | |
519 | { | |
520 | MOVECHAR (); | |
521 | } | |
522 | *bufptr = '\0'; | |
523 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
524 | cod = LST_CLOSESYM; | |
525 | break; | |
526 | ||
527 | /* separator */ | |
528 | case ',': | |
529 | MOVECHAR (); | |
530 | while (lscanCharClass (c) == IDCHAR) | |
531 | { | |
532 | MOVECHAR (); | |
533 | } | |
534 | *bufptr = '\0'; | |
535 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
536 | cod = LST_SEPSYM; | |
537 | break; | |
538 | ||
539 | /* simpleid */ | |
540 | case ':': | |
541 | MOVECHAR (); | |
542 | while (lscanCharClass (c) == IDCHAR) | |
543 | { | |
544 | MOVECHAR (); | |
545 | } | |
546 | *bufptr = '\0'; | |
547 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
548 | cod = LST_SIMPLEID; | |
549 | break; | |
550 | ||
551 | default: | |
552 | if (lscanCharClass (c) == IDCHAR) | |
553 | { | |
554 | do | |
555 | { | |
556 | MOVECHAR (); | |
557 | } | |
558 | while (lscanCharClass (c) == IDCHAR); | |
559 | *bufptr = '\0'; | |
560 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
561 | cod = LST_SIMPLEOP; | |
562 | } | |
563 | else | |
564 | { | |
565 | /* | |
566 | ** Meets none of the above. Take the extension | |
567 | ** character and the character following and treat | |
568 | ** together as a SINGLECHAR. SINGLECHARs tranlate into | |
569 | ** SIMPLEOPs. | |
570 | */ | |
571 | ||
572 | MOVECHAR (); | |
573 | *bufptr = '\0'; | |
574 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
575 | cod = LST_SIMPLEOP; | |
576 | } | |
577 | break; | |
578 | } | |
579 | /*@switchbreak@*/ break; | |
580 | /*@=switchswitchbreak@*/ | |
581 | default: | |
582 | ||
583 | LocalUserError ("unexpected character in input"); | |
584 | return; | |
585 | } | |
586 | /*@=loopswitchbreak@*/ | |
587 | ||
588 | /* | |
589 | ** Above code only "guessed" at token type. Insert it into the | |
590 | ** TokenTable. If the token already exists, it is returned as | |
591 | ** previously defined. If it does not exist, it is inserted as the | |
592 | ** token code computed above. | |
593 | */ | |
594 | ||
595 | newToken = LSLInsertToken (cod, sym, 0, FALSE); | |
596 | ||
597 | if (LSLIsSyn (ltoken_getText (newToken))) | |
598 | { | |
599 | /* | |
600 | ** Token is a synonym. Get the actual token and set the raw | |
601 | ** text to the synonym name. | |
602 | */ | |
603 | ||
604 | newToken = LSLGetTokenForSyn (ltoken_getText (newToken)); | |
605 | ltoken_setRawText (newToken, sym); | |
606 | } | |
607 | ||
608 | ltoken_setCol (newToken, startCol); | |
609 | ltoken_setLine (newToken, inputStream_thisLineNumber (LSLScanSource ())); | |
610 | ltoken_setFileName (newToken, inputStream_fileName (LSLScanSource ())); | |
611 | ||
612 | if (ltoken_getCode (newToken) == LST_COMMENTSYM) | |
613 | { | |
614 | bufptr = &tokenBuffer[0]; | |
615 | ||
616 | while (!LSLIsEndComment (c)) | |
617 | { | |
618 | MOVECHAR (); | |
619 | } | |
620 | if (lscanCharClass (c) != CHC_NULL) | |
621 | { | |
622 | MOVECHAR (); | |
623 | } | |
624 | if (reportComments) | |
625 | { | |
626 | *bufptr = '\0'; | |
627 | ltoken_setRawText (newToken, lsymbol_fromChars (&tokenBuffer[0])); | |
628 | LSLScanFreshToken (newToken); | |
629 | } | |
630 | } | |
631 | else if (ltoken_getCode (newToken) == LST_EOL) | |
632 | { | |
633 | if (reportEOL) | |
634 | { | |
635 | LSLScanFreshToken (newToken); | |
636 | } | |
637 | return; | |
638 | } | |
639 | else | |
640 | { | |
641 | if (cod != LST_WHITESPACE) | |
642 | { | |
643 | LSLScanFreshToken (newToken); | |
644 | } | |
645 | } | |
646 | } | |
647 | } | |
648 | ||
649 | ltoken | |
650 | LSLScanEofToken (void) | |
651 | { | |
652 | ltoken t = ltoken_copy (LSLInsertToken (LEOFTOKEN, | |
653 | lsymbol_fromChars ("E O F"), | |
654 | 0, TRUE)); | |
655 | ltoken_setCol (t, colNumber); | |
656 | ltoken_setLine (t, inputStream_thisLineNumber (LSLScanSource ())); | |
657 | ltoken_setFileName (t, inputStream_fileName (LSLScanSource ())); | |
658 | return t; | |
659 | } | |
660 | ||
661 | void | |
662 | LSLReportEolTokens (bool setting) | |
663 | { | |
664 | reportEOL = setting; | |
665 | } | |
666 | ||
667 | static void | |
668 | LocalUserError (char *msg) | |
669 | { | |
670 | inputStream s = LSLScanSource (); | |
671 | llfatalerror (message ("%s:%d,%d: %s", | |
672 | inputStream_fileName (s), | |
673 | inputStream_thisLineNumber (s), colNumber, | |
674 | cstring_fromChars (msg))); | |
675 | } | |
676 | ||
677 | /* | |
678 | **++ | |
679 | ** FUNCTION NAME: | |
680 | ** | |
681 | ** lscanLineInit () | |
682 | ** | |
683 | ** FORMAL PARAMETERS: | |
684 | ** | |
685 | ** None | |
686 | ** | |
687 | ** RETURN VALUE: | |
688 | ** | |
689 | ** None | |
690 | ** | |
691 | ** INVARIANTS: | |
692 | ** | |
693 | ** [@description or none@] | |
694 | ** | |
695 | ** DESCRIPTION: | |
696 | ** | |
697 | ** Initialize this module (should only be called once). | |
698 | ** | |
699 | ** IMPLICIT INPUTS/OUTPUT: | |
700 | ** | |
701 | ** GetNextLine - (output) initialized | |
702 | ** NullToken - (output) initialized | |
703 | ** PrintName - (output) array contents initialized | |
704 | ** | |
705 | ** EXCEPTIONS: | |
706 | ** | |
707 | ** None | |
708 | **-- | |
709 | */ | |
710 | ||
711 | void | |
712 | lscanLineInit (void) | |
713 | { | |
714 | int i; | |
715 | ||
716 | reportEOL = FALSE; | |
717 | reportComments = FALSE; | |
718 | ||
719 | for (i = 0; i <= LASTCHAR; i++) | |
720 | { | |
721 | charClass[i] = charClassDef[i]; | |
722 | } | |
723 | ||
724 | /* | |
725 | ** NOTE: The following line ensures that all tokens have nonzero | |
726 | ** handles, so that a handle of zero can be used to indicate that a | |
727 | ** token does not have a synonym. | |
728 | */ | |
729 | ||
730 | (void) LSLReserveToken (LST_SIMPLEID, "dummy token"); | |
731 | ||
732 | ltoken_forall = LSLReserveToken (LST_QUANTIFIERSYM, "\\forall"); | |
733 | ltoken_true = LSLReserveToken (LST_SIMPLEID, "true"); | |
734 | ltoken_false = LSLReserveToken (LST_SIMPLEID, "false"); | |
735 | ltoken_not = LSLReserveToken (LST_SIMPLEOP, "\\not"); | |
736 | ltoken_and = LSLReserveToken (LST_LOGICALOP, "\\and"); | |
737 | ltoken_or = LSLReserveToken (LST_LOGICALOP, "\\or"); | |
738 | ltoken_implies = LSLReserveToken (LST_LOGICALOP, "\\implies"); | |
739 | ||
740 | ltoken_eq = LSLReserveToken (LST_EQOP, "\\eq"); | |
741 | ltoken_neq = LSLReserveToken (LST_EQOP, "\\neq"); | |
742 | ||
743 | ltoken_equals = LSLReserveToken (LST_EQUATIONSYM, "\\equals"); | |
744 | ltoken_eqsep = LSLReserveToken (LST_EQSEPSYM, "\\eqsep"); | |
745 | ltoken_select = LSLReserveToken (LST_SELECTSYM, "\\select"); | |
746 | ltoken_open = LSLReserveToken (LST_OPENSYM, "\\open"); | |
747 | ltoken_sep = LSLReserveToken (LST_SEPSYM, "\\,"); | |
748 | ltoken_close = LSLReserveToken (LST_CLOSESYM, "\\close"); | |
749 | ltoken_id = LSLReserveToken (LST_SIMPLEID, "\\:"); | |
750 | ltoken_arrow = LSLReserveToken (LST_MAPSYM, "\\arrow"); | |
751 | ltoken_farrow = LSLReserveToken (LST_FIELDMAPSYM, "\\field_arrow"); | |
752 | ||
753 | ltoken_marker = LSLReserveToken (LST_MARKERSYM, "\\marker"); | |
754 | ltoken_comment = LSLReserveToken (LST_COMMENTSYM, "\\comment"); | |
755 | ltoken_compose = LSLReserveToken (LST_COMPOSESYM, "\\composeSort"); | |
756 | ltoken_if = LSLReserveToken (LST_ifTOKEN, "if"); | |
757 | ||
758 | (void) LSLReserveToken (LST_LPAR, " ("); | |
759 | (void) LSLReserveToken (LST_RPAR, ")"); | |
760 | (void) LSLReserveToken (LST_COMMA, ","); | |
761 | (void) LSLReserveToken (LST_COLON, ":"); | |
762 | ||
763 | (void) LSLReserveToken (LST_LBRACKET, "["); | |
764 | (void) LSLReserveToken (LST_RBRACKET, "]"); | |
765 | ||
766 | (void) LSLReserveToken (LST_WHITESPACE, " "); | |
767 | (void) LSLReserveToken (LST_WHITESPACE, "\t"); | |
768 | (void) LSLReserveToken (LST_WHITESPACE, "\n"); | |
769 | ||
770 | (void) LSLReserveToken (LEOFTOKEN, "E O F"); | |
771 | (void) LSLReserveToken (LST_EOL, "E O L"); | |
772 | ||
773 | (void) LSLReserveToken (LST_assertsTOKEN, "asserts"); | |
774 | (void) LSLReserveToken (LST_assumesTOKEN, "assumes"); | |
775 | (void) LSLReserveToken (LST_byTOKEN, "by"); | |
776 | (void) LSLReserveToken (LST_convertsTOKEN, "converts"); | |
777 | (void) LSLReserveToken (LST_elseTOKEN, "else"); | |
778 | (void) LSLReserveToken (LST_enumerationTOKEN, "enumeration"); | |
779 | (void) LSLReserveToken (LST_equationsTOKEN, "equations"); | |
780 | (void) LSLReserveToken (LST_exemptingTOKEN, "exempting"); | |
781 | (void) LSLReserveToken (LST_forTOKEN, "for"); | |
782 | (void) LSLReserveToken (LST_generatedTOKEN, "generated"); | |
783 | (void) LSLReserveToken (LST_impliesTOKEN, "implies"); | |
784 | (void) LSLReserveToken (LST_includesTOKEN, "includes"); | |
785 | (void) LSLReserveToken (LST_introducesTOKEN, "introduces"); | |
786 | (void) LSLReserveToken (LST_ofTOKEN, "of"); | |
787 | (void) LSLReserveToken (LST_partitionedTOKEN, "partitioned"); | |
788 | (void) LSLReserveToken (LST_thenTOKEN, "then"); | |
789 | (void) LSLReserveToken (LST_traitTOKEN, "trait"); | |
790 | (void) LSLReserveToken (LST_tupleTOKEN, "tuple"); | |
791 | (void) LSLReserveToken (LST_unionTOKEN, "union"); | |
792 | } | |
793 | ||
/* Reset hook for this module; it currently has nothing to do. */
void
lscanLineReset (void)
{
  /* intentionally empty */
}
798 | ||
/* Cleanup hook for this module; it currently has nothing to do. */
void
lscanLineCleanup (void)
{
  /* intentionally empty */
}
803 | ||
804 | charCode lscanCharClass (char c) | |
805 | { | |
806 | return charClass[ (int) (c)].code; | |
807 | } | |
808 | ||
809 | bool LSLIsEndComment (char c) | |
810 | { | |
811 | return charClass[ (int) (c)].endCommentChar; | |
812 | } | |
813 | ||
814 | void lsetCharClass (char c, charCode cod) | |
815 | { | |
816 | charClass[ (int) (c)].code = cod; | |
817 | } | |
818 | ||
819 | void lsetEndCommentChar (char c, bool flag) | |
820 | { | |
821 | charClass[ (int) (c)].endCommentChar = flag; | |
822 | } |