]>
Commit | Line | Data |
---|---|---|
616915dd | 1 | /* |
11db3170 | 2 | ** Splint - annotation-assisted static program checker |
c59f5181 | 3 | ** Copyright (C) 1994-2003 University of Virginia, |
616915dd | 4 | ** Massachusetts Institute of Technology |
5 | ** | |
6 | ** This program is free software; you can redistribute it and/or modify it | |
7 | ** under the terms of the GNU General Public License as published by the | |
8 | ** Free Software Foundation; either version 2 of the License, or (at your | |
9 | ** option) any later version. | |
10 | ** | |
11 | ** This program is distributed in the hope that it will be useful, but | |
12 | ** WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ** General Public License for more details. | |
15 | ** | |
16 | ** The GNU General Public License is available from http://www.gnu.org/ or | |
17 | ** the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
18 | ** MA 02111-1307, USA. | |
19 | ** | |
155af98d | 20 | ** For information on splint: info@splint.org |
21 | ** To report a bug: splint-bug@splint.org | |
11db3170 | 22 | ** For more information: http://www.splint.org |
616915dd | 23 | */ |
24 | /* | |
25 | ** scanline.c | |
26 | ** | |
27 | ** Scan one line of Larch SL input at a time. | |
28 | ** | |
29 | ** The input is source text, line at a time. The output is a sequence | |
30 | ** of tokens, reported by call-out LSLScanFreshToken. | |
31 | ** | |
32 | ** This organization allows implementation of line-at-a-time | |
33 | ** incremental scanning. The incremental mechanism is in the | |
34 | ** driving module scan.c, which can save and replay tokens for | |
35 | ** unchanged lines. This module works either way. | |
36 | ** | |
37 | ** The main loop of the scanner keys on the leading character. | |
38 | ** Actions within the loop collect the token starting with that | |
39 | ** character and determine its kind. | |
40 | */ | |
41 | ||
1b8ae690 | 42 | # include "splintMacros.nf" |
b73d1009 | 43 | # include "basic.h" |
616915dd | 44 | # include "signature.h" |
45 | # include "signature2.h" | |
46 | # include "scan.h" | |
47 | # include "scanline.h" | |
48 | # include "tokentable.h" | |
49 | # include "syntable.h" | |
50 | ||
51 | /*@notfunction@*/ | |
52 | # define MOVECHAR() do { *bufptr++ = c; c = *currentLine++; colNumber++; } while (FALSE) | |
53 | ||
51bc6ecc | 54 | /* evans 2003-04-21: changed name to avoid conflict with MS VC++ */ |
55 | /*@constant static int SCANMAXCHAR;@*/ | |
56 | # define SCANMAXCHAR 512 | |
616915dd | 57 | |
58 | /*@constant static int TABSIZE;@*/ | |
59 | # define TABSIZE 8 | |
60 | ||
61 | static void LocalUserError (/*@temp@*/ char *p_msg); | |
62 | ||
63 | static charClassData charClass[LASTCHAR + 1]; | |
64 | ||
28bf4b0b | 65 | static int colNumber; |
66 | static int startCol; | |
616915dd | 67 | static bool reportEOL; |
68 | static bool reportComments; | |
69 | ||
51bc6ecc | 70 | static char tokenBuffer[SCANMAXCHAR]; |
616915dd | 71 | |
72 | static const charClassData charClassDef[] = | |
73 | { | |
74 | /* Control characters */ | |
75 | ||
76 | { CHC_NULL, TRUE }, /* 0 NULL */ | |
77 | { SINGLECHAR, FALSE }, /* 1 CTRL-A */ | |
78 | { SINGLECHAR, FALSE }, /* 2 CTRL-B */ | |
79 | { SINGLECHAR, FALSE }, /* 3 CTRL-C */ | |
80 | { SINGLECHAR, FALSE }, /* 4 CTRL-D */ | |
81 | { SINGLECHAR, FALSE }, /* 5 CTRL-E */ | |
82 | { SINGLECHAR, FALSE }, /* 6 CTRL-F */ | |
83 | { SINGLECHAR, FALSE }, /* 7 CTRL-G */ | |
84 | { SINGLECHAR, FALSE }, /* 8 CTRL-H */ | |
85 | ||
86 | /* defined formatting characters */ | |
87 | ||
88 | { WHITECHAR, FALSE }, /* 9 CTRL-I TAB */ | |
89 | { WHITECHAR, TRUE }, /* 10 CTRL-J EOL */ | |
90 | ||
91 | /* more control characters */ | |
92 | ||
93 | { SINGLECHAR, FALSE }, /* 11 CTRL-K */ | |
94 | { SINGLECHAR, FALSE }, /* 12 CTRL-L */ | |
95 | { SINGLECHAR, FALSE }, /* 13 CTRL-M */ | |
96 | { SINGLECHAR, FALSE }, /* 14 CTRL-N */ | |
97 | { SINGLECHAR, FALSE }, /* 15 CTRL-O */ | |
98 | { SINGLECHAR, FALSE }, /* 16 CTRL-P */ | |
99 | { SINGLECHAR, FALSE }, /* 17 CTRL-Q */ | |
100 | { SINGLECHAR, FALSE }, /* 18 CTRL-R */ | |
101 | { SINGLECHAR, FALSE }, /* 19 CTRL-S */ | |
102 | { SINGLECHAR, FALSE }, /* 20 CTRL-T */ | |
103 | { SINGLECHAR, FALSE }, /* 21 CTRL-U */ | |
104 | { SINGLECHAR, FALSE }, /* 22 CTRL-V */ | |
105 | { SINGLECHAR, FALSE }, /* 23 CTRL-W */ | |
106 | { SINGLECHAR, FALSE }, /* 24 CTRL-X */ | |
107 | { SINGLECHAR, FALSE }, /* 25 CTRL-Y */ | |
108 | { SINGLECHAR, FALSE }, /* 26 CTRL-Z */ | |
109 | { SINGLECHAR, FALSE }, /* 27 CTRL-[ ESC */ | |
110 | { SINGLECHAR, FALSE }, /* 28 CTRL-slash */ | |
111 | { SINGLECHAR, FALSE }, /* 29 CTRL-] GS */ | |
112 | { SINGLECHAR, FALSE }, /* 30 CTRL-^ RS */ | |
113 | { SINGLECHAR, FALSE }, /* 31 CTRL-_ US */ | |
114 | ||
115 | /* Special printing characters */ | |
116 | ||
117 | { WHITECHAR, FALSE }, /* 32 space */ | |
118 | { SINGLECHAR, FALSE }, /* 33 ! */ | |
119 | { SINGLECHAR, FALSE }, /* 34 " */ | |
120 | { SINGLECHAR, FALSE }, /* 35 # */ | |
121 | { SINGLECHAR, FALSE }, /* 36 $ */ | |
122 | { SINGLECHAR, FALSE }, /* 37 % */ | |
123 | { SINGLECHAR, FALSE }, /* 38 & */ | |
124 | { SINGLECHAR, FALSE }, /* 39 ' */ | |
125 | ||
126 | /* Reserved characters */ | |
127 | ||
128 | { PERMCHAR, FALSE }, /* 40 ( */ | |
129 | { PERMCHAR, FALSE }, /* 41 ) */ | |
130 | { OPCHAR, FALSE }, /* 42 * */ | |
131 | { OPCHAR, FALSE }, /* 43 + */ | |
132 | { PERMCHAR, FALSE }, /* 44 , */ | |
133 | { OPCHAR, FALSE }, /* 45 - */ | |
134 | { OPCHAR, FALSE }, /* 46 . */ | |
135 | { SLASHCHAR, FALSE }, /* 47 / */ | |
136 | ||
137 | /* Numbers */ | |
138 | ||
139 | { IDCHAR, FALSE }, /* 48 0 */ | |
140 | { IDCHAR, FALSE }, /* 49 1 */ | |
141 | { IDCHAR, FALSE }, /* 50 2 */ | |
142 | { IDCHAR, FALSE }, /* 51 3 */ | |
143 | { IDCHAR, FALSE }, /* 52 4 */ | |
144 | { IDCHAR, FALSE }, /* 53 5 */ | |
145 | { IDCHAR, FALSE }, /* 54 6 */ | |
146 | { IDCHAR, FALSE }, /* 55 7 */ | |
147 | { IDCHAR, FALSE }, /* 56 8 */ | |
148 | { IDCHAR, FALSE }, /* 57 9 */ | |
149 | ||
150 | /* More reserved and special printing characters */ | |
151 | ||
152 | { PERMCHAR, FALSE }, /* 58 : */ | |
153 | { SINGLECHAR, FALSE }, /* 59; */ | |
154 | { OPCHAR, FALSE }, /* 60 < */ | |
155 | { OPCHAR, FALSE }, /* 61 = */ | |
156 | { OPCHAR, FALSE }, /* 62 > */ | |
157 | { SINGLECHAR, FALSE }, /* 63 ? */ | |
158 | { SINGLECHAR, FALSE }, /* 64 @ */ | |
159 | ||
160 | /* Uppercase Alphabetics */ | |
161 | ||
162 | { IDCHAR, FALSE }, /* 65 A */ | |
163 | { IDCHAR, FALSE }, /* 66 B */ | |
164 | { IDCHAR, FALSE }, /* 67 C */ | |
165 | { IDCHAR, FALSE }, /* 68 D */ | |
166 | { IDCHAR, FALSE }, /* 69 E */ | |
167 | { IDCHAR, FALSE }, /* 70 F */ | |
168 | { IDCHAR, FALSE }, /* 71 G */ | |
169 | { IDCHAR, FALSE }, /* 72 H */ | |
170 | { IDCHAR, FALSE }, /* 73 I */ | |
171 | { IDCHAR, FALSE }, /* 74 J */ | |
172 | { IDCHAR, FALSE }, /* 75 K */ | |
173 | { IDCHAR, FALSE }, /* 76 L */ | |
174 | { IDCHAR, FALSE }, /* 77 M */ | |
175 | { IDCHAR, FALSE }, /* 78 N */ | |
176 | { IDCHAR, FALSE }, /* 79 O */ | |
177 | { IDCHAR, FALSE }, /* 80 P */ | |
178 | { IDCHAR, FALSE }, /* 81 Q */ | |
179 | { IDCHAR, FALSE }, /* 82 R */ | |
180 | { IDCHAR, FALSE }, /* 83 S */ | |
181 | { IDCHAR, FALSE }, /* 84 T */ | |
182 | { IDCHAR, FALSE }, /* 85 U */ | |
183 | { IDCHAR, FALSE }, /* 86 V */ | |
184 | { IDCHAR, FALSE }, /* 87 W */ | |
185 | { IDCHAR, FALSE }, /* 88 X */ | |
186 | { IDCHAR, FALSE }, /* 89 Y */ | |
187 | { IDCHAR, FALSE }, /* 90 Z */ | |
188 | ||
189 | /* Still more reserved and special printing characters */ | |
190 | ||
191 | { SINGLECHAR, FALSE }, /* 91 [ */ | |
192 | { CHC_EXTENSION, FALSE }, /* 92 slash */ | |
193 | { SINGLECHAR, FALSE }, /* 93 ] */ | |
194 | { SINGLECHAR, FALSE }, /* 94 ^ */ | |
195 | { IDCHAR, FALSE }, /* 95 _ */ | |
196 | { SINGLECHAR, FALSE }, /* 96 ` */ | |
197 | ||
198 | /* Lowercase alphabetics */ | |
199 | ||
200 | { IDCHAR, FALSE }, /* 97 a */ | |
201 | { IDCHAR, FALSE }, /* 98 b */ | |
202 | { IDCHAR, FALSE }, /* 99 c */ | |
203 | { IDCHAR, FALSE }, /* 100 d */ | |
204 | { IDCHAR, FALSE }, /* 101 e */ | |
205 | { IDCHAR, FALSE }, /* 102 f */ | |
206 | { IDCHAR, FALSE }, /* 103 g */ | |
207 | { IDCHAR, FALSE }, /* 104 h */ | |
208 | { IDCHAR, FALSE }, /* 105 i */ | |
209 | { IDCHAR, FALSE }, /* 106 j */ | |
210 | { IDCHAR, FALSE }, /* 107 k */ | |
211 | { IDCHAR, FALSE }, /* 108 l */ | |
212 | { IDCHAR, FALSE }, /* 109 m */ | |
213 | { IDCHAR, FALSE }, /* 110 n */ | |
214 | { IDCHAR, FALSE }, /* 111 o */ | |
215 | { IDCHAR, FALSE }, /* 112 p */ | |
216 | { IDCHAR, FALSE }, /* 113 q */ | |
217 | { IDCHAR, FALSE }, /* 114 r */ | |
218 | { IDCHAR, FALSE }, /* 115 s */ | |
219 | { IDCHAR, FALSE }, /* 116 t */ | |
220 | { IDCHAR, FALSE }, /* 117 u */ | |
221 | { IDCHAR, FALSE }, /* 118 v */ | |
222 | { IDCHAR, FALSE }, /* 119 w */ | |
223 | { IDCHAR, FALSE }, /* 120 x */ | |
224 | { IDCHAR, FALSE }, /* 121 y */ | |
225 | { IDCHAR, FALSE }, /* 122 z */ | |
226 | ||
227 | { SINGLECHAR, FALSE }, /* 123 { */ | |
228 | { SINGLECHAR, FALSE }, /* 124 | */ | |
229 | { SINGLECHAR, FALSE }, /* 125 } */ | |
230 | { SINGLECHAR, FALSE }, /* 126 ~ */ | |
231 | { SINGLECHAR, FALSE }, /* 127 DEL */ | |
232 | ||
233 | /* MCS - unused in English */ | |
234 | ||
235 | { SINGLECHAR, FALSE }, /* 128 */ | |
236 | { SINGLECHAR, FALSE }, /* 129 */ | |
237 | { SINGLECHAR, FALSE }, /* 130 */ | |
238 | { SINGLECHAR, FALSE }, /* 131 */ | |
239 | { SINGLECHAR, FALSE }, /* 132 */ | |
240 | { SINGLECHAR, FALSE }, /* 133 */ | |
241 | { SINGLECHAR, FALSE }, /* 134 */ | |
242 | { SINGLECHAR, FALSE }, /* 135 */ | |
243 | { SINGLECHAR, FALSE }, /* 136 */ | |
244 | { SINGLECHAR, FALSE }, /* 137 */ | |
245 | { SINGLECHAR, FALSE }, /* 138 */ | |
246 | { SINGLECHAR, FALSE }, /* 139 */ | |
247 | { SINGLECHAR, FALSE }, /* 140 */ | |
248 | { SINGLECHAR, FALSE }, /* 141 */ | |
249 | { SINGLECHAR, FALSE }, /* 142 */ | |
250 | { SINGLECHAR, FALSE }, /* 143 */ | |
251 | { SINGLECHAR, FALSE }, /* 144 */ | |
252 | { SINGLECHAR, FALSE }, /* 145 */ | |
253 | { SINGLECHAR, FALSE }, /* 146 */ | |
254 | { SINGLECHAR, FALSE }, /* 147 */ | |
255 | { SINGLECHAR, FALSE }, /* 148 */ | |
256 | { SINGLECHAR, FALSE }, /* 149 */ | |
257 | { SINGLECHAR, FALSE }, /* 150 */ | |
258 | { SINGLECHAR, FALSE }, /* 151 */ | |
259 | { SINGLECHAR, FALSE }, /* 152 */ | |
260 | { SINGLECHAR, FALSE }, /* 153 */ | |
261 | { SINGLECHAR, FALSE }, /* 154 */ | |
262 | { SINGLECHAR, FALSE }, /* 155 */ | |
263 | { SINGLECHAR, FALSE }, /* 156 */ | |
264 | { SINGLECHAR, FALSE }, /* 157 */ | |
265 | { SINGLECHAR, FALSE }, /* 158 */ | |
266 | { SINGLECHAR, FALSE }, /* 159 */ | |
267 | { SINGLECHAR, FALSE }, /* 160 */ | |
268 | { SINGLECHAR, FALSE }, /* 161 */ | |
269 | { SINGLECHAR, FALSE }, /* 162 */ | |
270 | { SINGLECHAR, FALSE }, /* 163 */ | |
271 | { SINGLECHAR, FALSE }, /* 164 */ | |
272 | { SINGLECHAR, FALSE }, /* 165 */ | |
273 | { SINGLECHAR, FALSE }, /* 166 */ | |
274 | { SINGLECHAR, FALSE }, /* 167 */ | |
275 | { SINGLECHAR, FALSE }, /* 168 */ | |
276 | { SINGLECHAR, FALSE }, /* 169 */ | |
277 | { SINGLECHAR, FALSE }, /* 170 */ | |
278 | { SINGLECHAR, FALSE }, /* 171 */ | |
279 | { SINGLECHAR, FALSE }, /* 172 */ | |
280 | { SINGLECHAR, FALSE }, /* 173 */ | |
281 | { SINGLECHAR, FALSE }, /* 174 */ | |
282 | { SINGLECHAR, FALSE }, /* 175 */ | |
283 | { SINGLECHAR, FALSE }, /* 176 */ | |
284 | { SINGLECHAR, FALSE }, /* 177 */ | |
285 | { SINGLECHAR, FALSE }, /* 178 */ | |
286 | { SINGLECHAR, FALSE }, /* 179 */ | |
287 | { SINGLECHAR, FALSE }, /* 180 */ | |
288 | { SINGLECHAR, FALSE }, /* 181 */ | |
289 | { SINGLECHAR, FALSE }, /* 182 */ | |
290 | { SINGLECHAR, FALSE }, /* 183 */ | |
291 | { SINGLECHAR, FALSE }, /* 184 */ | |
292 | { SINGLECHAR, FALSE }, /* 185 */ | |
293 | { SINGLECHAR, FALSE }, /* 186 */ | |
294 | { SINGLECHAR, FALSE }, /* 187 */ | |
295 | { SINGLECHAR, FALSE }, /* 188 */ | |
296 | { SINGLECHAR, FALSE }, /* 189 */ | |
297 | { SINGLECHAR, FALSE }, /* 190 */ | |
298 | { SINGLECHAR, FALSE }, /* 191 */ | |
299 | { SINGLECHAR, FALSE }, /* 192 */ | |
300 | { SINGLECHAR, FALSE }, /* 193 */ | |
301 | { SINGLECHAR, FALSE }, /* 194 */ | |
302 | { SINGLECHAR, FALSE }, /* 195 */ | |
303 | { SINGLECHAR, FALSE }, /* 196 */ | |
304 | { SINGLECHAR, FALSE }, /* 197 */ | |
305 | { SINGLECHAR, FALSE }, /* 198 */ | |
306 | { SINGLECHAR, FALSE }, /* 199 */ | |
307 | { SINGLECHAR, FALSE }, /* 200 */ | |
308 | { SINGLECHAR, FALSE }, /* 201 */ | |
309 | { SINGLECHAR, FALSE }, /* 202 */ | |
310 | { SINGLECHAR, FALSE }, /* 203 */ | |
311 | { SINGLECHAR, FALSE }, /* 204 */ | |
312 | { SINGLECHAR, FALSE }, /* 205 */ | |
313 | { SINGLECHAR, FALSE }, /* 206 */ | |
314 | { SINGLECHAR, FALSE }, /* 207 */ | |
315 | { SINGLECHAR, FALSE }, /* 208 */ | |
316 | { SINGLECHAR, FALSE }, /* 209 */ | |
317 | { SINGLECHAR, FALSE }, /* 210 */ | |
318 | { SINGLECHAR, FALSE }, /* 211 */ | |
319 | { SINGLECHAR, FALSE }, /* 212 */ | |
320 | { SINGLECHAR, FALSE }, /* 213 */ | |
321 | { SINGLECHAR, FALSE }, /* 214 */ | |
322 | { SINGLECHAR, FALSE }, /* 215 */ | |
323 | { SINGLECHAR, FALSE }, /* 216 */ | |
324 | { SINGLECHAR, FALSE }, /* 217 */ | |
325 | { SINGLECHAR, FALSE }, /* 218 */ | |
326 | { SINGLECHAR, FALSE }, /* 219 */ | |
327 | { SINGLECHAR, FALSE }, /* 220 */ | |
328 | { SINGLECHAR, FALSE }, /* 221 */ | |
329 | { SINGLECHAR, FALSE }, /* 222 */ | |
330 | { SINGLECHAR, FALSE }, /* 223 */ | |
331 | { SINGLECHAR, FALSE }, /* 224 */ | |
332 | { SINGLECHAR, FALSE }, /* 225 */ | |
333 | { SINGLECHAR, FALSE }, /* 226 */ | |
334 | { SINGLECHAR, FALSE }, /* 227 */ | |
335 | { SINGLECHAR, FALSE }, /* 228 */ | |
336 | { SINGLECHAR, FALSE }, /* 229 */ | |
337 | { SINGLECHAR, FALSE }, /* 230 */ | |
338 | { SINGLECHAR, FALSE }, /* 231 */ | |
339 | { SINGLECHAR, FALSE }, /* 232 */ | |
340 | { SINGLECHAR, FALSE }, /* 233 */ | |
341 | { SINGLECHAR, FALSE }, /* 234 */ | |
342 | { SINGLECHAR, FALSE }, /* 235 */ | |
343 | { SINGLECHAR, FALSE }, /* 236 */ | |
344 | { SINGLECHAR, FALSE }, /* 237 */ | |
345 | { SINGLECHAR, FALSE }, /* 238 */ | |
346 | { SINGLECHAR, FALSE }, /* 239 */ | |
347 | { SINGLECHAR, FALSE }, /* 240 */ | |
348 | { SINGLECHAR, FALSE }, /* 241 */ | |
349 | { SINGLECHAR, FALSE }, /* 242 */ | |
350 | { SINGLECHAR, FALSE }, /* 243 */ | |
351 | { SINGLECHAR, FALSE }, /* 244 */ | |
352 | { SINGLECHAR, FALSE }, /* 245 */ | |
353 | { SINGLECHAR, FALSE }, /* 246 */ | |
354 | { SINGLECHAR, FALSE }, /* 247 */ | |
355 | { SINGLECHAR, FALSE }, /* 248 */ | |
356 | { SINGLECHAR, FALSE }, /* 249 */ | |
357 | { SINGLECHAR, FALSE }, /* 250 */ | |
358 | { SINGLECHAR, FALSE }, /* 251 */ | |
359 | { SINGLECHAR, FALSE }, /* 252 */ | |
360 | { SINGLECHAR, FALSE }, /* 253 */ | |
361 | { SINGLECHAR, FALSE }, /* 254 */ | |
362 | { SINGLECHAR, FALSE } /* 255 */ | |
363 | }; | |
364 | ||
365 | /* | |
366 | **++ | |
367 | ** FUNCTION NAME: | |
368 | ** | |
369 | ** lscanline () | |
370 | ** | |
371 | ** FORMAL PARAMETERS: | |
372 | ** | |
373 | ** None | |
374 | ** | |
375 | ** RETURN VALUE: | |
376 | ** | |
377 | ** None | |
378 | ** | |
379 | ** INVARIANTS: | |
380 | ** | |
381 | ** [@description or none@] | |
382 | ** | |
383 | ** DESCRIPTION: | |
384 | ** | |
385 | ** One line of text is processed. | |
386 | ** Tokens are delivered via the call LSLScanFreshToken (). | |
387 | ** | |
388 | ** EXCEPTIONS: | |
389 | ** | |
390 | **-- | |
391 | */ | |
392 | ||
393 | void | |
394 | lscanLine (char *currentLine) | |
395 | { | |
396 | ltokenCode cod; | |
397 | lsymbol sym; | |
398 | register char c; | |
399 | register char *bufptr; | |
400 | ltoken newToken; | |
401 | ||
402 | c = *currentLine++; | |
403 | colNumber = 0; | |
404 | ||
405 | for (;;) | |
406 | { | |
407 | bufptr = &tokenBuffer[0]; | |
408 | startCol = colNumber; | |
409 | ||
410 | /*@-loopswitchbreak@*/ | |
411 | switch (lscanCharClass (c)) | |
412 | { | |
413 | ||
414 | case CHC_NULL: | |
415 | sym = lsymbol_fromChars ("E O L"); | |
416 | cod = LST_EOL; | |
417 | break; | |
418 | ||
419 | /* Identifiers */ | |
420 | ||
421 | case IDCHAR: | |
422 | ||
423 | while (lscanCharClass (c) == IDCHAR) | |
424 | { | |
425 | MOVECHAR (); | |
426 | } | |
427 | ||
428 | *bufptr = '\0'; | |
429 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
430 | cod = LST_SIMPLEID; | |
431 | break; | |
432 | ||
433 | /* One-character tokens */ | |
434 | ||
435 | case SINGLECHAR: | |
436 | case PERMCHAR: | |
437 | MOVECHAR (); | |
438 | *bufptr = '\0'; | |
439 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
440 | cod = LST_SIMPLEOP; | |
441 | break; | |
442 | ||
443 | case SLASHCHAR: | |
444 | if (*currentLine == '\\') | |
445 | { | |
446 | MOVECHAR (); | |
447 | MOVECHAR (); | |
448 | *bufptr = '\0'; | |
449 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
450 | cod = LST_SIMPLEOP; | |
451 | break; | |
452 | } | |
453 | MOVECHAR (); | |
454 | /* We fall through to next case if we have / followed */ | |
455 | /* by anything else. */ | |
456 | /*@fallthrough@*/ | |
457 | case OPCHAR: | |
458 | ||
459 | /* Operator symbols */ | |
460 | ||
461 | /* possible multi character */ | |
462 | while (lscanCharClass (c) == OPCHAR) | |
463 | { | |
464 | MOVECHAR (); | |
465 | } | |
466 | ||
467 | *bufptr = '\0'; /* null terminate in buffer */ | |
468 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
469 | cod = LST_SIMPLEOP; | |
470 | break; | |
471 | ||
472 | /* White space */ | |
473 | case WHITECHAR: | |
474 | /*@-switchswitchbreak@*/ | |
475 | switch (c) | |
476 | { | |
477 | case '\t': | |
478 | MOVECHAR (); | |
479 | colNumber--; | |
480 | colNumber += TABSIZE; | |
481 | colNumber -= (colNumber % TABSIZE); | |
482 | break; | |
483 | ||
484 | case '\v': | |
485 | case '\f': | |
486 | MOVECHAR (); | |
487 | colNumber--; | |
488 | break; | |
489 | ||
490 | default: | |
491 | MOVECHAR (); | |
492 | break; | |
493 | } | |
494 | *bufptr = '\0'; | |
495 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
496 | cod = LST_WHITESPACE; | |
497 | break; | |
498 | ||
499 | case CHC_EXTENSION: | |
500 | MOVECHAR (); | |
501 | ||
502 | switch (c) | |
503 | { | |
504 | ||
505 | /* open and close */ | |
506 | case '(': | |
507 | MOVECHAR (); | |
508 | while (lscanCharClass (c) == IDCHAR) | |
509 | { | |
510 | MOVECHAR (); | |
511 | } | |
512 | *bufptr = '\0'; | |
513 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
514 | cod = LST_OPENSYM; | |
515 | break; | |
516 | ||
517 | case ')': | |
518 | MOVECHAR (); | |
519 | while (lscanCharClass (c) == IDCHAR) | |
520 | { | |
521 | MOVECHAR (); | |
522 | } | |
523 | *bufptr = '\0'; | |
524 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
525 | cod = LST_CLOSESYM; | |
526 | break; | |
527 | ||
528 | /* separator */ | |
529 | case ',': | |
530 | MOVECHAR (); | |
531 | while (lscanCharClass (c) == IDCHAR) | |
532 | { | |
533 | MOVECHAR (); | |
534 | } | |
535 | *bufptr = '\0'; | |
536 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
537 | cod = LST_SEPSYM; | |
538 | break; | |
539 | ||
540 | /* simpleid */ | |
541 | case ':': | |
542 | MOVECHAR (); | |
543 | while (lscanCharClass (c) == IDCHAR) | |
544 | { | |
545 | MOVECHAR (); | |
546 | } | |
547 | *bufptr = '\0'; | |
548 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
549 | cod = LST_SIMPLEID; | |
550 | break; | |
551 | ||
552 | default: | |
553 | if (lscanCharClass (c) == IDCHAR) | |
554 | { | |
555 | do | |
556 | { | |
557 | MOVECHAR (); | |
558 | } | |
559 | while (lscanCharClass (c) == IDCHAR); | |
560 | *bufptr = '\0'; | |
561 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
562 | cod = LST_SIMPLEOP; | |
563 | } | |
564 | else | |
565 | { | |
566 | /* | |
567 | ** Meets none of the above. Take the extension | |
568 | ** character and the character following and treat | |
569 | ** together as a SINGLECHAR. SINGLECHARs tranlate into | |
570 | ** SIMPLEOPs. | |
571 | */ | |
572 | ||
573 | MOVECHAR (); | |
574 | *bufptr = '\0'; | |
575 | sym = lsymbol_fromChars (&tokenBuffer[0]); | |
576 | cod = LST_SIMPLEOP; | |
577 | } | |
578 | break; | |
579 | } | |
580 | /*@switchbreak@*/ break; | |
581 | /*@=switchswitchbreak@*/ | |
582 | default: | |
583 | ||
584 | LocalUserError ("unexpected character in input"); | |
585 | return; | |
586 | } | |
587 | /*@=loopswitchbreak@*/ | |
588 | ||
589 | /* | |
590 | ** Above code only "guessed" at token type. Insert it into the | |
591 | ** TokenTable. If the token already exists, it is returned as | |
592 | ** previously defined. If it does not exist, it is inserted as the | |
593 | ** token code computed above. | |
594 | */ | |
595 | ||
596 | newToken = LSLInsertToken (cod, sym, 0, FALSE); | |
597 | ||
598 | if (LSLIsSyn (ltoken_getText (newToken))) | |
599 | { | |
600 | /* | |
601 | ** Token is a synonym. Get the actual token and set the raw | |
602 | ** text to the synonym name. | |
603 | */ | |
604 | ||
605 | newToken = LSLGetTokenForSyn (ltoken_getText (newToken)); | |
606 | ltoken_setRawText (newToken, sym); | |
607 | } | |
608 | ||
609 | ltoken_setCol (newToken, startCol); | |
28bf4b0b | 610 | ltoken_setLine (newToken, inputStream_thisLineNumber (LSLScanSource ())); |
611 | ltoken_setFileName (newToken, inputStream_fileName (LSLScanSource ())); | |
616915dd | 612 | |
613 | if (ltoken_getCode (newToken) == LST_COMMENTSYM) | |
614 | { | |
615 | bufptr = &tokenBuffer[0]; | |
616 | ||
617 | while (!LSLIsEndComment (c)) | |
618 | { | |
619 | MOVECHAR (); | |
620 | } | |
621 | if (lscanCharClass (c) != CHC_NULL) | |
622 | { | |
623 | MOVECHAR (); | |
624 | } | |
625 | if (reportComments) | |
626 | { | |
627 | *bufptr = '\0'; | |
628 | ltoken_setRawText (newToken, lsymbol_fromChars (&tokenBuffer[0])); | |
629 | LSLScanFreshToken (newToken); | |
630 | } | |
631 | } | |
632 | else if (ltoken_getCode (newToken) == LST_EOL) | |
633 | { | |
634 | if (reportEOL) | |
635 | { | |
636 | LSLScanFreshToken (newToken); | |
637 | } | |
638 | return; | |
639 | } | |
640 | else | |
641 | { | |
642 | if (cod != LST_WHITESPACE) | |
643 | { | |
644 | LSLScanFreshToken (newToken); | |
645 | } | |
646 | } | |
647 | } | |
648 | } | |
649 | ||
650 | ltoken | |
651 | LSLScanEofToken (void) | |
652 | { | |
653 | ltoken t = ltoken_copy (LSLInsertToken (LEOFTOKEN, | |
654 | lsymbol_fromChars ("E O F"), | |
655 | 0, TRUE)); | |
656 | ltoken_setCol (t, colNumber); | |
28bf4b0b | 657 | ltoken_setLine (t, inputStream_thisLineNumber (LSLScanSource ())); |
658 | ltoken_setFileName (t, inputStream_fileName (LSLScanSource ())); | |
616915dd | 659 | return t; |
660 | } | |
661 | ||
662 | void | |
663 | LSLReportEolTokens (bool setting) | |
664 | { | |
665 | reportEOL = setting; | |
666 | } | |
667 | ||
668 | static void | |
669 | LocalUserError (char *msg) | |
670 | { | |
28bf4b0b | 671 | inputStream s = LSLScanSource (); |
672 | llfatalerror (message ("%s:%d,%d: %s", | |
673 | inputStream_fileName (s), | |
674 | inputStream_thisLineNumber (s), colNumber, | |
616915dd | 675 | cstring_fromChars (msg))); |
676 | } | |
677 | ||
678 | /* | |
679 | **++ | |
680 | ** FUNCTION NAME: | |
681 | ** | |
682 | ** lscanLineInit () | |
683 | ** | |
684 | ** FORMAL PARAMETERS: | |
685 | ** | |
686 | ** None | |
687 | ** | |
688 | ** RETURN VALUE: | |
689 | ** | |
690 | ** None | |
691 | ** | |
692 | ** INVARIANTS: | |
693 | ** | |
694 | ** [@description or none@] | |
695 | ** | |
696 | ** DESCRIPTION: | |
697 | ** | |
698 | ** Initialize this module (should only be called once). | |
699 | ** | |
700 | ** IMPLICIT INPUTS/OUTPUT: | |
701 | ** | |
702 | ** GetNextLine - (output) initialized | |
703 | ** NullToken - (output) initialized | |
704 | ** PrintName - (output) array contents initialized | |
705 | ** | |
706 | ** EXCEPTIONS: | |
707 | ** | |
708 | ** None | |
709 | **-- | |
710 | */ | |
711 | ||
712 | void | |
713 | lscanLineInit (void) | |
714 | { | |
715 | int i; | |
716 | ||
717 | reportEOL = FALSE; | |
718 | reportComments = FALSE; | |
719 | ||
720 | for (i = 0; i <= LASTCHAR; i++) | |
721 | { | |
722 | charClass[i] = charClassDef[i]; | |
723 | } | |
724 | ||
725 | /* | |
726 | ** NOTE: The following line ensures that all tokens have nonzero | |
727 | ** handles, so that a handle of zero can be used to indicate that a | |
728 | ** token does not have a synonym. | |
729 | */ | |
730 | ||
731 | (void) LSLReserveToken (LST_SIMPLEID, "dummy token"); | |
732 | ||
733 | ltoken_forall = LSLReserveToken (LST_QUANTIFIERSYM, "\\forall"); | |
734 | ltoken_true = LSLReserveToken (LST_SIMPLEID, "true"); | |
735 | ltoken_false = LSLReserveToken (LST_SIMPLEID, "false"); | |
736 | ltoken_not = LSLReserveToken (LST_SIMPLEOP, "\\not"); | |
737 | ltoken_and = LSLReserveToken (LST_LOGICALOP, "\\and"); | |
738 | ltoken_or = LSLReserveToken (LST_LOGICALOP, "\\or"); | |
739 | ltoken_implies = LSLReserveToken (LST_LOGICALOP, "\\implies"); | |
740 | ||
741 | ltoken_eq = LSLReserveToken (LST_EQOP, "\\eq"); | |
742 | ltoken_neq = LSLReserveToken (LST_EQOP, "\\neq"); | |
743 | ||
744 | ltoken_equals = LSLReserveToken (LST_EQUATIONSYM, "\\equals"); | |
745 | ltoken_eqsep = LSLReserveToken (LST_EQSEPSYM, "\\eqsep"); | |
746 | ltoken_select = LSLReserveToken (LST_SELECTSYM, "\\select"); | |
747 | ltoken_open = LSLReserveToken (LST_OPENSYM, "\\open"); | |
748 | ltoken_sep = LSLReserveToken (LST_SEPSYM, "\\,"); | |
749 | ltoken_close = LSLReserveToken (LST_CLOSESYM, "\\close"); | |
750 | ltoken_id = LSLReserveToken (LST_SIMPLEID, "\\:"); | |
751 | ltoken_arrow = LSLReserveToken (LST_MAPSYM, "\\arrow"); | |
752 | ltoken_farrow = LSLReserveToken (LST_FIELDMAPSYM, "\\field_arrow"); | |
753 | ||
754 | ltoken_marker = LSLReserveToken (LST_MARKERSYM, "\\marker"); | |
755 | ltoken_comment = LSLReserveToken (LST_COMMENTSYM, "\\comment"); | |
756 | ltoken_compose = LSLReserveToken (LST_COMPOSESYM, "\\composeSort"); | |
757 | ltoken_if = LSLReserveToken (LST_ifTOKEN, "if"); | |
758 | ||
bb7c2085 | 759 | (void) LSLReserveToken (LST_LPAR, " ("); |
616915dd | 760 | (void) LSLReserveToken (LST_RPAR, ")"); |
761 | (void) LSLReserveToken (LST_COMMA, ","); | |
762 | (void) LSLReserveToken (LST_COLON, ":"); | |
763 | ||
764 | (void) LSLReserveToken (LST_LBRACKET, "["); | |
765 | (void) LSLReserveToken (LST_RBRACKET, "]"); | |
766 | ||
767 | (void) LSLReserveToken (LST_WHITESPACE, " "); | |
768 | (void) LSLReserveToken (LST_WHITESPACE, "\t"); | |
769 | (void) LSLReserveToken (LST_WHITESPACE, "\n"); | |
770 | ||
771 | (void) LSLReserveToken (LEOFTOKEN, "E O F"); | |
772 | (void) LSLReserveToken (LST_EOL, "E O L"); | |
773 | ||
774 | (void) LSLReserveToken (LST_assertsTOKEN, "asserts"); | |
775 | (void) LSLReserveToken (LST_assumesTOKEN, "assumes"); | |
776 | (void) LSLReserveToken (LST_byTOKEN, "by"); | |
777 | (void) LSLReserveToken (LST_convertsTOKEN, "converts"); | |
778 | (void) LSLReserveToken (LST_elseTOKEN, "else"); | |
779 | (void) LSLReserveToken (LST_enumerationTOKEN, "enumeration"); | |
780 | (void) LSLReserveToken (LST_equationsTOKEN, "equations"); | |
781 | (void) LSLReserveToken (LST_exemptingTOKEN, "exempting"); | |
782 | (void) LSLReserveToken (LST_forTOKEN, "for"); | |
783 | (void) LSLReserveToken (LST_generatedTOKEN, "generated"); | |
784 | (void) LSLReserveToken (LST_impliesTOKEN, "implies"); | |
785 | (void) LSLReserveToken (LST_includesTOKEN, "includes"); | |
786 | (void) LSLReserveToken (LST_introducesTOKEN, "introduces"); | |
787 | (void) LSLReserveToken (LST_ofTOKEN, "of"); | |
788 | (void) LSLReserveToken (LST_partitionedTOKEN, "partitioned"); | |
789 | (void) LSLReserveToken (LST_thenTOKEN, "then"); | |
790 | (void) LSLReserveToken (LST_traitTOKEN, "trait"); | |
791 | (void) LSLReserveToken (LST_tupleTOKEN, "tuple"); | |
792 | (void) LSLReserveToken (LST_unionTOKEN, "union"); | |
793 | } | |
794 | ||
/*
** Reset hook for this module.  Intentionally empty: there is nothing to
** reset.
*/
void
lscanLineReset (void)
{
  /* nothing to do */
}
799 | ||
/*
** Cleanup hook for this module.  Intentionally empty: there is nothing to
** release.
*/
void
lscanLineCleanup (void)
{
  /* nothing to do */
}
804 | ||
805 | charCode lscanCharClass (char c) | |
806 | { | |
bb7c2085 | 807 | return charClass[ (int) (c)].code; |
616915dd | 808 | } |
809 | ||
810 | bool LSLIsEndComment (char c) | |
811 | { | |
bb7c2085 | 812 | return charClass[ (int) (c)].endCommentChar; |
616915dd | 813 | } |
814 | ||
815 | void lsetCharClass (char c, charCode cod) | |
816 | { | |
bb7c2085 | 817 | charClass[ (int) (c)].code = cod; |
616915dd | 818 | } |
819 | ||
820 | void lsetEndCommentChar (char c, bool flag) | |
821 | { | |
bb7c2085 | 822 | charClass[ (int) (c)].endCommentChar = flag; |
616915dd | 823 | } |