3 * (C) Copyright Apr 15 1995, Edmond J. Breen.
5 * This code may be copied for personal, non-profit use only.
9 * First written V1.0 1984 (E.J.B)
10 * Revised V1.2 1994 (E.J.B)
11 * Revised V2.0 1995 (E.J.B)
/*
 * Forward declarations for the scanner's helper routines.
 * Two groups are visible: one with internal linkage (static) and one
 * without.  NOTE(review): the preprocessor conditionals that presumably
 * select between the two groups are not part of this excerpt -- confirm.
 * NOTE(review): the second group declares stringliteral() while the first
 * declares EiC_stringliteral(); check that the names are meant to differ.
 */
29 static void retract(char c);
30 static int fail(int ival, int c);
31 static int charliteral(int c);
32 static void EiC_stringliteral(void);
33 static void success(int ival);
36 char *nextproline(void);
38 int fail(int ival, int c);
39 int charliteral(int c);
40 void stringliteral(void);
41 void success(int ival);
/*
 * Character-class predicates used by the scanner.  Each yields TRUE or
 * FALSE.  Every use of the parameter is parenthesised so that a compound
 * argument such as `c & 0xff` is classified as a whole instead of being
 * re-associated with the comparison operators inside the macro.
 * The argument may be evaluated more than once, so it must not have side
 * effects.
 */

/* NEWLINE: the line-feed character. */
#define NEWLINE(ch) (((ch) == '\n') ? TRUE : FALSE)

/* LETTER: 'a'-'z', 'A'-'Z', underscore or dollar sign. */
#define LETTER(ch) ((((ch) >= 'a' && (ch) <= 'z') || \
                     ((ch) >= 'A' && (ch) <= 'Z') || \
                     (ch) == '_' || (ch) == '$') ? TRUE : FALSE)

/* DIGIT: '0'-'9'. */
#define DIGIT(ch) (((ch) >= '0' && (ch) <= '9') ? TRUE : FALSE)

/* WHITE: blank or horizontal tab (newline is handled separately). */
#define WHITE(ch) (((ch) == ' ' || (ch) == '\t') ? TRUE : FALSE)
/*
 * Keyword table entries: each pairs a reserved-word spelling with its
 * token symbol.  The array declaration itself is outside this excerpt;
 * presumably this is the `cwords` table that success() hands to
 * EiC_iskeyword() -- TODO confirm.
 * NOTE(review): the visible entries are in ascending alphabetical order;
 * if EiC_iskeyword() relies on that ordering, new keywords must be
 * inserted in sorted position -- confirm against its implementation.
 */
56 {"__eiclongjmp",eiclongjmpsym,},
57 {"__eicsetjmp",eicsetjmpsym,},
64 {"continue", continuesym,},
65 {"default", defaultsym,},
67 {"double", doublesym,},
70 {"extern", externsym,},
77 {"register", registersym,},
78 {"return", returnsym,},
81 {"signed", signedsym,},
82 {"sizeof", sizeofsym,},
83 {"static", staticsym,},
84 {"struct", structsym,},
85 {"switch", switchsym,},
86 {"typedef", typedefsym,},
88 {"unsafe", unsafesym,},
89 {"unsigned", unsignedsym,},
91 {"volatile", volatilesym,},
/* Text of the most recently recognised lexeme; success() copies the
 * characters in and NUL-terminates them (hence the extra byte). */
99 char EiC_LEXEM[BSIZE+1];
/* Token currently being filled in; EiC_lexan() repoints this into
 * EiC_TokenArray. */
101 token_t *token = &TOK;
/* Token storage and its indices.  EiC_lexan() advances EiC_TokenP
 * modulo MAX_TOKENS; EiC_initlex() resets all three indices to zero. */
105 token_t EiC_TokenArray[MAX_TOKENS];
106 int EiC_TokenI = 0, EiC_TokenP = 0, EiC_TokenR=0;
/* Compared against NOTOKEN at the top of EiC_lexan(). */
110 unsigned short STOKEN;
/* Line and column bookkeeping for the input buffer.  The declaration
 * continues on lines that are outside this excerpt. */
117 unsigned lex_lineno, lex_lineprev, lex_linepos, lex_linelen, lex_curpos = 0,
/* Current state of the state machine in EiC_lexan(). */
121 static int state = 0;
/* Numeric value of the last integer / floating literal, computed by
 * success() and consumed by setintval() / setfloatval(). */
122 static unsigned long lexival;
123 static double lexfval;
/* Literal-suffix and radix flags; the first three are cleared by
 * checkExt() before it inspects the suffix character. */
124 static char Lseen, /* long seen */
125 Fseen, /* floating point seen */
126 Useen, /* Unsigned specifier seen */
127 Hseen; /* Hex or Octal value seen */
/* getoct: value (0-7) of the octal digit character x, or -1 when x is not
 * an octal digit.  x may be evaluated up to three times, so it must not
 * have side effects. */
129 #define getoct(x) (((x)>='0'&&(x)<='7')? (x)-'0':-1)
/*
 * gethex: classify c as a hexadecimal digit.
 * The three visible tests cover '0'-'9', 'a'-'f' and 'A'-'F'; the values
 * returned for each range are on lines outside this excerpt.  Callers in
 * this file treat a result greater than -1 as "c is a hex digit", and
 * success() adds the result into a base-16 accumulation.
 */
132 static int gethex(int c)
134 if (c >= '0' && c <= '9')
136 if (c >= 'a' && c <= 'f')
138 if (c >= 'A' && c <= 'F')
/*
 * EiC_initlex: (re)initialise the scanner.
 * The only statement visible in this excerpt resets the three token
 * indices to zero; how `str` is used is not shown here.
 */
144 void EiC_initlex(char *str)
152 EiC_TokenR = EiC_TokenP = EiC_TokenI = 0;
/*
 * Character fetch -- presumably the body of EiC_nextchar(); the signature
 * line is outside this excerpt (TODO confirm).
 * Returns lex_buff[lex_curpos] and then advances lex_curpos.
 */
163 char *EiC_nextproline();
/* End-of-input marker in the buffer; the action taken is not visible. */
164 if (lex_buff[lex_curpos] == EOF) {
/* End of the current line buffer: fetch the next line and restart both
 * position markers at 0.  The statement guarded by the lex_curpos !=
 * lex_lastpos test is outside this excerpt. */
167 else if (lex_buff[lex_curpos] == '\0') {
169 if (lex_curpos != lex_lastpos)
172 lex_buff = EiC_nextproline();
173 lex_lastpos = lex_curpos = 0;
/* The line number changed since the last fetch: remember the position at
 * which the new line starts and resynchronise lex_lineprev. */
177 if (lex_lineprev != lex_lineno) {
178 lex_linepos = lex_curpos;
179 lex_lineprev = lex_lineno;
181 return (lex_buff[lex_curpos++]);
/*
 * checkExt: inspect the character c that follows a numeric literal for a
 * type suffix.  Lseen, Useen and Fseen are cleared first (Hseen is left
 * untouched), then c is tested for f/F, u/U (followed by a read of one
 * more character to look for l/L) and l/L.  The statements executed in
 * each branch are outside this excerpt.
 */
184 static void checkExt(int c)
185 { /* check for unsigned and long suffix */
186 Lseen = Useen = Fseen = 0;
187 if (c == 'f' || c == 'F')
189 else if (c == 'u' || c == 'U') {
191 if ((c = EiC_nextchar()) == 'l' || c == 'L')
195 } else if (c == 'l' || c == 'L')
/*
 * retract: step the scanner back after reading one character too many.
 * Only the final adjustment is visible here: if the current position has
 * moved before the recorded lexeme start, the lexeme start is pulled back
 * to match.  The statements that move lex_curpos are outside this excerpt.
 */
201 static void retract(char c)
205 if (lex_curpos < lex_lastpos)
206 lex_lastpos = lex_curpos;
/*
 * fail: choose the state to try next after a token class did not match.
 * EiC_lexan() assigns the result to `state`.  Visible mappings:
 *   RELOP -> 10 (identifiers and keywords)
 *   ID    -> 20 (integers and reals)
 *   INT   -> 100 (single-character token)
 * The switch header and any further cases are outside this excerpt
 * (presumably the switch is on ival -- TODO confirm).
 */
211 static int fail(int ival, int c)
215 case RELOP: return (10);
216 case ID: return (20);
218 case INT: return (100);
/*
 * setfloatval: store the floating literal held in lexfval into the
 * current token.  The value is kept as a double when an L suffix was
 * seen, when it exceeds FLT_MAX, or when no F suffix was seen; otherwise
 * the branch marked "mandatory conversion to float" is taken (its
 * statements are outside this excerpt).
 */
223 static void setfloatval(void)
226 if (Lseen || lexfval > FLT_MAX || !Fseen ) {
227 token->Val.dval = lexfval;
230 /* mandatory conversion to float */
/*
 * setintval: store the integer literal held in lexival into the current
 * token, choosing the Val member (ival, uival, lval or ulval) from the
 * suffix flags and the magnitude of the value.  The outer conditions that
 * select between the groups of tests below are outside this excerpt.
 */
238 static void setintval(void)
/* First group: too large for unsigned, or an L suffix was seen, gives
 * ulval; the uival store belongs to a branch whose condition is not
 * visible. */
246 if (Lseen || lexival > UINT_MAX) {
248 token->Val.ulval = lexival;
251 token->Val.uival = (unsigned) lexival;
/* Second group: long-sized values.
 * NOTE(review): lexival is declared unsigned long, so the test
 * `lexival > ULONG_MAX` can never be true -- confirm the intended limit. */
253 } else if (Lseen || lexival > UINT_MAX) {
254 if (lexival > ULONG_MAX) {
256 token->Val.ulval = lexival;
257 } else if (lexival >= ULONG_MAX) {
259 token->Val.ulval = lexival;
261 token->Val.lval = lexival;
/* Third group: pick the narrowest type that holds the value; a value seen
 * with Hseen set may be stored as unsigned when it fits UINT_MAX. */
265 if (lexival <= INT_MAX) {
266 token->Val.ival = (int) lexival;
268 } else if(Hseen && lexival <= UINT_MAX) {
270 token->Val.uival = (unsigned) lexival;
271 } else if(lexival <= LONG_MAX) {
272 token->Val.lval = (long) lexival;
276 token->Val.ulval = lexival;
/*
 * success: finish a recognised token of class ival.
 * Copies the lexeme (lex_lastpos up to lex_curpos) into EiC_LEXEM and
 * then fills in the current token according to the class.  The switch
 * header and case labels are outside this excerpt.
 * NOTE(review): no check of `size` against BSIZE is visible before the
 * memcpy -- confirm that a lexeme cannot exceed the buffer.
 */
281 static void success(int ival)
285 size = (int) (lex_curpos - lex_lastpos);
286 memcpy(EiC_LEXEM, &lex_buff[lex_lastpos], size);
287 EiC_LEXEM[size] = '\0';
/* Identifier path: a zero result from EiC_iskeyword() means the lexeme is
 * not a keyword. */
295 if ((token->Tok = EiC_iskeyword(cwords, EiC_LEXEM,
296 sizeof(cwords) / sizeof(keyword_t))) == 0) {
298 /* search for id in various name spaces */
/* Unknown names are inserted into the working table as ID. */
299 if ((token->Val.sym = EiC_lookup(EiC_work_tab, EiC_LEXEM)) == NULL)
300 token->Val.sym = EiC_insertLUT(EiC_work_tab, EiC_LEXEM, ID);
/* A symbol whose storage class is typedef is reported as TYPENAME. */
302 if (token->Val.sym->sclass == c_typedef)
303 token->Tok = TYPENAME;
/* Base-8 accumulation over the whole lexeme.
 * NOTE(review): getoct() yields -1 for '8' and '9'; whether such digits
 * are rejected before this loop is not visible here. */
308 EiC_error("Declaration syntax error");
309 for (lexival = 0, i = 0; i < size; i++)
310 lexival = lexival * 8 + getoct(EiC_LEXEM[i]);
/* Base-16 accumulation; starts at index 2 to skip the two-character
 * prefix. */
315 for (lexival = 0, i = 2; i < size; i++)
316 lexival = lexival * 16 + gethex(EiC_LEXEM[i]);
/* Base-10 accumulation. */
321 for (lexival = 0, i = 0; i < size; i++)
322 lexival = lexival * 10 + EiC_LEXEM[i] - '0';
/* Floating literals are converted from the lexeme text with atof(). */
327 EiC_error("Declaration syntax error");
328 lexfval = atof(EiC_LEXEM);
/* Flag consulted by EiC_stringliteral() to tell a character produced by
 * an escape sequence from the same character appearing unescaped.  The
 * statements that set it are outside this excerpt. */
338 static int WASLITERAL;
/*
 * charliteral: translate one character of a character or string literal.
 * In the visible escape handling the next input character selects the
 * replacement: the simple escapes map to their control characters, 'x'
 * introduces a run of hexadecimal digits, and an octal digit introduces
 * an octal value.  Anything else is reported with EiC_error().
 * The result is returned after conversion to signed char.
 */
339 static int charliteral(int c)
342 switch ((c = EiC_nextchar())) {
343 case 'n': c = '\n'; break; /* newline */
344 case 't': c = '\t'; break; /* tabspace */
345 case 'v': c = '\v'; break; /* vertical tab */
346 case 'b': c = '\b'; break; /* backspace */
347 case 'r': c = '\r'; break; /* carriage return */
348 case 'f': c = '\f'; break; /* formfeed */
349 case 'a': c = '\a'; break; /* bell */
350 case '\\': c = '\\'; break; /* backslash */
351 case '\'': c = '\''; break; /* single quote */
352 case '"': c = '\"'; break; /* double quote */
353 case '?': c = '\?'; break; /* question mark */
354 case 'x': /* string of hex characters */
/* Consume characters for as long as gethex() accepts them. */
357 while ((i = gethex((c = EiC_nextchar()))) > -1) {
362 EiC_error("Illegal character hex value");
367 if (getoct(c) > -1) { /* octal characters */
369 while ((i = getoct(c)) > -1) {
375 EiC_error("Illegal character octal value");
378 EiC_error("Illegal character escape sequence `\\%c'", c);
384 return ((signed char )c);
/*
 * EiC_stringliteral: scan a string literal into a heap buffer and attach
 * it to the current token (Val.p.sp / Val.p.p point at the start,
 * Val.p.ep at p + lastsize + 1).
 * Characters are collected in chunks of at most BSIZE and appended to the
 * buffer p, which is created with xcalloc() or grown with xrealloc().
 * The outer loop repeats when a chunk filled up or when another '"'
 * follows.
 */
387 static void EiC_stringliteral(void)
389 unsigned size, lastsize = 0, c;
391 lex_lastpos = lex_curpos;
/* Collect until an unescaped NUL, an unescaped '"', or a full chunk;
 * WASLITERAL marks characters that came from an escape sequence. */
394 for (size = 0; ((c = charliteral(EiC_nextchar())) != '\0' || WASLITERAL) &&
395 !(c == '"' && !WASLITERAL) && size < BSIZE; size++)
/* Append the chunk (taken from EiC_LEXEM) after the lastsize bytes
 * already stored. */
400 p = (char *) xrealloc(p, lastsize + size + 1);
402 p = (char *) xcalloc(size + 1, sizeof(char));
404 memcpy(&p[lastsize], EiC_LEXEM, size);
407 if(c != '"' && size == BSIZE) {
413 EiC_error("String literal error");
/* Skip blanks, tabs and newlines that follow the closing quote. */
418 lex_lastpos++, lex_lineno++;
419 } while (WHITE(c) || c == '\n');
421 lex_lastpos = lex_curpos;
426 } while (c == '"' || size == BSIZE);
429 token->Val.p.sp = token->Val.p.p = p;
430 token->Val.p.ep = p + lastsize + 1;
/*
 * EiC_lexan: scan the next token.
 * The token is written into EiC_TokenArray[EiC_TokenP]; recognition is a
 * state machine driven by the file-scope variable `state`, and a branch
 * that has produced a token clears `loop`.
 * State groups visible in this excerpt:
 *   0       skip blanks, count newlines, dispatch on operator characters
 *   1-6     two-character operators starting with < > + - | &
 *   7, 8    character constants and string literals
 *   10-12   identifiers and keywords
 *   20-30   numeric literals (decimal, real, hex, octal)
 *   50      operator followed by '=' (compound assignment, comparisons)
 *   60      finish an operator token
 *   100     single-character token
 * NOTE(review): c is a plain char and EiC_nextchar is redeclared locally
 * as returning char; elsewhere in the file the buffer is compared with
 * EOF -- confirm the end-of-input value survives the char conversion.
 */
433 extern int EiC_lexan(void)
435 int t=0, loop; char c=0, EiC_nextchar();
439 token = &EiC_TokenArray[EiC_TokenP];
444 EiC_TokenP=(EiC_TokenP+1)%MAX_TOKENS;
451 if (STOKEN != NOTOKEN) {
/* State 0 marks the lexeme start and selects the next state from the
 * first character; t holds the provisional operator token. */
462 case 0: lex_lastpos = lex_curpos; c = EiC_nextchar();
463 state = (WHITE(c) ? 0 :
464 (c == '\n' ? lex_lineno++, 0 :
465 (c == '<' ? t = LT, 1 :
466 (c == '>' ? t = GT, 2 :
467 (c == '+' ? t = '+', 3 :
468 (c == '-' ? t = '-', 4 :
469 (c == '|' ? t = BOR, 5 :
470 (c == '&' ? t = AND, 6 :
/* These operators can only be extended by '=': read one more character
 * and go straight to state 50. */
474 (c == '/' ? t = '/', c = EiC_nextchar(), 50 :
475 (c == '%' ? t = '%', c = EiC_nextchar(), 50 :
476 (c == '*' ? t = '*', c = EiC_nextchar(), 50 :
477 (c == '=' ? t = ASS, c = EiC_nextchar(), 50 :
478 (c == '!' ? t = NOT, c = EiC_nextchar(), 50 :
479 (c == '^' ? t = XOR, c = EiC_nextchar(), 50 :
480 fail(RELOP, c))))))))))))))))));
482 case 1: /* get <, <= and << */
483 if ((c = EiC_nextchar()) == '<') t = LSHT;
486 case 2: /* get >, >= and >> */
487 if ((c = EiC_nextchar()) == '>') t = RSHT;
490 case 3: c = EiC_nextchar(); /* get +, += or ++ */
491 if (c == '+') t = INC, state = 60;
494 case 4: c = EiC_nextchar(); /* get -, -=, -- or -> */
496 if (c == '-') t = DEC;
497 else if (c == '>') t = RARROW;
500 case 5: c = EiC_nextchar(); /* get |, |= or || */
501 if (c == '|') t = LOR, state = 60;
504 case 6: c = EiC_nextchar(); /* get &, &= or && */
505 if (c == '&') t = LAND, state = 60;
508 case 7:token->Val.ival = charliteral(EiC_nextchar()); /* char_constants */
510 if (EiC_nextchar() != '\'')
511 EiC_error("Missing single quote '");
514 case 8: EiC_stringliteral(); /* string literals */
516 /*return STR;*/ loop = 0; break;
517 case 9: c = EiC_nextchar();
525 case 10: c = EiC_nextchar(); /* identifiers and keywords */
526 state = (LETTER(c) ? 11 :
527 (c == '_' ? 11 : fail(ID, c)));
/* State 11 consumes the remainder of the identifier; state 12 gives back
 * the terminating character and finishes the token. */
529 case 11: c = EiC_nextchar();
530 state = (LETTER(c) ? 11 :
532 (c == '_' ? 11 : 12)));
534 case 12: retract(c); success(ID); /*return (token->Tok);*/ loop = 0; break;
536 case 20: c = EiC_nextchar(); /* integers and reals */
537 state = (c == '0' ? 30 :
538 (DIGIT(c) ? 21 : fail(INT, c)));
/* State 21: decimal digits; 'E' starts an exponent, anything not matched
 * ends an integer (state 25). */
540 case 21: c = EiC_nextchar();
541 state = (DIGIT(c) ? 21 :
544 (c == 'E' ? 23 : 25))));
/* State 22: further digits of a real; anything not matched ends a
 * floating literal (state 26). */
546 case 22: c = EiC_nextchar();
547 state = (DIGIT(c) ? 22 :
549 (c == 'E' ? 23 : 26)));
/* States 23 and 24: exponent sign and exponent digits. */
551 case 23: c = EiC_nextchar();
552 state = (c == '+' ? 24 :
554 (DIGIT(c) ? 24 : fail(FLOAT, c) /* ??? */ )));
556 case 24: c = EiC_nextchar();
557 state = (DIGIT(c) ? 24 : 26);
/* States 25-28: examine the suffix character, then finish the literal
 * with the matching token class. */
559 case 25: checkExt(c); success(INT); /*return (token->Tok);*/ loop = 0; break;
560 case 26: checkExt(c); success(FLOAT); /*return (token->Tok);*/ loop = 0; break;
561 case 27: checkExt(c); success(HEX); /*return (token->Tok);*/ loop = 0; break;
562 case 28: checkExt(c); success(OCTAL); /*return (token->Tok);*/ loop = 0; break;
563 case 30: /* check for octal and hex numbers */
564 if ((c = EiC_nextchar()) == 'x' || c == 'X') {
565 while (gethex((c = EiC_nextchar())) > -1);
/* A leading 0 followed by '.', 'e' or 'E' is not octal: give the
 * character back and rescan from state 21. */
569 if (c != '.' && c != 'e' && c != 'E') {
570 while (getoct(c) > -1)
575 retract(c); state = 21; break;
576 case 50: /* mix with equal's */
579 case '+': t = ADDEQ; break; /* += */
580 case '-': t = SUBEQ; break; /* -= */
581 case '/': t = DIVEQ; break; /* /= */
582 case '*': t = MULEQ; break; /* *= */
583 case '%': t = MODEQ; break; /* %= */
584 case ASS: t = EQ; break; /* == */
585 case GT: t = GE; break; /* >= */
586 case LT: t = LE; break; /* <= */
587 case NOT: t = NE; break; /* != */
588 case RSHT:t = RSHTEQ; break; /* >>= */
589 case LSHT:t = LSHTEQ; break; /* <<= */
590 case AND: t = ANDEQ; break; /* &= */
591 case BOR: t = BOREQ; break; /* |= */
592 case XOR: t = XOREQ; break; /* ^= */
/* State 60 records the lexeme, then overrides the token code with the
 * operator held in t. */
597 case 60: success(MISC); token->Tok = t; /*return (token->Tok);*/ loop = 0; break;
/* State 100: no class matched, so the next character itself becomes the
 * token code. */
598 case 100: token->Tok = EiC_nextchar(); /*return (token->Tok);*/ loop = 0; break;
/* After the loop: token-index bookkeeping.  The statement guarded by the
 * EiC_TokenI test is outside this excerpt. */
604 if(EiC_TokenI<MAX_TOKENS)
607 EiC_TokenP = (EiC_TokenP +1)%MAX_TOKENS;