/* Copyright 2009
 * Kaz Kylheku <kkylheku@gmail.com>
 * Vancouver, Canada
 * All rights reserved.
 *
 * BSD License:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *   3. The name of the author may not be used to endorse or promote
 *      products derived from this software without specific prior
 *      written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

%{

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <dirent.h>
#include "y.tab.h"
#include "lib.h"
#include "gc.h"
#include "stream.h"
#include "parser.h"

#define YY_NO_UNPUT

/*
 * Scanner input hook: fills buf with characters obtained from yyin_stream
 * through get_char(), stopping after a newline has been stored, when
 * max_size characters have been stored, or when get_char() yields nil.
 * The number of characters stored is written to result.
 *
 * ch must start out with a defined value: when the very first get_char()
 * call yields nil, the loop condition short-circuits before ch is assigned,
 * yet ch is still compared against '\n' after the loop.
 */
#define YY_INPUT(buf, result, max_size)           \
  do {                                            \
    obj_t *c = nil;                               \
    int n, ch = 0;                                \
    for (n = 0; n < max_size &&                   \
                (c = get_char(yyin_stream)) &&    \
                (ch = c_chr(c)) != '\n'; ++n)     \
      buf[n] = (char) ch;                         \
    if (ch == '\n')                               \
      buf[n++] = (char) ch;                       \
    result = n;                                   \
  } while (0)

obj_t *yyin_stream;

long lineno = 1;
int opt_loglevel = 1;   /* 0 - quiet; 1 - normal; 2 - verbose */
int opt_nobindings = 0;
int opt_arraydims = 1;

int errors;

/*
 * Reports the fixed message s through yyerrorf.  The message is passed as
 * a "%s" argument so that it is never interpreted as a format string.
 */
void yyerror(const char *s)
{
  yyerrorf("%s", s);
}

/*
 * Formatted error report.  When opt_loglevel is at least 1, writes
 * "progname: (spec_file:lineno): message" followed by a newline to stderr.
 * The errors counter is incremented whether or not anything is printed.
 */
void yyerrorf(const char *s, ...)
{
  if (opt_loglevel >= 1) {
    va_list vl;
    va_start (vl, s);
    fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno);
    vfprintf(stderr, s, vl);
    putc('\n', stderr);
    va_end (vl);
  }
  errors++;
}

/*
 * Reports an offending token.  tok is translated to a description if it is
 * one of the token codes listed below; context, when non-null, names the
 * construct being processed.
 *
 *   described token, context given:   "misplaced <token> in <context>"
 *   described token, no context:      "unexpected <token>"
 *   other token, context given:       "unterminated <context>"
 *   other token, no context:          "unexpected end of input"
 */
void yybadtoken(int tok, const char *context)
{
  const char *problem = 0;

  switch (tok) {
  case TEXT:    problem = "text"; break;
  case IDENT:   problem = "identifier"; break;
  case ALL:     problem = "\"all\""; break;
  case SOME:    problem = "\"some\""; break;
  case NONE:    problem = "\"none\""; break;
  case MAYBE:   problem = "\"maybe\""; break;
  case CASES:   problem = "\"cases\""; break;
  case AND:     problem = "\"and\""; break;
  case OR:      problem = "\"or\""; break;
  case END:     problem = "\"end\""; break;
  case COLLECT: problem = "\"collect\""; break;
  case UNTIL:   problem = "\"until\""; break;
  case COLL:    problem = "\"coll\""; break;
  case OUTPUT:  problem = "\"output\""; break;
  case REPEAT:  problem = "\"repeat\""; break;
  case REP:     problem = "\"rep\""; break;
  case SINGLE:  problem = "\"single\""; break;
  case FIRST:   problem = "\"first\""; break;
  case LAST:    problem = "\"last\""; break;
  case EMPTY:   problem = "\"empty\""; break;
  case DEFINE:  problem = "\"define\""; break;
  case TRY:     problem = "\"try\""; break;
  case CATCH:   problem = "\"catch\""; break;
  case FINALLY: problem = "\"finally\""; break;
  case NUMBER:  problem = "\"number\""; break;
  case REGCHAR: problem = "regular expression character"; break;
  case LITCHAR: problem = "string literal character"; break;
  default:      break;  /* no description: problem stays null */
  }

  if (problem != 0) {
    if (context)
      yyerrorf("misplaced %s in %s", problem, context);
    else
      yyerrorf("unexpected %s", problem);
  } else {
    if (context)
      yyerrorf("unterminated %s", context);
    else
      yyerrorf("unexpected end of input");
  }
}

/*
 * Maps the letter that follows a backslash to the character it denotes.
 * The scanner patterns only pass letters from the set handled here; any
 * other value reaches abort().
 */
static int char_esc(int letter)
{
  switch (letter) {
  case 'a': return '\a';
  case 'b': return '\b';
  case 't': return '\t';
  case 'n': return '\n';
  case 'v': return '\v';
  case 'f': return '\f';
  case 'r': return '\r';
  case 'e': return 27;
  case '"': return '"';
  case '\'': return '\'';
  case '`': return '`';
  }

  abort();
}

/*
 * Converts the text of a numeric escape (the part after the backslash) to
 * its value.  A leading 'x' selects hexadecimal, otherwise the digits are
 * octal.  Text longer than three characters (the 'x' included, in the hex
 * case) is reported as an error, but the conversion result is still
 * returned.
 */
static int num_esc(char *num)
{
  if (num[0] == 'x') {
    if (strlen(num) > 3)
      yyerror("too many digits in hex character escape");
    return strtol(num + 1, 0, 16);
  } else {
    if (strlen(num) > 3)
      yyerror("too many digits in octal character escape");
    return strtol(num, 0, 8);
  }
}

%}

%option stack

TOK     [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
ID_END  [^a-zA-Z0-9_]
NUM_END [^0-9]
WS      [\t ]*
HEX     [0-9A-Fa-f]
OCT     [0-7]

%x      SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT

%%

<SPECIAL,NESTED>{TOK} {
    long val;
    char *errp;

    /* Try the token as a decimal number first; leftover text after the
       conversion means it is an identifier instead. */
    errno = 0;
    val = strtol(yytext, &errp, 10);

    if (yy_top_state() == INITIAL || yy_top_state() == QSILIT)
      yy_pop_state();

    if (*errp != 0) {
      /* not a number */
      yylval.lexeme = strdup(yytext);
      return IDENT;
    }

    if ((val == LONG_MAX || val == LONG_MIN) && errno == ERANGE)
      yyerror("numeric overflow in token");

    yylval.num = val;
    return NUMBER;
  }

<SPECIAL>\({WS}all{WS}\) {
    yy_pop_state();
    return ALL;
  }

<SPECIAL>\({WS}some{WS}\) {
    yy_pop_state();
    return SOME;
  }

<SPECIAL>\({WS}none{WS}\) {
    yy_pop_state();
    return NONE;
  }

<SPECIAL>\({WS}maybe{WS}\) {
    yy_pop_state();
    return MAYBE;
  }

<SPECIAL>\({WS}cases{WS}\) {
    yy_pop_state();
    return CASES;
  }

<SPECIAL>\({WS}and{WS}\) {
    yy_pop_state();
    return AND;
  }

<SPECIAL>\({WS}or{WS}\) {
    yy_pop_state();
    return OR;
  }

<SPECIAL>\({WS}end{WS}\) {
    yy_pop_state();
    return END;
  }

<SPECIAL>\({WS}collect{WS}\) {
    yy_pop_state();
    return COLLECT;
  }

<SPECIAL>\({WS}coll{WS}\) {
    yy_pop_state();
    return COLL;
  }

<SPECIAL>\({WS}until{WS}\) {
    yy_pop_state();
    return UNTIL;
  }

<SPECIAL>\({WS}output/{ID_END} {
    yy_push_state(NESTED);
    return OUTPUT;
  }

<SPECIAL>\({WS}repeat{WS}\) {
    yy_pop_state();
    return REPEAT;
  }

<SPECIAL>\({WS}rep{WS}\) {
    yy_pop_state();
    return REP;
  }

<SPECIAL>\({WS}single{WS}\) {
    yy_pop_state();
    return SINGLE;
  }

<SPECIAL>\({WS}first{WS}\) {
    yy_pop_state();
    return FIRST;
  }

<SPECIAL>\({WS}last{WS}\) {
    yy_pop_state();
    return LAST;
  }

<SPECIAL>\({WS}empty{WS}\) {
    yy_pop_state();
    return EMPTY;
  }

<SPECIAL>\({WS}define/{ID_END} {
    yy_push_state(NESTED);
    return DEFINE;
  }

<SPECIAL>\({WS}try{WS}\) {
    yy_pop_state();
    return TRY;
  }

<SPECIAL>\({WS}catch/{ID_END} {
    yy_push_state(NESTED);
    return CATCH;
  }

<SPECIAL>\({WS}finally{WS}\) {
    yy_pop_state();
    return FINALLY;
  }

<SPECIAL,NESTED>\{|\( {
    yy_push_state(NESTED);
    if (yy_top_state() == INITIAL || yy_top_state() == QSILIT)
      yy_pop_state();
    return yytext[0];
  }

<SPECIAL,NESTED>\}|\) {
    yy_pop_state();
    if (yy_top_state() == INITIAL || yy_top_state() == QSILIT)
      yy_pop_state();
    return yytext[0];
  }

<SPECIAL,NESTED>[\t ]+ {
    /* Eat whitespace in directive */
  }

<SPECIAL,NESTED>\" {
    yy_push_state(STRLIT);
    return '"';
  }

<SPECIAL,NESTED>\' {
    yy_push_state(CHRLIT);
    return '\'';
  }

<SPECIAL,NESTED>` {
    yy_push_state(QSILIT);
    return '`';
  }

<SPECIAL>@ {
    /* "@@" yields a literal "@" as text */
    yy_pop_state();
    yylval.lexeme = strdup("@");
    return TEXT;
  }

<SPECIAL,NESTED>\n {
    lineno++;
  }

<SPECIAL,NESTED>[/] {
    yy_push_state(REGEX);
    return '/';
  }

<SPECIAL,NESTED>\. {
    yylval.chr = '.';
    return '.';
  }

<SPECIAL>[\\][abtnvfre] {
    char lexeme[2];
    lexeme[0] = char_esc(yytext[1]);
    lexeme[1] = 0;
    yylval.lexeme = strdup(lexeme);
    yy_pop_state();
    return TEXT;
  }

<SPECIAL>[\\](x{HEX}+|{OCT}+) {
    char lexeme[2];
    lexeme[0] = num_esc(yytext + 1);
    lexeme[1] = 0;
    yylval.lexeme = strdup(lexeme);
    yy_pop_state();
    return TEXT;
  }

<SPECIAL,NESTED>. {
    yyerrorf("bad character in directive: '%c'", yytext[0]);
  }

<REGEX>[/] {
    yy_pop_state();
    if (yy_top_state() == INITIAL || yy_top_state() == QSILIT)
      yy_pop_state();
    yylval.chr = '/';
    return '/';
  }

<REGEX>[\\][abtnvfre] {
    yylval.chr = char_esc(yytext[1]);
    return REGCHAR;
  }

<REGEX>[\\](x{HEX}+|{OCT}+) {
    yylval.chr = num_esc(yytext + 1);
    return REGCHAR;
  }

<REGEX>\n {
    lineno++;
    yyerror("newline in regex");
  }

<REGEX>[.*?+^] {
    yylval.chr = yytext[0];
    return yytext[0];
  }

<REGEX>[\[\]\-] {
    yylval.chr = yytext[0];
    return yytext[0];
  }

<REGEX>[()|] {
    yylval.chr = yytext[0];
    return yytext[0];
  }

<REGEX>[\\]. {
    /* any other escaped character stands for itself */
    yylval.chr = yytext[1];
    return REGCHAR;
  }

<REGEX>. {
    yylval.chr = yytext[0];
    return REGCHAR;
  }

<INITIAL>[^@\n]+ {
    yylval.lexeme = strdup(yytext);
    return TEXT;
  }

<INITIAL>\n {
    lineno++;
    return '\n';
  }

<INITIAL>@{WS}\* {
    yy_push_state(SPECIAL);
    return '*';
  }

<INITIAL>@ {
    yy_push_state(SPECIAL);
  }

<INITIAL>^@#.*\n {
    /* eat whole line comment */
    lineno++;
  }

<INITIAL>@#.* {
    /* comment to end of line */
  }

<STRLIT>\" {
    yy_pop_state();
    return yytext[0];
  }

<CHRLIT>\' {
    yy_pop_state();
    return yytext[0];
  }

<QSILIT>` {
    yy_pop_state();
    return yytext[0];
  }

<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'] {
    yylval.chr = char_esc(yytext[1]);
    return LITCHAR;
  }

<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) {
    yylval.chr = num_esc(yytext + 1);
    return LITCHAR;
  }

<STRLIT>\n {
    yyerror("newline in string literal");
    lineno++;
    yylval.chr = yytext[0];
    return LITCHAR;
  }

<CHRLIT>\n {
    yyerror("newline in character literal");
    lineno++;
    yylval.chr = yytext[0];
    return LITCHAR;
  }

<QSILIT>\n {
    yyerror("newline in string quasiliteral");
    lineno++;
    yylval.chr = yytext[0];
    return LITCHAR;
  }

<QSILIT>@ {
    yy_push_state(SPECIAL);
  }

<STRLIT,CHRLIT,QSILIT>. {
    yylval.chr = yytext[0];
    return LITCHAR;
  }

%%