diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-07-31 17:33:59 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-07-31 17:40:55 -0700 |
commit | 0b38bc996c4c7e2693931bbd5103c7772b56b4bd (patch) | |
tree | 8e74fd6b7efc3a0fb87037b2bb58b9d8c6129339 /parser.l | |
parent | 2f5e7a5b96039b7a00543b4056bab7ec85c8db4b (diff) | |
download | txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.gz txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.bz2 txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.zip |
txr-015 2009-10-15txr-015
Diffstat (limited to 'parser.l')
-rw-r--r-- | parser.l | 523 |
1 files changed, 523 insertions, 0 deletions
/* Copyright 2009
 * Kaz Kylheku <kkylheku@gmail.com>
 * Vancouver, Canada
 * All rights reserved.
 *
 * BSD License:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *   3. The name of the author may not be used to endorse or promote
 *      products derived from this software without specific prior
 *      written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

%{

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <dirent.h>
#include "y.tab.h"
#include "lib.h"
#include "gc.h"
#include "stream.h"
#include "parser.h"

#define YY_NO_UNPUT

/*
 * Scanner input hook: fill buf from yyin_stream instead of a FILE *.
 *
 * Characters are fetched one at a time with get_char() and copied into
 * buf until max_size characters have been stored, get_char() yields a
 * false value (presumably end of stream), or a newline is seen. The
 * newline, if any, is stored too, so one call delivers at most one line.
 * The number of characters stored is written to result.
 *
 * ch starts out as 0 because the && chain can short-circuit before ch is
 * ever assigned (first get_char() fails, or max_size is 0); the newline
 * test after the loop must not read an indeterminate value in that case.
 * When the loop stops on a newline, n < max_size still holds, so storing
 * the newline stays within the buffer.
 */
#define YY_INPUT(buf, result, max_size)                 \
  do {                                                  \
    obj_t *c = nil;                                     \
    int n, ch = 0;                                      \
    for (n = 0; n < (max_size) &&                       \
                (c = get_char(yyin_stream)) &&          \
                (ch = c_chr(c)) != '\n'; ++n)           \
      (buf)[n] = (char) ch;                             \
    if (ch == '\n')                                     \
      (buf)[n++] = (char) ch;                           \
    (result) = n;                                       \
  } while (0)

/* Stream that YY_INPUT reads the scanner's input from. */
obj_t *yyin_stream;

/* Current line number; bumped by the newline rules below. */
long lineno = 1;
int opt_loglevel = 1;   /* 0 - quiet; 1 - normal; 2 - verbose */
int opt_nobindings = 0;
int opt_arraydims = 1;

/* Number of errors reported through yyerrorf so far. */
int errors;

/*
 * Report a fixed error message. The message is passed through "%s" so
 * that it is never interpreted as a format string.
 */
void yyerror(const char *s)
{
  yyerrorf("%s", s);
}

/*
 * Report a printf-style error message.
 *
 * When opt_loglevel is at least 1, the message is written to stderr,
 * prefixed with progname, spec_file and the current lineno, and followed
 * by a newline. The errors counter is incremented whether or not
 * anything was printed.
 */
void yyerrorf(const char *s, ...)
{
  if (opt_loglevel >= 1) {
    va_list vl;
    va_start (vl, s);
    fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno);
    vfprintf(stderr, s, vl);
    putc('\n', stderr);
    va_end (vl);
  }
  errors++;
}

/*
 * Report an offending token.
 *
 * tok is a token code; context, if non-null, names the construct in
 * which the token was encountered. Tokens listed in the switch are
 * reported as "misplaced <token> in <context>" or "unexpected <token>".
 * Any other token code is reported as "unterminated <context>" or, when
 * there is no context, as "unexpected end of input".
 */
void yybadtoken(int tok, const char *context)
{
  const char *problem = 0;

  switch (tok) {
  case TEXT:    problem = "text"; break;
  case IDENT:   problem = "identifier"; break;
  case ALL:     problem = "\"all\""; break;
  case SOME:    problem = "\"some\""; break;
  case NONE:    problem = "\"none\""; break;
  case MAYBE:   problem = "\"maybe\""; break;
  case CASES:   problem = "\"cases\""; break;
  case AND:     problem = "\"and\""; break;
  case OR:      problem = "\"or\""; break;
  case END:     problem = "\"end\""; break;
  case COLLECT: problem = "\"collect\""; break;
  case UNTIL:   problem = "\"until\""; break;
  case COLL:    problem = "\"coll\""; break;
  case OUTPUT:  problem = "\"output\""; break;
  case REPEAT:  problem = "\"repeat\""; break;
  case REP:     problem = "\"rep\""; break;
  case SINGLE:  problem = "\"single\""; break;
  case FIRST:   problem = "\"first\""; break;
  case LAST:    problem = "\"last\""; break;
  case EMPTY:   problem = "\"empty\""; break;
  case DEFINE:  problem = "\"define\""; break;
  case TRY:     problem = "\"try\""; break;
  case CATCH:   problem = "\"catch\""; break;
  case FINALLY: problem = "\"finally\""; break;
  case NUMBER:  problem = "\"number\""; break;
  case REGCHAR: problem = "regular expression character"; break;
  case LITCHAR: problem = "string literal character"; break;
  default:      break;  /* no description: handled below */
  }

  if (problem != 0) {
    if (context)
      yyerrorf("misplaced %s in %s", problem, context);
    else
      yyerrorf("unexpected %s", problem);
  } else {
    if (context)
      yyerrorf("unterminated %s", context);
    else
      yyerrorf("unexpected end of input");
  }
}

/*
 * Translate the letter of a backslash escape into the character it
 * denotes ('e' maps to 27, the ASCII escape character; the three quote
 * characters map to themselves). The rules below only pass letters that
 * their patterns admit, all of which are listed here; anything else
 * aborts the program.
 */
static int char_esc(int letter)
{
  switch (letter) {
  case 'a': return '\a';
  case 'b': return '\b';
  case 't': return '\t';
  case 'n': return '\n';
  case 'v': return '\v';
  case 'f': return '\f';
  case 'r': return '\r';
  case 'e': return 27;
  case '"': return '"';
  case '\'': return '\'';
  case '`': return '`';
  }

  abort();
}

/*
 * Convert the text of a numeric escape, starting just after the
 * backslash, into a character code. A leading 'x' selects hexadecimal;
 * otherwise the digits are octal. Text longer than three characters
 * (more than two hex digits after the 'x', or more than three octal
 * digits) is reported as an error, but the converted value is still
 * returned.
 */
static int num_esc(char *num)
{
  if (num[0] == 'x') {
    if (strlen(num) > 3)
      yyerror("too many digits in hex character escape");
    return strtol(num + 1, 0, 16);
  } else {
    if (strlen(num) > 3)
      yyerror("too many digits in octal character escape");
    return strtol(num, 0, 8);
  }
}

%}

%option stack

TOK     [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
ID_END  [^a-zA-Z0-9_]
NUM_END [^0-9]
WS      [\t ]*
HEX     [0-9A-Fa-f]
OCT     [0-7]

%x      SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT

%%

<SPECIAL,NESTED>{TOK} {
  /* Identifier or integer. strtol decides which: if it does not
     consume the whole lexeme, the token is an identifier. */
  long val;
  char *errp;

  errno = 0;

  val = strtol(yytext, &errp, 10);

  /* A token directly inside INITIAL or QSILIT ends the directive. */
  if (yy_top_state() == INITIAL
      || yy_top_state() == QSILIT)
    yy_pop_state();

  if (*errp != 0) {
    /* not a number */
    yylval.lexeme = strdup(yytext);
    return IDENT;
  }

  if ((val == LONG_MAX || val == LONG_MIN)
      && errno == ERANGE)
    yyerror("numeric overflow in token");

  yylval.num = val;
  return NUMBER;
}

<SPECIAL>\({WS}all{WS}\) {
  yy_pop_state();
  return ALL;
}

<SPECIAL>\({WS}some{WS}\) {
  yy_pop_state();
  return SOME;
}

<SPECIAL>\({WS}none{WS}\) {
  yy_pop_state();
  return NONE;
}

<SPECIAL>\({WS}maybe{WS}\) {
  yy_pop_state();
  return MAYBE;
}

<SPECIAL>\({WS}cases{WS}\) {
  yy_pop_state();
  return CASES;
}

<SPECIAL>\({WS}and{WS}\) {
  yy_pop_state();
  return AND;
}

<SPECIAL>\({WS}or{WS}\) {
  yy_pop_state();
  return OR;
}

<SPECIAL>\({WS}end{WS}\) {
  yy_pop_state();
  return END;
}

<SPECIAL>\({WS}collect{WS}\) {
  yy_pop_state();
  return COLLECT;
}

<SPECIAL>\({WS}coll{WS}\) {
  yy_pop_state();
  return COLL;
}

<SPECIAL>\({WS}until{WS}\) {
  yy_pop_state();
  return UNTIL;
}

<SPECIAL>\({WS}output/{ID_END} {
  /* Directive takes arguments: scan them in NESTED. */
  yy_push_state(NESTED);
  return OUTPUT;
}

<SPECIAL>\({WS}repeat{WS}\) {
  yy_pop_state();
  return REPEAT;
}

<SPECIAL>\({WS}rep{WS}\) {
  yy_pop_state();
  return REP;
}

<SPECIAL>\({WS}single{WS}\) {
  yy_pop_state();
  return SINGLE;
}

<SPECIAL>\({WS}first{WS}\) {
  yy_pop_state();
  return FIRST;
}

<SPECIAL>\({WS}last{WS}\) {
  yy_pop_state();
  return LAST;
}

<SPECIAL>\({WS}empty{WS}\) {
  yy_pop_state();
  return EMPTY;
}

<SPECIAL>\({WS}define/{ID_END} {
  /* Directive takes arguments: scan them in NESTED. */
  yy_push_state(NESTED);
  return DEFINE;
}

<SPECIAL>\({WS}try{WS}\) {
  yy_pop_state();
  return TRY;
}

<SPECIAL>\({WS}catch/{ID_END} {
  /* Directive takes arguments: scan them in NESTED. */
  yy_push_state(NESTED);
  return CATCH;
}

<SPECIAL>\({WS}finally{WS}\) {
  yy_pop_state();
  return FINALLY;
}

<SPECIAL,NESTED>\{|\( {
  yy_push_state(NESTED);
  /* NOTE(review): right after the push, yy_top_state() is the
     state just left (SPECIAL or NESTED), so this test looks
     like it can never succeed -- confirm the intent. */
  if (yy_top_state() == INITIAL
      || yy_top_state() == QSILIT)
    yy_pop_state();
  return yytext[0];
}

<SPECIAL,NESTED>\}|\) {
  yy_pop_state();
  /* Closing the outermost bracket also ends the directive. */
  if (yy_top_state() == INITIAL
      || yy_top_state() == QSILIT)
    yy_pop_state();
  return yytext[0];
}

<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ }

<SPECIAL,NESTED>\" {
  yy_push_state(STRLIT);
  return '"';
}

<SPECIAL,NESTED>\' {
  yy_push_state(CHRLIT);
  return '\'';
}

<SPECIAL,NESTED>` {
  yy_push_state(QSILIT);
  return '`';
}

<SPECIAL>@ {
  /* A second @ right after the first yields a literal "@" as text. */
  yy_pop_state();
  yylval.lexeme = strdup("@");
  return TEXT;
}

<SPECIAL,NESTED>\n {
  lineno++;
}

<SPECIAL,NESTED>[/] {
  yy_push_state(REGEX);
  return '/';
}

<SPECIAL,NESTED>\. {
  yylval.chr = '.';
  return '.';
}

<SPECIAL>[\\][abtnvfre] {
  /* Letter escape: becomes a one-character TEXT token. */
  char lexeme[2];
  lexeme[0] = char_esc(yytext[1]);
  lexeme[1] = 0;
  yylval.lexeme = strdup(lexeme);
  yy_pop_state();
  return TEXT;
}

<SPECIAL>[\\](x{HEX}+|{OCT}+) {
  /* Numeric escape: becomes a one-character TEXT token. */
  char lexeme[2];
  lexeme[0] = num_esc(yytext + 1);
  lexeme[1] = 0;
  yylval.lexeme = strdup(lexeme);
  yy_pop_state();
  return TEXT;
}

<SPECIAL,NESTED>. {
  yyerrorf("bad character in directive: '%c'",
           yytext[0]);
}

<REGEX>[/] {
  /* Closing slash: leave REGEX, and leave the directive too if
     the regex was its outermost element. */
  yy_pop_state();
  if (yy_top_state() == INITIAL
      || yy_top_state() == QSILIT)
    yy_pop_state();
  yylval.chr = '/';
  return '/';
}

<REGEX>[\\][abtnvfre] {
  yylval.chr = char_esc(yytext[1]);
  return REGCHAR;
}

<REGEX>[\\](x{HEX}+|{OCT}+) {
  yylval.chr = num_esc(yytext + 1);
  return REGCHAR;
}

<REGEX>\n {
  lineno++;
  yyerror("newline in regex");
}

<REGEX>[.*?+^] {
  yylval.chr = yytext[0];
  return yytext[0];
}

<REGEX>[\[\]\-] {
  yylval.chr = yytext[0];
  return yytext[0];
}

<REGEX>[()|] {
  yylval.chr = yytext[0];
  return yytext[0];
}

<REGEX>[\\]. {
  /* Any other escaped character stands for itself. */
  yylval.chr = yytext[1];
  return REGCHAR;
}

<REGEX>. {
  yylval.chr = yytext[0];
  return REGCHAR;
}

<INITIAL>[^@\n]+ {
  yylval.lexeme = strdup(yytext);
  return TEXT;
}

<INITIAL>\n {
  lineno++;
  return '\n';
}

<INITIAL>@{WS}\* {
  yy_push_state(SPECIAL);
  return '*';
}

<INITIAL>@ {
  yy_push_state(SPECIAL);
}

<INITIAL>^@#.*\n {
  /* eat whole line comment */
  lineno++;
}

<INITIAL>@#.* {
  /* comment to end of line */
}

<STRLIT>\" {
  yy_pop_state();
  return yytext[0];
}

<CHRLIT>\' {
  yy_pop_state();
  return yytext[0];
}

<QSILIT>` {
  yy_pop_state();
  return yytext[0];
}

<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'] {
  yylval.chr = char_esc(yytext[1]);
  return LITCHAR;
}

<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) {
  yylval.chr = num_esc(yytext + 1);
  return LITCHAR;
}

<STRLIT>\n {
  /* Reported as an error, but the newline is still delivered
     as part of the literal. */
  yyerror("newline in string literal");
  lineno++;
  yylval.chr = yytext[0];
  return LITCHAR;
}

<CHRLIT>\n {
  /* Reported as an error, but the newline is still delivered
     as part of the literal. */
  yyerror("newline in character literal");
  lineno++;
  yylval.chr = yytext[0];
  return LITCHAR;
}

<QSILIT>\n {
  /* Reported as an error, but the newline is still delivered
     as part of the literal. */
  yyerror("newline in string quasiliteral");
  lineno++;
  yylval.chr = yytext[0];
  return LITCHAR;
}

<QSILIT>@ {
  /* Directive embedded in a quasiliteral. */
  yy_push_state(SPECIAL);
}

<STRLIT,CHRLIT,QSILIT>. {
  yylval.chr = yytext[0];
  return LITCHAR;
}

%%