summaryrefslogtreecommitdiffstats
path: root/parser.l
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-07-31 17:33:59 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-07-31 17:40:55 -0700
commit0b38bc996c4c7e2693931bbd5103c7772b56b4bd (patch)
tree8e74fd6b7efc3a0fb87037b2bb58b9d8c6129339 /parser.l
parent2f5e7a5b96039b7a00543b4056bab7ec85c8db4b (diff)
downloadtxr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.gz
txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.bz2
txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.zip
txr-015 2009-10-15txr-015
Diffstat (limited to 'parser.l')
-rw-r--r--parser.l523
1 files changed, 523 insertions, 0 deletions
diff --git a/parser.l b/parser.l
new file mode 100644
index 00000000..7a5f0c17
--- /dev/null
+++ b/parser.l
@@ -0,0 +1,523 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <dirent.h>
+#include "y.tab.h"
+#include "lib.h"
+#include "gc.h"
+#include "stream.h"
+#include "parser.h"
+
+#define YY_NO_UNPUT
+
+/*
+ * Custom flex input hook: fills buf with characters obtained from
+ * yyin_stream through get_char, stopping after a newline has been stored,
+ * when max_size characters have been stored, or when get_char returns a
+ * false (nil) value. The number of characters stored is assigned to result.
+ *
+ * ch starts out holding a non-newline value: when get_char returns nil on
+ * the very first iteration the loop never assigns ch, and the newline test
+ * that follows the loop must not read an indeterminate value.
+ */
+#define YY_INPUT(buf, result, max_size) \
+ do { \
+ obj_t *c = nil; \
+ int n, ch = 0; \
+ for (n = 0; n < max_size && \
+ (c = get_char(yyin_stream)) && \
+ (ch = c_chr(c)) != '\n'; ++n) \
+ buf[n] = (char) ch; \
+ if (ch == '\n') \
+ buf[n++] = (char) ch; \
+ result = n; \
+ } while (0)
+
+obj_t *yyin_stream; /* stream that YY_INPUT pulls characters from via get_char */
+
+long lineno = 1; /* current line number: bumped by the newline rules, printed by yyerrorf */
+int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */
+int opt_nobindings = 0; /* not referenced in the visible scanner code; presumably read elsewhere -- TODO confirm */
+int opt_arraydims = 1; /* not referenced in the visible scanner code; presumably read elsewhere -- TODO confirm */
+
+int errors; /* number of errors reported through yyerrorf */
+
+void yyerror(const char *s) /* reports the fixed message s through yyerrorf */
+{
+ yyerrorf("%s", s); /* "%s" so that s itself is never interpreted as a format string */
+}
+
+/*
+ * printf-style error report. Every call bumps the global error counter;
+ * the message itself, prefixed with program name, spec file and current
+ * line number, is written to stderr only when opt_loglevel is at least 1.
+ */
+void yyerrorf(const char *s, ...)
+{
+  va_list args;
+
+  errors++;
+
+  /* quiet mode: the error is counted but nothing is printed */
+  if (opt_loglevel < 1)
+    return;
+
+  va_start(args, s);
+  fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno);
+  vfprintf(stderr, s, args);
+  fputc('\n', stderr);
+  va_end(args);
+}
+
+/*
+ * Reports a token that turned up where it is not allowed.
+ *
+ * tok is looked up in a table of printable descriptions. context, when
+ * non-null, names the construct being processed. The message depends on
+ * which of the two is available:
+ *   description and context: "misplaced <description> in <context>"
+ *   description only:        "unexpected <description>"
+ *   context only:            "unterminated <context>"
+ *   neither:                 "unexpected end of input"
+ */
+void yybadtoken(int tok, const char *context)
+{
+  static const struct {
+    int code;
+    const char *desc;
+  } descriptions[] = {
+    { TEXT, "text" },
+    { IDENT, "identifier" },
+    { ALL, "\"all\"" },
+    { SOME, "\"some\"" },
+    { NONE, "\"none\"" },
+    { MAYBE, "\"maybe\"" },
+    { CASES, "\"cases\"" },
+    { AND, "\"and\"" },
+    { OR, "\"or\"" },
+    { END, "\"end\"" },
+    { COLLECT, "\"collect\"" },
+    { UNTIL, "\"until\"" },
+    { COLL, "\"coll\"" },
+    { OUTPUT, "\"output\"" },
+    { REPEAT, "\"repeat\"" },
+    { REP, "\"rep\"" },
+    { SINGLE, "\"single\"" },
+    { FIRST, "\"first\"" },
+    { LAST, "\"last\"" },
+    { EMPTY, "\"empty\"" },
+    { DEFINE, "\"define\"" },
+    { TRY, "\"try\"" },
+    { CATCH, "\"catch\"" },
+    { FINALLY, "\"finally\"" },
+    { NUMBER, "\"number\"" },
+    { REGCHAR, "regular expression character" },
+    { LITCHAR, "string literal character" },
+  };
+  const char *problem = 0;
+  size_t i;
+
+  for (i = 0; i < sizeof descriptions / sizeof descriptions[0]; i++) {
+    if (descriptions[i].code == tok) {
+      problem = descriptions[i].desc;
+      break;
+    }
+  }
+
+  if (problem != 0 && context) {
+    yyerrorf("misplaced %s in %s", problem, context);
+  } else if (problem != 0) {
+    yyerrorf("unexpected %s", problem);
+  } else if (context) {
+    yyerrorf("unterminated %s", context);
+  } else {
+    yyerrorf("unexpected end of input");
+  }
+}
+
+/*
+ * Maps the letter that follows a backslash in an escape sequence to the
+ * character code it denotes ('e' gives 27; the three quote characters map
+ * to themselves). Any letter outside the table aborts the program.
+ */
+static int char_esc(int letter)
+{
+  /* letters[i] translates to codes[i]; the arrays are kept in step */
+  static const char letters[] = { 'a', 'b', 't', 'n', 'v', 'f', 'r', 'e',
+                                  '"', '\'', '`' };
+  static const char codes[] = { '\a', '\b', '\t', '\n', '\v', '\f', '\r', 27,
+                                '"', '\'', '`' };
+  size_t i;
+
+  for (i = 0; i < sizeof letters; i++) {
+    if (letter == letters[i])
+      return codes[i];
+  }
+
+  abort();
+}
+
+/*
+ * Converts the text of a numeric escape (the part after the backslash) to
+ * its value. A leading 'x' selects hexadecimal and is skipped; otherwise
+ * the text is read as octal. Text longer than three characters is reported
+ * through yyerror, and the conversion is still performed and returned.
+ */
+static int num_esc(char *num)
+{
+  const int is_hex = (num[0] == 'x');
+  const char *digits = is_hex ? num + 1 : num;
+
+  /* the length limit counts the 'x' prefix, when present */
+  if (strlen(num) > 3)
+    yyerror(is_hex ? "too many digits in hex character escape"
+                   : "too many digits in octal character escape");
+
+  return strtol(digits, 0, is_hex ? 16 : 8);
+}
+
+%}
+
+%option stack
+
+/* Named patterns: TOK is an identifier or an optionally signed run of
+ * decimal digits; ID_END and NUM_END match one character that cannot
+ * continue an identifier or a number; WS is a possibly empty run of tabs
+ * and spaces; HEX and OCT match one hexadecimal or octal digit. */
+TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
+ID_END [^a-zA-Z0-9_]
+NUM_END [^0-9]
+WS [\t ]*
+HEX [0-9A-Fa-f]
+OCT [0-7]
+
+%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT
+
+%%
+
+<SPECIAL,NESTED>{TOK} { /* identifier or number inside a directive */
+ long val;
+ char *errp;
+
+ errno = 0; /* cleared so that ERANGE below can only come from this strtol call */
+
+ val = strtol(yytext, &errp, 10);
+
+ if (yy_top_state() == INITIAL
+ || yy_top_state() == QSILIT)
+ yy_pop_state(); /* the state to return to is text or a quasiliteral: leave the directive */
+
+ if (*errp != 0) {
+ /* not a number */
+ yylval.lexeme = strdup(yytext); /* heap copy of the token text */
+ return IDENT;
+ }
+
+ if ((val == LONG_MAX || val == LONG_MIN)
+ && errno == ERANGE)
+ yyerror("numeric overflow in token"); /* reported; the value from strtol is still returned */
+
+ yylval.num = val;
+ return NUMBER;
+ }
+
+<SPECIAL>\({WS}all{WS}\) { /* keyword rules: a parenthesized keyword, blanks allowed around it */
+ yy_pop_state(); /* whole directive consumed: return to the state that pushed SPECIAL */
+ return ALL;
+ }
+
+<SPECIAL>\({WS}some{WS}\) {
+ yy_pop_state();
+ return SOME;
+ }
+
+<SPECIAL>\({WS}none{WS}\) {
+ yy_pop_state();
+ return NONE;
+ }
+
+<SPECIAL>\({WS}maybe{WS}\) {
+ yy_pop_state();
+ return MAYBE;
+ }
+
+<SPECIAL>\({WS}cases{WS}\) {
+ yy_pop_state();
+ return CASES;
+ }
+
+<SPECIAL>\({WS}and{WS}\) {
+ yy_pop_state();
+ return AND;
+ }
+
+<SPECIAL>\({WS}or{WS}\) {
+ yy_pop_state();
+ return OR;
+ }
+
+<SPECIAL>\({WS}end{WS}\) {
+ yy_pop_state();
+ return END;
+ }
+
+<SPECIAL>\({WS}collect{WS}\) {
+ yy_pop_state();
+ return COLLECT;
+ }
+
+<SPECIAL>\({WS}coll{WS}\) {
+ yy_pop_state();
+ return COLL;
+ }
+
+<SPECIAL>\({WS}until{WS}\) {
+ yy_pop_state();
+ return UNTIL;
+ }
+
+<SPECIAL>\({WS}output/{ID_END} { /* "(output" followed by a non-identifier character, which is left unread */
+ yy_push_state(NESTED); /* the rest, up to the closing paren, is scanned in NESTED */
+ return OUTPUT;
+ }
+
+<SPECIAL>\({WS}repeat{WS}\) {
+ yy_pop_state();
+ return REPEAT;
+ }
+
+
+<SPECIAL>\({WS}rep{WS}\) {
+ yy_pop_state();
+ return REP;
+ }
+
+<SPECIAL>\({WS}single{WS}\) {
+ yy_pop_state();
+ return SINGLE;
+ }
+
+<SPECIAL>\({WS}first{WS}\) {
+ yy_pop_state();
+ return FIRST;
+ }
+
+<SPECIAL>\({WS}last{WS}\) {
+ yy_pop_state();
+ return LAST;
+ }
+
+<SPECIAL>\({WS}empty{WS}\) {
+ yy_pop_state();
+ return EMPTY;
+ }
+
+<SPECIAL>\({WS}define/{ID_END} { /* same shape as the "output" rule: open paren and keyword only */
+ yy_push_state(NESTED);
+ return DEFINE;
+ }
+
+<SPECIAL>\({WS}try{WS}\) {
+ yy_pop_state();
+ return TRY;
+ }
+
+<SPECIAL>\({WS}catch/{ID_END} { /* same shape as the "output" rule: open paren and keyword only */
+ yy_push_state(NESTED);
+ return CATCH;
+ }
+
+<SPECIAL>\({WS}finally{WS}\) {
+ yy_pop_state();
+ return FINALLY;
+ }
+
+<SPECIAL,NESTED>\{|\( { /* opening brace or paren: enter one more NESTED level */
+ yy_push_state(NESTED);
+ if (yy_top_state() == INITIAL /* NOTE(review): after the push above, the stack top should be the pushing state (SPECIAL or NESTED), so this test looks unreachable -- confirm */
+ || yy_top_state() == QSILIT)
+ yy_pop_state();
+ return yytext[0]; /* the bracket character itself is the token */
+ }
+
+<SPECIAL,NESTED>\}|\) { /* closing brace or paren: leave the current level */
+ yy_pop_state();
+ if (yy_top_state() == INITIAL
+ || yy_top_state() == QSILIT)
+ yy_pop_state(); /* next state to return to is text or a quasiliteral: pop once more to get there */
+ return yytext[0]; /* the bracket character itself is the token */
+ }
+
+<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ }
+
+<SPECIAL,NESTED>\" { /* opening double quote: following input is scanned in STRLIT */
+ yy_push_state(STRLIT);
+ return '"';
+ }
+
+<SPECIAL,NESTED>\' { /* opening single quote: following input is scanned in CHRLIT */
+ yy_push_state(CHRLIT);
+ return '\'';
+ }
+
+<SPECIAL,NESTED>` { /* opening backquote: following input is scanned in QSILIT */
+ yy_push_state(QSILIT);
+ return '`';
+ }
+
+<SPECIAL>@ { /* an @ while in SPECIAL: returned as the one-character text "@" */
+ yy_pop_state();
+ yylval.lexeme = strdup("@"); /* heap copy, like the other TEXT lexemes */
+ return TEXT;
+ }
+
+<SPECIAL,NESTED>\n { /* newline inside a directive: counted, no token returned */
+ lineno++;
+ }
+
+<SPECIAL,NESTED>[/] { /* opening slash: following input is scanned in REGEX */
+ yy_push_state(REGEX);
+ return '/';
+ }
+
+<SPECIAL,NESTED>\. { /* a dot is returned as its own token; the state is unchanged */
+ yylval.chr = '.';
+ return '.';
+ }
+
+<SPECIAL>[\\][abtnvfre] { /* letter escape in SPECIAL: becomes a one-character TEXT */
+ char lexeme[2];
+ lexeme[0] = char_esc(yytext[1]); /* yytext[0] is the backslash */
+ lexeme[1] = 0;
+ yylval.lexeme = strdup(lexeme);
+ yy_pop_state();
+ return TEXT;
+ }
+
+<SPECIAL>[\\](x{HEX}+|{OCT}+) { /* numeric escape: backslash, then x and hex digits, or octal digits */
+ char lexeme[2];
+ lexeme[0] = num_esc(yytext + 1); /* skips the backslash; the int result is narrowed to char */
+ lexeme[1] = 0;
+ yylval.lexeme = strdup(lexeme);
+ yy_pop_state();
+ return TEXT;
+ }
+
+<SPECIAL,NESTED>. { /* any character no other directive rule matched: reported, no token returned */
+ yyerrorf("bad character in directive: '%c'",
+ yytext[0]);
+ }
+
+<REGEX>[/] { /* closing slash: leave REGEX */
+ yy_pop_state();
+ if (yy_top_state() == INITIAL
+ || yy_top_state() == QSILIT)
+ yy_pop_state(); /* next state to return to is text or a quasiliteral: pop once more to get there */
+ yylval.chr = '/';
+ return '/';
+ }
+
+
+<REGEX>[\\][abtnvfre] { /* letter escape: translated by char_esc */
+ yylval.chr = char_esc(yytext[1]);
+ return REGCHAR;
+ }
+
+<REGEX>[\\](x{HEX}+|{OCT}+) { /* numeric escape: translated by num_esc */
+ yylval.chr = num_esc(yytext + 1);
+ return REGCHAR;
+ }
+
+<REGEX>\n { /* newline inside a regex: counted and reported, no token returned */
+ lineno++;
+ yyerror("newline in regex");
+ }
+
+<REGEX>[.*?+^] { /* these characters are returned as their own token codes */
+ yylval.chr = yytext[0];
+ return yytext[0];
+ }
+
+
+<REGEX>[\[\]\-] { /* brackets and dash: also returned as their own token codes */
+ yylval.chr = yytext[0];
+ return yytext[0];
+ }
+
+<REGEX>[()|] { /* parentheses and bar: also returned as their own token codes */
+ yylval.chr = yytext[0];
+ return yytext[0];
+ }
+
+<REGEX>[\\]. { /* backslash before any other character: that character taken literally */
+ yylval.chr = yytext[1];
+ return REGCHAR;
+ }
+
+<REGEX>. { /* any remaining character is an ordinary regex character */
+ yylval.chr = yytext[0];
+ return REGCHAR;
+ }
+
+<INITIAL>[^@\n]+ { /* run of ordinary text up to the next @ or newline */
+ yylval.lexeme = strdup(yytext); /* heap copy of the matched text */
+ return TEXT;
+ }
+
+<INITIAL>\n { /* newline in plain text: counted and returned as a token */
+ lineno++;
+ return '\n';
+ }
+
+<INITIAL>@{WS}\* { /* @, optional blanks, then *: enter SPECIAL and return '*' */
+ yy_push_state(SPECIAL);
+ return '*';
+ }
+
+<INITIAL>@ { /* lone @: switch to SPECIAL, no token returned */
+ yy_push_state(SPECIAL);
+ }
+
+<INITIAL>^@#.*\n {
+ /* eat whole line comment */
+ lineno++; /* the match includes the newline, so it is counted here */
+ }
+
+<INITIAL>@#.* {
+ /* comment to end of line */
+ }
+
+<STRLIT>\" { /* closing double quote: leave STRLIT */
+ yy_pop_state();
+ return yytext[0];
+ }
+
+<CHRLIT>\' { /* closing single quote: leave CHRLIT */
+ yy_pop_state();
+ return yytext[0];
+ }
+
+<QSILIT>` { /* closing backquote: leave QSILIT */
+ yy_pop_state();
+ return yytext[0];
+ }
+
+<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'] { /* letter or quote escape: translated by char_esc */
+ yylval.chr = char_esc(yytext[1]);
+ return LITCHAR;
+ }
+
+<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) { /* numeric escape; this rule does not apply in QSILIT */
+ yylval.chr = num_esc(yytext + 1);
+ return LITCHAR;
+ }
+<STRLIT>\n { /* reported as an error, but the newline is still returned as a literal character */
+ yyerror("newline in string literal");
+ lineno++;
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+
+<CHRLIT>\n { /* reported as an error, but the newline is still returned as a literal character */
+ yyerror("newline in character literal");
+ lineno++;
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+
+<QSILIT>\n { /* reported as an error, but the newline is still returned as a literal character */
+ yyerror("newline in string quasiliteral");
+ lineno++;
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+
+<QSILIT>@ { /* @ inside a quasiliteral: switch to SPECIAL, no token returned */
+ yy_push_state(SPECIAL);
+ }
+
+<STRLIT,CHRLIT,QSILIT>. { /* any other character is returned as a literal character */
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+
+%%