txr-015 2009-10-15txr-015

author: Kaz Kylheku <kaz@kylheku.com> 2017-07-31 17:33:59 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2017-07-31 17:40:55 -0700
commit: 0b38bc996c4c7e2693931bbd5103c7772b56b4bd (patch)
tree: 8e74fd6b7efc3a0fb87037b2bb58b9d8c6129339 /parser.l
parent: 2f5e7a5b96039b7a00543b4056bab7ec85c8db4b (diff)
download: txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.gz
txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.bz2
txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.zip
1 files changed, 523 insertions, 0 deletions
diff --git a/parser.l b/parser.l
new file mode 100644
index 00000000..7a5f0c17
--- /dev/null
+++ b/parser.l
@@ -0,0 +1,523 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *   3. The name of the author may not be used to endorse or promote
+ *      products derived from this software without specific prior
+ *      written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <dirent.h>
+#include "y.tab.h"
+#include "lib.h"
+#include "gc.h"
+#include "stream.h"
+#include "parser.h"
+
+#define YY_NO_UNPUT
+
+/*
+ * Custom flex input hook: fills BUF with characters pulled from yyin_stream
+ * via get_char, one line at a time.  Reading stops after a newline has been
+ * stored, when get_char returns nil, or once MAX_SIZE characters have been
+ * stored.  RESULT receives the number of characters stored; under flex's
+ * YY_INPUT contract a result of 0 reports end of input.
+ *
+ * ch is given a non-newline initial value because the test after the loop
+ * reads it even when the loop never assigned it (get_char returned nil on
+ * the very first call, or MAX_SIZE was 0).  Without the initializer that
+ * read is of an indeterminate value and could store a bogus newline.
+ */
+#define YY_INPUT(buf, result, max_size)                 \
+  do {                                                  \
+    obj_t *c = nil;                                     \
+    int n;                                              \
+    int ch = 0;                                         \
+    for (n = 0; n < (max_size) &&                       \
+                (c = get_char(yyin_stream)) &&          \
+                (ch = c_chr(c)) != '\n'; ++n)           \
+      (buf)[n] = (char) ch;                             \
+    if (ch == '\n')                                     \
+      (buf)[n++] = (char) ch;                           \
+    (result) = n;                                       \
+  } while (0)
+
+/* Stream from which YY_INPUT pulls the scanner's characters. */
+obj_t *yyin_stream;
+
+/* Current line number: bumped by the newline rules, printed by yyerrorf. */
+long lineno = 1;
+int opt_loglevel = 1;   /* 0 - quiet; 1 - normal; 2 - verbose */
+/* The next two options are only defined here; nothing in this file reads them. */
+int opt_nobindings = 0;
+int opt_arraydims = 1;
+
+/* Number of errors reported so far; incremented by every yyerrorf call. */
+int errors;
+
+/*
+ * Reports the fixed message S as an error.  The text is passed as a "%s"
+ * argument rather than as the format itself, so any '%' characters in S
+ * are printed literally.
+ */
+void yyerror(const char *s)
+{
+  yyerrorf("%s", s);
+}
+
+/*
+ * Records one error in the global counter and, unless logging is quiet
+ * (opt_loglevel below 1), prints a printf-style diagnostic to stderr.  The
+ * message is prefixed with the program name, the spec file name and the
+ * current line number, and is terminated by a newline.
+ */
+void yyerrorf(const char *s, ...)
+{
+  va_list args;
+
+  /* The error is counted even when nothing is printed. */
+  errors++;
+
+  if (opt_loglevel < 1)
+    return;
+
+  va_start (args, s);
+  fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno);
+  vfprintf(stderr, s, args);
+  putc('\n', stderr);
+  va_end (args);
+}
+
+/*
+ * Reports a syntax error concerning token TOK through yyerrorf.
+ *
+ * TOK is translated to a human-readable description when it is one of the
+ * token codes listed below; any other value gets no description.  CONTEXT,
+ * when non-null, names the construct that was being parsed.  The message
+ * produced is:
+ *
+ *   described token, context given:    "misplaced <token> in <context>"
+ *   described token, no context:       "unexpected <token>"
+ *   undescribed token, context given:  "unterminated <context>"
+ *   undescribed token, no context:     "unexpected end of input"
+ */
+void yybadtoken(int tok, const char *context)
+{
+  const char *what = 0;
+
+  switch (tok) {
+  case TEXT:    what = "text"; break;
+  case IDENT:   what = "identifier"; break;
+  case ALL:     what = "\"all\""; break;
+  case SOME:    what = "\"some\""; break;
+  case NONE:    what = "\"none\""; break;
+  case MAYBE:   what = "\"maybe\""; break;
+  case CASES:   what = "\"cases\""; break;
+  case AND:     what = "\"and\""; break;
+  case OR:      what = "\"or\""; break;
+  case END:     what = "\"end\""; break;
+  case COLLECT: what = "\"collect\""; break;
+  case UNTIL:   what = "\"until\""; break;
+  case COLL:    what = "\"coll\""; break;
+  case OUTPUT:  what = "\"output\""; break;
+  case REPEAT:  what = "\"repeat\""; break;
+  case REP:     what = "\"rep\""; break;
+  case SINGLE:  what = "\"single\""; break;
+  case FIRST:   what = "\"first\""; break;
+  case LAST:    what = "\"last\""; break;
+  case EMPTY:   what = "\"empty\""; break;
+  case DEFINE:  what = "\"define\""; break;
+  case TRY:     what = "\"try\""; break;
+  case CATCH:   what = "\"catch\""; break;
+  case FINALLY: what = "\"finally\""; break;
+  case NUMBER:  what = "\"number\""; break;
+  case REGCHAR: what = "regular expression character"; break;
+  case LITCHAR: what = "string literal character"; break;
+  }
+
+  if (what == 0) {
+    /* No description is available for this token code. */
+    if (context) {
+      yyerrorf("unterminated %s", context);
+    } else {
+      yyerrorf("unexpected end of input");
+    }
+    return;
+  }
+
+  if (context) {
+    yyerrorf("misplaced %s in %s", what, context);
+  } else {
+    yyerrorf("unexpected %s", what);
+  }
+}
+
+/*
+ * Maps the letter of a backslash escape to the character it denotes: the
+ * C escapes a, b, t, n, v, f and r; e for the escape character (27); and
+ * the three quote characters ", ' and `, each of which stands for itself.
+ *
+ * Any other letter aborts the program.  The scanner patterns that call
+ * this function admit only the letters listed above.
+ */
+static int char_esc(int letter)
+{
+  static const struct {
+    int from;
+    int to;
+  } esc_tab[] = {
+    { 'a', '\a' },
+    { 'b', '\b' },
+    { 't', '\t' },
+    { 'n', '\n' },
+    { 'v', '\v' },
+    { 'f', '\f' },
+    { 'r', '\r' },
+    { 'e', 27 },
+    { '"', '"' },
+    { '\'', '\'' },
+    { '`', '`' }
+  };
+  size_t i;
+
+  for (i = 0; i < sizeof esc_tab / sizeof esc_tab[0]; i++)
+    if (esc_tab[i].from == letter)
+      return esc_tab[i].to;
+
+  abort();
+}
+
+/*
+ * Converts the text of a numeric character escape (the part after the
+ * backslash) to its value.  A leading 'x' selects hexadecimal, with the
+ * digits following the 'x'; otherwise the whole text is read as octal.
+ *
+ * Text longer than three characters (counting the 'x' for hex) is reported
+ * through yyerror, but the conversion is still performed and returned.
+ */
+static int num_esc(char *num)
+{
+  const int is_hex = (num[0] == 'x');
+  const char *digits = is_hex ? num + 1 : num;
+
+  if (strlen(num) > 3)
+    yyerror(is_hex ? "too many digits in hex character escape"
+                   : "too many digits in octal character escape");
+
+  return strtol(digits, 0, is_hex ? 16 : 8);
+}
+
+%}
+
+%option stack
+
+/* TOK: an identifier, or an optionally signed run of decimal digits. */
+TOK     [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
+/* ID_END: any character that cannot continue an identifier. */
+ID_END  [^a-zA-Z0-9_]
+/* NUM_END: any non-digit character. */
+NUM_END [^0-9]
+/* WS: optional horizontal whitespace (tabs and spaces; may be empty). */
+WS      [\t ]*
+HEX     [0-9A-Fa-f]
+OCT     [0-7]
+
+/* Exclusive start conditions: SPECIAL is entered on '@', NESTED on an
+   opening bracket or an argument-taking directive, REGEX on '/', and
+   STRLIT, CHRLIT and QSILIT on the three quote characters. */
+%x      SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT
+
+%%
+
+<SPECIAL,NESTED>{TOK}   {
+                          /* One rule serves both identifiers and numbers:
+                             strtol consumes all of yytext only when the
+                             token is numeric, which is what tells the two
+                             apart below. */
+                          long val;
+                          char *errp;
+
+                          errno = 0;
+
+                          val = strtol(yytext, &errp, 10);
+
+                          /* If the state saved beneath the current one is
+                             INITIAL or QSILIT, this token completes the
+                             directive, so drop back to that state. */
+                          if (yy_top_state() == INITIAL
+                              || yy_top_state() == QSILIT)
+                            yy_pop_state();
+
+                          if (*errp != 0) {
+                            /* not a number: strtol stopped short, so this
+                               is an identifier; pass on a strdup'd copy */
+                            yylval.lexeme = strdup(yytext);
+                            return IDENT;
+                          }
+
+                          /* Overflow is reported, but the clamped value is
+                             still returned as a NUMBER token. */
+                          if ((val == LONG_MAX || val == LONG_MIN)
+                              && errno == ERANGE)
+                            yyerror("numeric overflow in token");
+
+                          yylval.num = val;
+                          return NUMBER;
+                        }
+
+<SPECIAL>\({WS}all{WS}\)        {
+                                  /* A complete "(keyword)" directive: pop
+                                     SPECIAL and return the keyword token.
+                                     Every rule in this group that ends in
+                                     "{WS}\)" follows this same pattern. */
+                                  yy_pop_state();
+                                  return ALL;
+                                }
+
+<SPECIAL>\({WS}some{WS}\)       {
+                                  yy_pop_state();
+                                  return SOME;
+                                }
+
+<SPECIAL>\({WS}none{WS}\)       {
+                                  yy_pop_state();
+                                  return NONE;
+                                }
+
+<SPECIAL>\({WS}maybe{WS}\)      {
+                                  yy_pop_state();
+                                  return MAYBE;
+                                }
+
+<SPECIAL>\({WS}cases{WS}\)      {
+                                  yy_pop_state();
+                                  return CASES;
+                                }
+
+<SPECIAL>\({WS}and{WS}\)        {
+                                  yy_pop_state();
+                                  return AND;
+                                }
+
+<SPECIAL>\({WS}or{WS}\)         {
+                                  yy_pop_state();
+                                  return OR;
+                                }
+
+<SPECIAL>\({WS}end{WS}\)        {
+                                  yy_pop_state();
+                                  return END;
+                                }
+
+<SPECIAL>\({WS}collect{WS}\)    {
+                                  yy_pop_state();
+                                  return COLLECT;
+                                }
+
+<SPECIAL>\({WS}coll{WS}\)       {
+                                  yy_pop_state();
+                                  return COLL;
+                                }
+
+<SPECIAL>\({WS}until{WS}\)      {
+                                  yy_pop_state();
+                                  return UNTIL;
+                                }
+
+<SPECIAL>\({WS}output/{ID_END}  {
+                                  /* Only "(output" is consumed; the trailing
+                                     context merely requires that the keyword
+                                     is not followed by an identifier
+                                     character.  The rest of the directive,
+                                     up to its ")", is scanned in NESTED. */
+                                  yy_push_state(NESTED);
+                                  return OUTPUT;
+                                }
+
+<SPECIAL>\({WS}repeat{WS}\)     {
+                                  yy_pop_state();
+                                  return REPEAT;
+                                }
+
+
+<SPECIAL>\({WS}rep{WS}\)        {
+                                  yy_pop_state();
+                                  return REP;
+                                }
+
+<SPECIAL>\({WS}single{WS}\)     {
+                                  yy_pop_state();
+                                  return SINGLE;
+                                }
+
+<SPECIAL>\({WS}first{WS}\)      {
+                                  yy_pop_state();
+                                  return FIRST;
+                                }
+
+<SPECIAL>\({WS}last{WS}\)       {
+                                  yy_pop_state();
+                                  return LAST;
+                                }
+
+<SPECIAL>\({WS}empty{WS}\)      {
+                                  yy_pop_state();
+                                  return EMPTY;
+                                }
+
+<SPECIAL>\({WS}define/{ID_END}  {
+                                  /* Like "(output": the keyword opens a
+                                     directive whose remainder is scanned
+                                     in NESTED. */
+                                  yy_push_state(NESTED);
+                                  return DEFINE;
+                                }
+
+<SPECIAL>\({WS}try{WS}\)        {
+                                  yy_pop_state();
+                                  return TRY;
+                                }
+
+<SPECIAL>\({WS}catch/{ID_END}   {
+                                  /* Like "(output": the keyword opens a
+                                     directive whose remainder is scanned
+                                     in NESTED. */
+                                  yy_push_state(NESTED);
+                                  return CATCH;
+                                }
+
+<SPECIAL>\({WS}finally{WS}\)    {
+                                  yy_pop_state();
+                                  return FINALLY;
+                                }
+
+<SPECIAL,NESTED>\{|\(   {
+                          /* Opening bracket: enter one more NESTED level
+                             and return the bracket character itself.
+                             NOTE(review): yy_top_state() is consulted
+                             right after the push.  If it reports the state
+                             that push just saved (SPECIAL or NESTED here),
+                             this condition can never hold - confirm. */
+                          yy_push_state(NESTED);
+                          if (yy_top_state() == INITIAL
+                              || yy_top_state() == QSILIT)
+                            yy_pop_state();
+                          return yytext[0];
+                        }
+
+<SPECIAL,NESTED>\}|\)   {
+                          /* Closing bracket: leave the current level.  If
+                             the state now saved underneath is INITIAL or
+                             QSILIT, the whole directive is finished, so
+                             pop once more to return there.
+                             NOTE(review): for a closing bracket directly
+                             after '@', the first pop may already empty the
+                             state stack before yy_top_state() is called -
+                             confirm that is safe. */
+                          yy_pop_state();
+                          if (yy_top_state() == INITIAL
+                              || yy_top_state() == QSILIT)
+                            yy_pop_state();
+                          return yytext[0];
+                        }
+
+<SPECIAL,NESTED>[\t ]+  { /* Eat whitespace in directive */ }
+
+<SPECIAL,NESTED>\"      {
+                          /* Opening quote: the literal's body is scanned
+                             in STRLIT until the closing quote pops it. */
+                          yy_push_state(STRLIT);
+                          return '"';
+                        }
+
+<SPECIAL,NESTED>\'      {
+                          /* Opening quote of a character literal. */
+                          yy_push_state(CHRLIT);
+                          return '\'';
+                        }
+
+<SPECIAL,NESTED>`       {
+                          /* Opening backquote of a string quasiliteral. */
+                          yy_push_state(QSILIT);
+                          return '`';
+                        }
+
+<SPECIAL>@              {
+                          /* '@' seen while already in SPECIAL, i.e. "@@":
+                             yields a literal "@" as text and leaves
+                             SPECIAL. */
+                          yy_pop_state();
+                          yylval.lexeme = strdup("@");
+                          return TEXT;
+                        }
+
+<SPECIAL,NESTED>\n      {
+                          /* A newline inside a directive is only counted;
+                             no token is returned for it. */
+                          lineno++;
+                        }
+
+<SPECIAL,NESTED>[/]     {
+                          /* Opening slash: the regex body is scanned in
+                             REGEX. */
+                          yy_push_state(REGEX);
+                          return '/';
+                        }
+
+<SPECIAL,NESTED>\.      {
+                          /* Returned as its own token; the state is left
+                             unchanged. */
+                          yylval.chr = '.';
+                          return '.';
+                        }
+
+<SPECIAL>[\\][abtnvfre] {
+                          /* Backslash escape directly after '@': becomes a
+                             one-character TEXT token holding the escaped
+                             character, then SPECIAL is left. */
+                          char lexeme[2];
+                          lexeme[0] = char_esc(yytext[1]);
+                          lexeme[1] = 0;
+                          yylval.lexeme = strdup(lexeme);
+                          yy_pop_state();
+                          return TEXT;
+                        }
+
+<SPECIAL>[\\](x{HEX}+|{OCT}+)   {
+                                  /* Numeric escape after '@': as above, but
+                                     the value comes from num_esc and is
+                                     narrowed to a char on assignment. */
+                                  char lexeme[2];
+                                  lexeme[0] = num_esc(yytext + 1);
+                                  lexeme[1] = 0;
+                                  yylval.lexeme = strdup(lexeme);
+                                  yy_pop_state();
+                                  return TEXT;
+                                }
+
+<SPECIAL,NESTED>.       {
+                          /* Catch-all for anything no other directive rule
+                             matched: report it and keep scanning in the
+                             same state without returning a token. */
+                          yyerrorf("bad character in directive: '%c'",
+                                   yytext[0]);
+                        }
+
+<REGEX>[/]      {
+                  /* Closing slash: leave REGEX.  If the state then saved
+                     underneath is INITIAL or QSILIT, the regex was the
+                     whole directive, so pop once more to return there. */
+                  yy_pop_state();
+                  if (yy_top_state() == INITIAL
+                      || yy_top_state() == QSILIT)
+                    yy_pop_state();
+                  yylval.chr = '/';
+                  return '/';
+                }
+
+
+<REGEX>[\\][abtnvfre]   {
+                          /* Letter escape: returned as an ordinary regex
+                             character carrying the translated value. */
+                          yylval.chr = char_esc(yytext[1]);
+                          return REGCHAR;
+                        }
+
+<REGEX>[\\](x{HEX}+|{OCT}+)     {
+                                  /* Numeric escape: same, with the value
+                                     computed by num_esc. */
+                                  yylval.chr = num_esc(yytext + 1);
+                                  return REGCHAR;
+                                }
+
+<REGEX>\n       {
+                  /* The newline is counted and reported; no token is
+                     returned and the scanner stays in REGEX. */
+                  lineno++;
+                  yyerror("newline in regex");
+                }
+
+<REGEX>[.*?+^]  {
+                  /* Operator characters are returned as themselves. */
+                  yylval.chr = yytext[0];
+                  return yytext[0];
+                }
+
+
+<REGEX>[\[\]\-] {
+                  /* Character-class punctuation, returned as itself. */
+                  yylval.chr = yytext[0];
+                  return yytext[0];
+                }
+
+<REGEX>[()|]    {
+                  /* Grouping and alternation, returned as themselves. */
+                  yylval.chr = yytext[0];
+                  return yytext[0];
+                }
+
+<REGEX>[\\].    {
+                  /* Any other backslashed character stands for itself. */
+                  yylval.chr = yytext[1];
+                  return REGCHAR;
+                }
+
+<REGEX>.        {
+                  /* Everything else is an ordinary regex character. */
+                  yylval.chr = yytext[0];
+                  return REGCHAR;
+                }
+
+<INITIAL>[^@\n]+        {
+                          /* A run of plain text, up to the next '@' or
+                             newline, passed on as a strdup'd copy. */
+                          yylval.lexeme = strdup(yytext);
+                          return TEXT;
+                        }
+
+<INITIAL>\n     {
+                  /* Outside directives a newline is a token of its own. */
+                  lineno++;
+                  return '\n';
+                }
+
+<INITIAL>@{WS}\*        {
+                          /* "@*" (whitespace allowed between): enters
+                             SPECIAL and returns '*'. */
+                          yy_push_state(SPECIAL);
+                          return '*';
+                        }
+
+<INITIAL>@      {
+                  /* Start of a directive: switch to SPECIAL and keep
+                     scanning; '@' itself produces no token. */
+                  yy_push_state(SPECIAL);
+                }
+
+<INITIAL>^@#.*\n        {
+                          /* eat whole line comment: the newline is consumed
+                             with it, so it is counted here and no '\n'
+                             token is returned for this line */
+                          lineno++;
+                        }
+
+<INITIAL>@#.*   {
+                   /* comment to end of line; the newline itself is not
+                      part of the match */
+                }
+
+<STRLIT>\"      {
+                  /* Closing quote: leave STRLIT and return the quote. */
+                  yy_pop_state();
+                  return yytext[0];
+                }
+
+<CHRLIT>\'      {
+                  /* Closing quote of a character literal. */
+                  yy_pop_state();
+                  return yytext[0];
+                }
+
+<QSILIT>`       {
+                  /* Closing backquote of a quasiliteral. */
+                  yy_pop_state();
+                  return yytext[0];
+                }
+
+<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'] {
+                                          /* Letter or quote escape, in any
+                                             of the three literal kinds. */
+                                          yylval.chr = char_esc(yytext[1]);
+                                          return LITCHAR;
+                                        }
+
+<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+)     {
+                                          /* Numeric escape.  This rule lists
+                                             only STRLIT and CHRLIT, not
+                                             QSILIT. */
+                                          yylval.chr = num_esc(yytext + 1);
+                                          return LITCHAR;
+                                        }
+<STRLIT>\n              {
+                          /* Reported as an error, but the newline is still
+                             counted and passed on as a literal character;
+                             the scanner stays in STRLIT. */
+                          yyerror("newline in string literal");
+                          lineno++;
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+
+<CHRLIT>\n              {
+                          /* Same handling as for string literals. */
+                          yyerror("newline in character literal");
+                          lineno++;
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+
+<QSILIT>\n              {
+                          /* Same handling as for string literals. */
+                          yyerror("newline in string quasiliteral");
+                          lineno++;
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+
+<QSILIT>@               {
+                          /* '@' inside a quasiliteral starts a directive:
+                             switch to SPECIAL without returning a token.
+                             The directive rules that test for QSILIT in
+                             the saved state pop back to here. */
+                          yy_push_state(SPECIAL);
+                        }
+
+<STRLIT,CHRLIT,QSILIT>. {
+                          /* Any other character is part of the literal. */
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+
+%%
author	Kaz Kylheku <kaz@kylheku.com>	2017-07-31 17:33:59 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2017-07-31 17:40:55 -0700
commit	0b38bc996c4c7e2693931bbd5103c7772b56b4bd (patch)
tree	8e74fd6b7efc3a0fb87037b2bb58b9d8c6129339 /parser.l
parent	2f5e7a5b96039b7a00543b4056bab7ec85c8db4b (diff)
download	txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.gz txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.bz2 txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.zip