summaryrefslogtreecommitdiffstats
path: root/extract.l
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-07-31 17:33:59 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-07-31 17:40:55 -0700
commit0b38bc996c4c7e2693931bbd5103c7772b56b4bd (patch)
tree8e74fd6b7efc3a0fb87037b2bb58b9d8c6129339 /extract.l
parent2f5e7a5b96039b7a00543b4056bab7ec85c8db4b (diff)
downloadtxr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.gz
txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.tar.bz2
txr-0b38bc996c4c7e2693931bbd5103c7772b56b4bd.zip
txr-015 2009-10-15txr-015
Diffstat (limited to 'extract.l')
-rw-r--r--extract.l760
1 files changed, 0 insertions, 760 deletions
diff --git a/extract.l b/extract.l
deleted file mode 100644
index ab041bb9..00000000
--- a/extract.l
+++ /dev/null
@@ -1,760 +0,0 @@
-/* Copyright 2009
- * Kaz Kylheku <kkylheku@gmail.com>
- * Vancouver, Canada
- * All rights reserved.
- *
- * BSD License:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- */
-
-%{
-
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <errno.h>
-#include <dirent.h>
-#include "y.tab.h"
-#include "lib.h"
-#include "gc.h"
-#include "extract.h"
-
-#define YY_NO_UNPUT
-
-const char *version = "014";
-const char *progname = "txr";
-const char *spec_file = "stdin";
-long lineno = 1;
-int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */
-int opt_nobindings = 0;
-int opt_arraydims = 1;
-
-static int nesting;
-static int closechar;
-static int errors;
-
-/*
- * Can implement an emergency allocator here from a fixed storage
- * pool, which sets an OOM flag. Program can check flag
- * and gracefully terminate instead of aborting like this.
- */
-void *oom_realloc_handler(void *old, size_t size)
-{
- fprintf(stderr, "%s: out of memory\n", progname);
- puts("false");
- abort();
-}
-
-void yyerror(const char *s)
-{
- yyerrorlf(1, lineno, "%s", s);
- errors++;
-}
-
-void yyerrorf(int level, const char *s, ...)
-{
- if (opt_loglevel >= level) {
- va_list vl;
- va_start (vl, s);
- fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno);
- vfprintf(stderr, s, vl);
- putc('\n', stderr);
- va_end (vl);
- }
-
- if (level < 2)
- errors++;
-}
-
-void yyerrorlf(int level, long spec_lineno, const char *s, ...)
-{
- if (opt_loglevel >= level) {
- va_list vl;
- va_start (vl, s);
- fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, spec_lineno);
- vfprintf(stderr, s, vl);
- putc('\n', stderr);
- va_end (vl);
- }
-
- if (level < 2)
- errors++;
-}
-
-void yybadtoken(int tok, const char *context)
-{
- const char *problem = 0;
-
- switch (tok) {
- case TEXT: problem = "text"; break;
- case IDENT: problem = "identifier"; break;
- case ALL: problem = "\"all\""; break;
- case SOME: problem = "\"some\""; break;
- case NONE: problem = "\"none\""; break;
- case MAYBE: problem = "\"maybe\""; break;
- case CASES: problem = "\"cases\""; break;
- case AND: problem = "\"and\""; break;
- case OR: problem = "\"or\""; break;
- case END: problem = "\"end\""; break;
- case COLLECT: problem = "\"collect\""; break;
- case UNTIL: problem = "\"until\""; break;
- case COLL: problem = "\"coll\""; break;
- case OUTPUT: problem = "\"output\""; break;
- case REPEAT: problem = "\"repeat\""; break;
- case REP: problem = "\"rep\""; break;
- case SINGLE: problem = "\"single\""; break;
- case FIRST: problem = "\"first\""; break;
- case LAST: problem = "\"last\""; break;
- case EMPTY: problem = "\"empty\""; break;
- case DEFINE: problem = "\"define\""; break;
- case NUMBER: problem = "\"number\""; break;
- case REGCHAR: problem = "regular expression character"; break;
- }
-
- if (problem != 0)
- if (context)
- yyerrorlf(1, lineno, "misplaced %s in %s", problem, context);
- else
- yyerrorlf(1, lineno, "unexpected %s", problem);
- else
- if (context)
- yyerrorlf(1, lineno, "unterminated %s", context);
- else
- yyerrorlf(1, lineno, "unexpected end of input");
-}
-
-static int char_esc(int letter)
-{
- switch (letter) {
- case 'a': return '\a';
- case 'b': return '\b';
- case 't': return '\t';
- case 'n': return '\n';
- case 'v': return '\v';
- case 'f': return '\f';
- case 'r': return '\r';
- case 'e': return 27;
- case '"': return '"';
- case '\'': return '\'';
- }
-
- abort();
-}
-
-static int num_esc(char *num)
-{
- if (num[0] == 'x') {
- if (strlen(num) > 3)
- yyerror("too many digits in hex character escape");
- return strtol(num + 1, 0, 16);
- } else {
- if (strlen(num) > 3)
- yyerror("too many digits in octal character escape");
- return strtol(num, 0, 8);
- }
-}
-
-%}
-
-TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
-ID_END [^a-zA-Z0-9_]
-NUM_END [^0-9]
-WS [\t ]*
-HEX [0-9A-Fa-f]
-OCT [0-7]
-
-%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT
-
-%%
-
-<SPECIAL,NESTED>{TOK} {
- long val;
- char *errp;
-
- errno = 0;
-
- val = strtol(yytext, &errp, 10);
-
- if (nesting == 0)
- BEGIN(INITIAL);
-
- if (*errp != 0) {
- /* not a number */
- yylval.lexeme = strdup(yytext);
- return IDENT;
- }
-
- if ((val == LONG_MAX || val == LONG_MIN)
- && errno == ERANGE)
- yyerror("numeric overflow in token");
-
- yylval.num = val;
- return NUMBER;
- }
-
-
-<SPECIAL>\({WS}all{WS}\) {
- BEGIN(INITIAL);
- return ALL;
- }
-
-<SPECIAL>\({WS}some{WS}\) {
- BEGIN(INITIAL);
- return SOME;
- }
-
-<SPECIAL>\({WS}none{WS}\) {
- BEGIN(INITIAL);
- return NONE;
- }
-
-<SPECIAL>\({WS}maybe{WS}\) {
- BEGIN(INITIAL);
- return MAYBE;
- }
-
-<SPECIAL>\({WS}cases{WS}\) {
- BEGIN(INITIAL);
- return CASES;
- }
-
-<SPECIAL>\({WS}and{WS}\) {
- BEGIN(INITIAL);
- return AND;
- }
-
-<SPECIAL>\({WS}or{WS}\) {
- BEGIN(INITIAL);
- return OR;
- }
-
-<SPECIAL>\({WS}end{WS}\) {
- BEGIN(INITIAL);
- return END;
- }
-
-<SPECIAL>\({WS}collect{WS}\) {
- BEGIN(INITIAL);
- return COLLECT;
- }
-
-<SPECIAL>\({WS}coll{WS}\) {
- BEGIN(INITIAL);
- return COLL;
- }
-
-<SPECIAL>\({WS}until{WS}\) {
- BEGIN(INITIAL);
- return UNTIL;
- }
-
-<SPECIAL>\({WS}output{WS}\) {
- BEGIN(INITIAL);
- return OUTPUT;
- }
-
-<SPECIAL>\({WS}repeat{WS}\) {
- BEGIN(INITIAL);
- return REPEAT;
- }
-
-
-<SPECIAL>\({WS}rep{WS}\) {
- BEGIN(INITIAL);
- return REP;
- }
-
-<SPECIAL>\({WS}single{WS}\) {
- BEGIN(INITIAL);
- return SINGLE;
- }
-
-<SPECIAL>\({WS}first{WS}\) {
- BEGIN(INITIAL);
- return FIRST;
- }
-
-<SPECIAL>\({WS}last{WS}\) {
- BEGIN(INITIAL);
- return LAST;
- }
-
-<SPECIAL>\({WS}empty{WS}\) {
- BEGIN(INITIAL);
- return EMPTY;
- }
-
-<SPECIAL>\({WS}define/{ID_END} {
- nesting++;
- closechar = ')';
- BEGIN(NESTED);
- return DEFINE;
- }
-
-<SPECIAL,NESTED>\{|\( {
- nesting++;
- if (yytext[0] == '{')
- closechar = '}';
- else
- closechar = ')';
- BEGIN(NESTED);
- return yytext[0];
- }
-
-<SPECIAL,NESTED>\}|\) {
- if (yytext[0] != closechar) {
- yyerror("paren mismatch");
- BEGIN(INITIAL);
- } else {
- switch (--nesting) {
- case 1:
- BEGIN(SPECIAL);
- break;
- case 0:
- BEGIN(INITIAL);
- break;
- }
-
- return yytext[0];
- }
- }
-
-<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ }
-
-<SPECIAL,NESTED>\" {
- BEGIN(STRLIT);
- return '"';
- }
-
-<SPECIAL,NESTED>\' {
- BEGIN(CHRLIT);
- return '\'';
- }
-
-<SPECIAL>@ {
- if (nesting == 0) {
- BEGIN(INITIAL);
- yylval.lexeme = strdup("@");
- return TEXT;
- }
- }
-
-<SPECIAL,NESTED>\n {
- lineno++;
- }
-
-<SPECIAL,NESTED>[/] {
- BEGIN(REGEX);
- return '/';
- }
-
-<SPECIAL,NESTED>\. {
- yylval.chr = '.';
- return '.';
- }
-
-<SPECIAL>[\\][abtnvfre] {
- char lexeme[2];
- lexeme[0] = char_esc(yytext[1]);
- lexeme[1] = 0;
- yylval.lexeme = strdup(lexeme);
- BEGIN(INITIAL);
- return TEXT;
- }
-
-<SPECIAL>[\\](x{HEX}+|{OCT}+) {
- char lexeme[2];
- lexeme[0] = num_esc(yytext + 1);
- lexeme[1] = 0;
- yylval.lexeme = strdup(lexeme);
- BEGIN(INITIAL);
- return TEXT;
- }
-
-<SPECIAL,NESTED>. {
- yyerrorf(0, "bad character in directive: '%c'",
- yytext[0]);
- }
-
-<REGEX>[/] {
- if (nesting == 0)
- BEGIN(INITIAL);
- else
- BEGIN(NESTED);
- yylval.chr = '/';
- return '/';
- }
-
-
-<REGEX>[\\][abtnvfre] {
- yylval.chr = char_esc(yytext[1]);
- return REGCHAR;
- }
-
-<REGEX>[\\](x{HEX}+|{OCT}+) {
- yylval.chr = num_esc(yytext + 1);
- return REGCHAR;
- }
-
-<REGEX>\n {
- lineno++;
- yyerror("newline in regex");
- }
-
-<REGEX>[.*?+^] {
- yylval.chr = yytext[0];
- return yytext[0];
- }
-
-
-<REGEX>[\[\]\-] {
- yylval.chr = yytext[0];
- return yytext[0];
- }
-
-<REGEX>[()|] {
- yylval.chr = yytext[0];
- return yytext[0];
- }
-
-<REGEX>[\\]. {
- yylval.chr = yytext[1];
- return REGCHAR;
- }
-
-<REGEX>. {
- yylval.chr = yytext[0];
- return REGCHAR;
- }
-
-<INITIAL>[^@\n]+ {
- yylval.lexeme = strdup(yytext);
- return TEXT;
- }
-
-<INITIAL>\n {
- lineno++;
- return '\n';
- }
-
-<INITIAL>@{WS}\* {
- BEGIN(SPECIAL);
- return '*';
- }
-
-<INITIAL>@ {
- BEGIN(SPECIAL);
- }
-
-<INITIAL>^@#.*\n {
- /* eat whole line comment */
- lineno++;
- }
-
-<INITIAL>@#.* {
- /* comment to end of line */
- }
-
-<STRLIT>\" {
- if (nesting == 0)
- BEGIN(INITIAL);
- else
- BEGIN(NESTED);
- return '"';
- }
-
-<CHRLIT>\' {
- if (nesting == 0)
- BEGIN(INITIAL);
- else
- BEGIN(NESTED);
- return '\'';
- }
-
-<STRLIT,CHRLIT>[\\][abtnvfre] {
- yylval.chr = char_esc(yytext[1]);
- return LITCHAR;
- }
-
-<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) {
- yylval.chr = num_esc(yytext + 1);
- return LITCHAR;
- }
-<STRLIT>\n {
- yyerror("newline in string literal");
- lineno++;
- yylval.chr = yytext[0];
- return LITCHAR;
- }
-<CHRLIT>\n {
- yyerror("newline in character literal");
- lineno++;
- yylval.chr = yytext[0];
- return LITCHAR;
- }
-<STRLIT,CHRLIT>. {
- yylval.chr = yytext[0];
- return LITCHAR;
- }
-
-%%
-
-void help(void)
-{
- const char *text =
-"\n"
-"txr version %s\n"
-"\n"
-"copyright 2009, Kaz Kylheku <kkylheku@gmail.com>\n"
-"\n"
-"usage:\n"
-"\n"
-" %s [ options ] query-file { data-file }*\n"
-"\n"
-"The query-file or data-file arguments may be specified as -, in which case\n"
-"standard input is used. If these arguments end with a | character, then\n"
-"they are treated as command pipes. Leading arguments which begin with a -\n"
-"followed by one or more characters, and which are not arguments to options\n"
-"are interpreted as options. The -- option indicates the end of the options.\n"
-"\n"
-"If no data-file arguments sare supplied, then the query itself must open a\n"
-"a data source prior to attempting to make any pattern match, or it will\n"
-"simply fail due to a match which has run out of data.\n"
-"\n"
-"options:\n"
-"\n"
-"-Dvar=value Pre-define variable var, with the given value.\n"
-" A list value can be specified using commas.\n"
-"-Dvar Predefine variable var, with empty string value.\n"
-"-q Quiet: don't report errors during query matching.\n"
-"-v Verbose: extra logging from matcher.\n"
-"-b Don't dump list of bindings.\n"
-"-a num Generate array variables up to num-dimensions.\n"
-" Default is 1. Additional dimensions are fudged\n"
-" by generating numeric suffixes\n"
-"--help You already know!\n"
-"--version Display program version\n"
-"\n"
-"Options that take no argument can be combined. The -q and -v options\n"
-"are mutually exclusive; the right-most one dominates.\n"
-"\n"
- ;
- fprintf(stdout, text, version, progname);
-}
-
-void hint(void)
-{
- fprintf(stderr, "%s: incorrect arguments: try --help\n", progname);
-}
-
-int main(int argc, char **argv)
-{
- obj_t *stack_bottom_0 = nil;
- obj_t *spec = nil;
- obj_t *bindings = nil;
- int match_loglevel = opt_loglevel;
- progname = argv[0] ? argv[0] : progname;
- obj_t *stack_bottom_1 = nil;
-
- init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1);
-
- if (argc <= 1) {
- hint();
- return EXIT_FAILURE;
- }
-
- argc--, argv++;
-
- while (argc > 0 && (*argv)[0] == '-') {
- if (!strcmp(*argv, "--")) {
- argv++, argc--;
- break;
- }
-
- if (!strcmp(*argv, "-"))
- break;
-
- if (!strncmp(*argv, "-D", 2)) {
- char *var = *argv + 2;
- char *equals = strchr(var, '=');
- char *has_comma = (equals != 0) ? strchr(equals, ',') : 0;
-
- if (has_comma) {
- char *val = equals + 1;
- obj_t *list = nil;
-
- *equals = 0;
-
- for (;;) {
- size_t piece = strcspn(val, ",");
- char comma_p = val[piece];
-
- val[piece] = 0;
-
- list = cons(string(strdup(val)), list);
-
- if (!comma_p)
- break;
-
- val += piece + 1;
- }
-
- list = nreverse(list);
- bindings = cons(cons(intern(string(strdup(var))), list), bindings);
- } else if (equals) {
- char *val = equals + 1;
- *equals = 0;
- bindings = cons(cons(intern(string(strdup(var))),
- string(strdup(val))), bindings);
- } else {
- bindings = cons(cons(intern(string(strdup(var))),
- null_string), bindings);
- }
-
- argc--, argv++;
- continue;
- }
-
- if (!strcmp(*argv, "--version")) {
- printf("%s: version %s\n", progname, version);
- return 0;
- }
-
- if (!strcmp(*argv, "--help")) {
- help();
- return 0;
- }
-
- if (!strcmp(*argv, "-a")) {
- long val;
- char *errp;
- char opt = (*argv)[1];
-
- if (argc == 1) {
- fprintf(stderr, "%s: option %c needs argument\n", progname, opt);
-
- return EXIT_FAILURE;
- }
-
- argv++, argc--;
-
- switch (opt) {
- case 'a':
- val = strtol(*argv, &errp, 10);
- if (*errp != 0) {
- fprintf(stderr, "%s: option %c needs numeric argument, not %s\n",
- progname, opt, *argv);
- return EXIT_FAILURE;
- }
-
- opt_arraydims = val;
- break;
- }
-
- argv++, argc--;
- continue;
- }
-
- if (!strcmp(*argv, "--gc-debug")) {
- opt_gc_debug = 1;
- argv++, argc--;
- continue;
- }
-
- {
- char *popt;
- for (popt = (*argv)+1; *popt != 0; popt++) {
- switch (*popt) {
- case 'v':
- match_loglevel = 2;
- break;
- case 'q':
- match_loglevel = 0;
- break;
- case 'b':
- opt_nobindings = 1;
- break;
- case '-':
- fprintf(stderr, "%s: unrecognized long option: --%s\n",
- progname, popt + 1);
- return EXIT_FAILURE;
- default:
- fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt);
- return EXIT_FAILURE;
- }
- }
-
- argc--, argv++;
- }
- }
-
- if (argc < 1) {
- hint();
- return EXIT_FAILURE;
- }
-
- if (strcmp(*argv, "-") != 0) {
- yyin = fopen(*argv, "r");
- if (yyin == 0) {
- fprintf(stderr, "%s: unable to open %s\n", progname, *argv);
- return EXIT_FAILURE;
- }
- spec_file = *argv;
- }
-
- argc--, argv++;
-
- {
- int gc;
-
- gc = gc_state(0);
- yyparse();
- gc_state(gc);
-
- if (errors)
- return EXIT_FAILURE;
- spec = get_spec();
-
-
- opt_loglevel = match_loglevel;
-
- if (opt_loglevel >= 2) {
- fputs("spec:\n", stderr);
- dump(spec, stderr);
-
- fputs("bindings:\n", stderr);
- dump(bindings, stderr);
- }
-
- {
- int retval;
- list_collect_decl(filenames, iter);
-
- while (*argv)
- list_collect(iter, string(*argv++));
-
- retval = extract(spec, filenames, bindings);
-
- return errors ? EXIT_FAILURE : retval;
- }
- }
-}