summary refs log tree commit diff stats
path: root/extract.y
diff options
context:
space:
mode:
Diffstat (limited to 'extract.y')
-rw-r--r-- extract.y 279
1 files changed, 250 insertions, 29 deletions
diff --git a/extract.y b/extract.y
index 54dbb747..5ac61b2b 100644
--- a/extract.y
+++ b/extract.y
@@ -45,6 +45,7 @@ int yylex(void);
void yyerror(const char *);
obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts);
+obj_t *define_transform(obj_t *define_form);
static obj_t *parsed_spec;
static int output_produced;
@@ -58,23 +59,23 @@ static int output_produced;
long num;
}
-%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL
-%token <lexeme> OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY
+%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT
+%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
%token <num> NUMBER
-%token <chr> REGCHAR
+%token <chr> REGCHAR LITCHAR
%type <obj> spec clauses clause all_clause some_clause none_clause maybe_clause
-%type <obj> collect_clause clause_parts additional_parts output_clause
-%type <obj> line elems_opt elems elem var var_op list exprs expr
-%type <obj> out_clauses out_clauses_opt out_clause
+%type <obj> cases_clause collect_clause clause_parts additional_parts
+%type <obj> output_clause define_clause line elems_opt elems elem var var_op
+%type <obj> list exprs expr out_clauses out_clauses_opt out_clause
%type <obj> repeat_clause repeat_parts_opt o_line
%type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt
%type <obj> regex regexpr regbranch
%type <obj> regterm regclass regclassterm regrange
+%type <obj> strlit chrlit litchars
%type <chr> regchar
-
-%nonassoc ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL
-%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY
+%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL
+%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE
%nonassoc '{' '}' '[' ']' '(' ')'
%right IDENT TEXT NUMBER
%left '|' '/'
@@ -97,7 +98,10 @@ clause : all_clause { $$ = list(num(lineno - 1), $1, nao); }
| some_clause { $$ = list(num(lineno - 1), $1, nao); }
| none_clause { $$ = list(num(lineno - 1), $1, nao); }
| maybe_clause { $$ = list(num(lineno - 1), $1, nao); }
+ | cases_clause { $$ = list(num(lineno - 1), $1, nao); }
| collect_clause { $$ = list(num(lineno - 1), $1, nao); }
+ | define_clause { $$ = list(num(lineno - 1),
+ define_transform($1), nao); }
| output_clause { $$ = list(num(lineno - 1), $1, nao); }
| line { $$ = $1; }
| repeat_clause { $$ = nil;
@@ -108,7 +112,7 @@ all_clause : ALL newl clause_parts { $$ = cons(all, $3); }
| ALL newl error { $$ = nil;
yybadtoken(yychar,
"all clause"); }
- | ALL newl END { $$ = nil;
+ | ALL newl END newl { $$ = nil;
yyerror("empty all clause"); }
;
@@ -117,7 +121,7 @@ some_clause : SOME newl clause_parts { $$ = cons(some, $3); }
| SOME newl error { $$ = nil;
yybadtoken(yychar,
"some clause"); }
- | SOME newl END { $$ = nil;
+ | SOME newl END newl { $$ = nil;
yyerror("empty some clause"); }
;
@@ -125,7 +129,7 @@ none_clause : NONE newl clause_parts { $$ = cons(none, $3); }
| NONE newl error { $$ = nil;
yybadtoken(yychar,
"none clause"); }
- | NONE newl END { $$ = nil;
+ | NONE newl END newl { $$ = nil;
yyerror("empty none clause"); }
;
@@ -133,10 +137,18 @@ maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); }
| MAYBE newl error { $$ = nil;
yybadtoken(yychar,
"maybe clause"); }
- | MAYBE newl END { $$ = nil;
+ | MAYBE newl END newl { $$ = nil;
yyerror("empty maybe clause"); }
;
+/*
+ * cases_clause: the "cases" directive.  On success the semantic value is
+ * the clause parts consed onto the cases symbol.  A syntax error or an
+ * empty body (CASES immediately followed by END) yields nil after
+ * reporting a diagnostic, mirroring the all/some/none/maybe rules.
+ */
+cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); }
+ | CASES newl error { $$ = nil;
+ yybadtoken(yychar,
+ "cases clause"); }
+ | CASES newl END newl { $$ = nil;
+ yyerror("empty cases clause"); }
+ ;
+
collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); }
| COLLECT newl clauses
UNTIL newl clauses END newl { $$ = list(collect, $3,
@@ -181,6 +193,23 @@ elem : TEXT { $$ = string($1); }
yybadtoken(yychar, "coll clause"); }
;
+/*
+ * define_clause: a function definition.  DEFINE is followed by an
+ * optional expression list closed by ')' and an optional body of clauses
+ * terminated by END.  (Presumably the DEFINE token itself covers the
+ * opening parenthesis, since only ')' appears here -- confirm against
+ * the lexer.)
+ *
+ * Semantic value:
+ *   args and body   -> (define <exprs> <clauses>)
+ *   body only       -> (define nil <clauses>)
+ *   args only       -> (define <exprs>)
+ *   neither         -> (define)
+ *
+ * Every error production assigns nil explicitly.  An action that does
+ * not assign $$ leaves it holding whatever the parser pre-loaded, and
+ * the clause rule hands this value straight to define_transform().
+ */
+define_clause : DEFINE exprs ')' newl
+                clauses
+                END newl                { $$ = list(define, $2, $5, nao); }
+              | DEFINE ')' newl
+                clauses
+                END newl                { $$ = list(define, nil, $4, nao); }
+              | DEFINE exprs ')' newl
+                END newl                { $$ = list(define, $2, nao); }
+              | DEFINE ')' newl
+                END newl                { $$ = list(define, nao); }
+              | DEFINE error            { $$ = nil;
+                                          yybadtoken(yychar, "list expression"); }
+              | DEFINE exprs ')' newl
+                error                   { $$ = nil;
+                                          yybadtoken(yychar, "define"); }
+              | DEFINE ')' newl
+                error                   { $$ = nil;
+                                          yybadtoken(yychar, "define"); }
+              ;
+
output_clause : OUTPUT o_elems '\n'
out_clauses
END newl { $$ = list(output, $4, $2, nao); }
@@ -209,8 +238,12 @@ out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); }
yyerror("match clause in output"); }
| maybe_clause { $$ = nil;
yyerror("match clause in output"); }
+ | cases_clause { $$ = nil;
+ yyerror("match clause in output"); }
| collect_clause { $$ = nil;
yyerror("match clause in output"); }
+ | define_clause { $$ = nil;
+ yyerror("match clause in output"); }
| output_clause { $$ = nil;
yyerror("match clause in output"); }
;
@@ -324,6 +357,8 @@ expr : IDENT { $$ = intern(string($1)); }
| NUMBER { $$ = num($1); }
| list { $$ = $1; }
| regex { $$ = cons(regex_compile($1), $1); }
+ | chrlit { $$ = $1; }
+ | strlit { $$ = $1; }
;
regex : '/' regexpr '/' { $$ = $2; }
@@ -384,6 +419,36 @@ newl : '\n'
yyerrok; }
;
+/*
+ * strlit: a double-quoted string literal.  An empty literal yields
+ * null_string.  Otherwise a string of the right length is allocated with
+ * mkustring and filled one character at a time from the litchars list.
+ * A malformed literal reports a diagnostic and yields nil.
+ */
+strlit : '"' '"'                { $$ = null_string; }
+       | '"' litchars '"'       {
+           if ($2) {
+             obj_t *len = length($2), *iter, *ix;
+             $$ = mkustring(len);
+             /* Walk the character list and the string index in step. */
+             for (iter = $2, ix = zero;
+                  iter;
+                  iter = cdr(iter), ix = plus(ix, one))
+             {
+               chr_str_set($$, ix, car(iter));
+             }
+           } else {
+             $$ = nil;
+           }
+         }
+       | '"' error              { /* Assign $$ explicitly: an action that
+                                     skips this leaves it unset. */
+                                  $$ = nil;
+                                  yybadtoken(yychar, "string literal"); }
+       ;
+
+/*
+ * chrlit: a single-quoted character literal.  The value is the first
+ * character of the litchars list; more than one character is reported
+ * as an error but the first character is still used.  An empty or
+ * malformed literal reports a diagnostic and yields nil.
+ */
+chrlit : '\'' '\''              { yyerror("empty character literal");
+                                  $$ = nil; }
+       | '\'' litchars '\''     { $$ = car($2);
+                                  if (cdr($2))
+                                    yyerror("multiple characters in "
+                                            "character literal"); }
+       | '\'' error             { /* Assign $$ explicitly: an action that
+                                     skips this leaves it unset. */
+                                  $$ = nil;
+                                  yybadtoken(yychar, "character literal"); }
+       ;
+
+/*
+ * litchars: one or more LITCHAR tokens, collected into a list of
+ * character objects in source order.  The rule is right-recursive, so
+ * the list is built by consing each character onto the list for the
+ * rest of the literal.
+ */
+litchars : LITCHAR { $$ = cons(chr($1), nil); }
+ | LITCHAR litchars { $$ = cons(chr($1), $2); }
+ ;
%%
obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts)
@@ -415,6 +480,45 @@ obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts)
last_parts, empty_parts, nao);
}
+/*
+ * Checks the shape of a (define <args> <body>) form produced by the
+ * define_clause grammar rule and reports problems through yyerror.
+ *
+ * <args> must be a list whose first element is the function name (a
+ * symbol) and whose second element is a proper list of parameter
+ * symbols.
+ *
+ * The form is never rewritten: the return value is always define_form
+ * itself, or nil when define_form is nil.
+ */
+obj_t *define_transform(obj_t *define_form)
+{
+  obj_t *sym, *args, *name, *params;
+
+  /* Guard before taking the form apart, not after. */
+  if (define_form == nil)
+    return nil;
+
+  sym = first(define_form);
+  args = second(define_form);
+
+  assert (sym == define);
+
+  if (args == nil) {
+    yyerror("define requires arguments");
+    return define_form;
+  }
+
+  if (!consp(args) || !listp(cdr(args))) {
+    yyerror("bad define argument syntax");
+    return define_form;
+  }
+
+  name = first(args);
+  params = second(args);
+
+  if (!symbolp(name)) {
+    yyerror("function name must be a symbol");
+    return define_form;
+  }
+
+  if (!proper_listp(params)) {
+    yyerror("invalid function parameter list");
+    return define_form;
+  }
+
+  if (!all_satisfy(params, func_n1(symbolp), nil))
+    yyerror("function parameters must be symbols");
+
+  return define_form;
+}
+
obj_t *get_spec(void)
{
return parsed_spec;
@@ -443,12 +547,19 @@ void dump_var(const char *name, char *pfx1, size_t len1,
if (len1 >= 112 || len2 >= 112)
abort();
- if (stringp(value)) {
+ if (stringp(value) || chrp(value)) {
fputs(name, stdout);
fputs(pfx1, stdout);
fputs(pfx2, stdout);
putchar('=');
- dump_shell_string(c_str(value));
+ if (stringp(value)) {
+ dump_shell_string(c_str(value));
+ } else {
+ char mini[2];
+ mini[0] = c_chr(value);
+ mini[1] = 0;
+ dump_shell_string(mini);
+ }
putchar('\n');
} else {
obj_t *iter;
@@ -572,6 +683,13 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value)
return bindings;
}
+/*
+ * Evaluates form against bindings.  A symbol is looked up with assoc and
+ * the lookup result is returned as-is, so an unbound symbol gives nil.
+ * Any other object stands for itself and is returned as the pair
+ * (t . form), which lets callers take cdr of the result in both cases.
+ */
+obj_t *eval_form(obj_t *form, obj_t *bindings)
+{
+  return symbolp(form) ? assoc(bindings, form) : cons(t, form);
+}
+
obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno,
obj_t *file)
@@ -1192,7 +1310,7 @@ repeat_spec_same_data:
long reps = 0;
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after skip directive ignored");
+ yyerrorlf(1, spec_lineno, "unexpected material after skip directive");
if ((spec = rest(spec)) == nil)
break;
@@ -1229,7 +1347,7 @@ repeat_spec_same_data:
return nil;
} else if (sym == trailer) {
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after trailer directive ignored");
+ yyerrorlf(1, spec_lineno, "unexpected material after trailer directive");
if ((spec = rest(spec)) == nil)
break;
@@ -1245,7 +1363,7 @@ repeat_spec_same_data:
} else if (sym == block) {
obj_t *name = first(rest(first_spec));
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after block directive ignored");
+ yyerrorlf(1, spec_lineno, "unexpected material after block directive");
if ((spec = rest(spec)) == nil)
break;
uw_block_begin(name, result);
@@ -1256,7 +1374,7 @@ repeat_spec_same_data:
obj_t *target = first(rest(first_spec));
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after %s ignored",
+ yyerrorlf(1, spec_lineno, "unexpected material after %s",
c_str(symbol_name(sym)));
uw_block_return(target,
@@ -1302,7 +1420,9 @@ repeat_spec_same_data:
if3(data, cons(data, num(data_lineno)), t));
return nil;
}
- } else if (sym == some || sym == all || sym == none || sym == maybe) {
+ } else if (sym == some || sym == all || sym == none || sym == maybe ||
+ sym == cases)
+ {
obj_t *specs;
obj_t *all_match = t;
obj_t *some_match = nil;
@@ -1331,6 +1451,8 @@ repeat_spec_same_data:
max_data = new_data;
}
}
+ if (sym == cases)
+ break;
} else {
all_match = nil;
}
@@ -1341,8 +1463,8 @@ repeat_spec_same_data:
return nil;
}
- if (sym == some && !some_match) {
- yyerrorlf(2, spec_lineno, "some: no clauses matched");
+ if ((sym == some || sym == cases) && !some_match) {
+ yyerrorlf(2, spec_lineno, "some/cases: no clauses matched");
return nil;
}
@@ -1514,15 +1636,13 @@ repeat_spec_same_data:
} else if (sym == bind) {
obj_t *args = rest(first_spec);
obj_t *pattern = first(args);
- obj_t *var = second(args);
- obj_t *lookup = assoc(bindings, var);
+ obj_t *form = second(args);
+ obj_t *val = eval_form(form, bindings);
- if (!var || !symbolp(var))
- yyerrorlf(1, spec_lineno, "bind: bad variable spec");
- else if (!lookup)
- yyerrorlf(1, spec_lineno, "bind: unbound source variable");
+ if (!val)
+ yyerrorlf(1, spec_lineno, "bind: unbound variable on right side");
- bindings = dest_bind(bindings, pattern, cdr(lookup));
+ bindings = dest_bind(bindings, pattern, cdr(val));
if (bindings == t)
return nil;
@@ -1581,6 +1701,107 @@ repeat_spec_same_data:
break;
goto repeat_spec_same_data;
+ } else if (sym == define) {
+ obj_t *args = second(first_spec);
+ obj_t *body = third(first_spec);
+ obj_t *name = first(args);
+ obj_t *params = second(args);
+
+ if (rest(specline))
+ yyerrorlf(1, spec_lineno, "unexpected material after define");
+
+ uw_set_func(name, cons(params, body));
+
+ if ((spec = rest(spec)) == nil)
+ break;
+
+ goto repeat_spec_same_data;
+ } else {
+ obj_t *func = uw_get_func(sym);
+
+ if (func) {
+ obj_t *args = rest(first_spec);
+ obj_t *params = car(func);
+ obj_t *body = cdr(func);
+ obj_t *piter, *aiter;
+ obj_t *bindings_cp = copy_alist(bindings);
+
+ if (!equal(length(args), length(params))) {
+ yyerrorlf(1, spec_lineno, "function %s takes %ld argument(s)",
+ c_str(sym), c_num(length(params)));
+ return nil;
+ }
+
+ for (piter = params, aiter = args; piter;
+ piter = cdr(piter), aiter = cdr(aiter))
+ {
+ obj_t *param = car(piter);
+ obj_t *arg = car(aiter);
+
+ if (symbolp(arg)) {
+ obj_t *existing = assoc(bindings, arg);
+ if (existing) {
+ bindings_cp = acons_new(bindings_cp,
+ param,
+ cdr(existing));
+ } else {
+ bindings_cp = alist_remove(bindings_cp, cons(param, nil));
+ }
+ } else {
+ bindings_cp = acons_new(bindings_cp, param, arg);
+ }
+ }
+
+ {
+ uw_block_begin(nil, result);
+ uw_env_begin;
+ result = match_files(body, files, bindings_cp,
+ data, num(data_lineno));
+ uw_env_end;
+ uw_block_end;
+
+ if (!result) {
+ yyerrorlf(2, spec_lineno, "function failed");
+ return nil;
+ }
+
+ {
+ cons_bind (new_bindings, success, result);
+
+ for (piter = params, aiter = args; piter;
+ piter = cdr(piter), aiter = cdr(aiter))
+ {
+ obj_t *param = car(piter);
+ obj_t *arg = car(aiter);
+
+ if (symbolp(arg)) {
+ obj_t *newbind = assoc(new_bindings, param);
+ if (newbind) {
+ bindings = dest_bind(bindings, arg, cdr(newbind));
+ if (bindings == t)
+ return nil;
+ }
+ }
+ }
+
+ if (consp(success)) {
+ yyerrorlf(2, spec_lineno,
+ "function matched; advancing from line %ld to %ld",
+ data_lineno, c_num(cdr(success)));
+ data = car(success);
+ data_lineno = c_num(cdr(success));
+ } else {
+ yyerrorlf(2, spec_lineno, "function consumed entire file");
+ data = nil;
+ }
+ }
+ }
+
+ if ((spec = rest(spec)) == nil)
+ break;
+
+ goto repeat_spec_same_data;
+ }
}
}