/* Copyright 2009 * Kaz Kylheku * Vancouver, Canada * All rights reserved. * * BSD License: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ %{ #include #include #include #include #include "lib.h" #include "regex.h" #include "utf8.h" #include "parser.h" int yylex(void); void yyerror(const char *); obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts); obj_t *define_transform(obj_t *define_form); obj_t *lit_char_helper(obj_t *litchars); static obj_t *parsed_spec; %} %union { wchar_t *lexeme; union obj *obj; wchar_t chr; long num; } %token TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT %token UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token TRY CATCH FINALLY %token NUMBER %token REGCHAR LITCHAR %type spec clauses clauses_opt clause %type all_clause some_clause none_clause maybe_clause %type cases_clause collect_clause clause_parts additional_parts %type output_clause define_clause try_clause catch_clauses_opt %type line elems_opt elems elem var var_op %type list exprs expr out_clauses out_clauses_opt out_clause %type repeat_clause repeat_parts_opt o_line %type o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt %type regex regexpr regbranch %type regterm regclass regclassterm regrange %type strlit chrlit quasilit quasi_items quasi_item litchars %type regchar %nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL %nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE %nonassoc '{' '}' '[' ']' '(' ')' %right IDENT TEXT NUMBER %left '|' '/' %right '*' '?' '+' %right '^' '.' '\\' REGCHAR LITCHAR %% spec : clauses { parsed_spec = $1; } | /* empty */ { parsed_spec = nil; } | error { parsed_spec = nil; yybadtoken(yychar, 0); } ; clauses : clause { $$ = cons($1, nil); } | clause clauses { $$ = cons($1, $2); } ; clauses_opt : clauses { $$ = $1; } | /* empty */ { $$ = nil; } ; clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } | some_clause { $$ = list(num(lineno - 1), $1, nao); } | none_clause { $$ = list(num(lineno - 1), $1, nao); } | maybe_clause { $$ = list(num(lineno - 1), $1, nao); } | cases_clause { $$ = list(num(lineno - 1), $1, nao); } | collect_clause { $$ = list(num(lineno - 1), $1, nao); } | define_clause { $$ = list(num(lineno - 1), define_transform($1), nao); } | try_clause { $$ = list(num(lineno - 1), $1, nao); } | output_clause { $$ = list(num(lineno - 1), $1, nao); } | line { $$ = $1; } | repeat_clause { $$ = nil; yyerror("repeat outside of output"); } ; all_clause : ALL newl clause_parts { $$ = cons(all, $3); } | ALL newl error { $$ = nil; yybadtoken(yychar, "all clause"); } | ALL newl END newl { $$ = nil; yyerror("empty all clause"); } ; some_clause : SOME newl clause_parts { $$ = cons(some, $3); } | SOME newl error { $$ = nil; yybadtoken(yychar, "some clause"); } | SOME newl END newl { $$ = nil; yyerror("empty some clause"); } ; none_clause : NONE newl clause_parts { $$ = cons(none, $3); } | NONE newl error { $$ = nil; yybadtoken(yychar, "none clause"); } | NONE newl END newl { $$ = nil; yyerror("empty none clause"); } ; maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); } | MAYBE newl error { $$ = nil; yybadtoken(yychar, "maybe clause"); } | MAYBE newl END newl { $$ = nil; yyerror("empty maybe clause"); } ; cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); } | CASES newl error { $$ = nil; yybadtoken(yychar, "cases clause"); } | CASES newl END newl { $$ = nil; yyerror("empty cases clause"); } ; collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); } | COLLECT newl clauses UNTIL newl clauses END newl { $$ = list(collect, $3, $6, nao); } | COLLECT newl error { $$ = nil; if (yychar == UNTIL || yychar == END) yyerror("empty collect"); else yybadtoken(yychar, "collect clause"); } ; clause_parts : clauses additional_parts { $$ = cons($1, $2); } ; additional_parts : END newl { $$ = nil; } | AND newl clauses additional_parts { $$ = cons($3, $4); } | OR newl clauses additional_parts { $$ = cons($3, $4); } ; line : elems_opt '\n' { $$ = $1; } ; elems_opt : elems { $$ = cons(num(lineno - 1), $1); } | { $$ = nil; } ; elems : elem { $$ = cons($1, nil); } | elem elems { $$ = cons($1, $2); } | rep_elem { $$ = nil; yyerror("rep outside of output"); } ; elem : TEXT { $$ = string_own($1); } | var { $$ = $1; } | list { $$ = $1; } | regex { $$ = cons(regex_compile($1), $1); } | COLL elems END { $$ = list(coll, $2, nao); } | COLL elems UNTIL elems END { $$ = list(coll, $2, $4, nao); } | COLL error { $$ = nil; yybadtoken(yychar, "coll clause"); } ; define_clause : DEFINE exprs ')' newl clauses_opt END newl { $$ = list(define, $2, $5, nao); } | DEFINE ')' newl clauses_opt END newl { $$ = list(define, nil, $4, nao); } | DEFINE error { yybadtoken(yychar, "list expression"); } | DEFINE exprs ')' newl error { yybadtoken(yychar, "define"); } | DEFINE ')' newl error { yybadtoken(yychar, "define"); } ; try_clause : TRY newl clauses catch_clauses_opt END newl { $$ = list(try, flatten(mapcar(func_n1(second), $4)), $3, $4, nao); } | TRY newl error { $$ = nil; if (yychar == END || yychar == CATCH || yychar == FINALLY) yyerror("empty try clause"); else yybadtoken(yychar, "try clause"); } | TRY newl clauses error { $$ = nil; yybadtoken(yychar, "try clause"); } ; catch_clauses_opt : CATCH ')' newl clauses_opt catch_clauses_opt { $$ = cons(list(catch, cons(t, nil), $4, nao), $5); } | CATCH exprs ')' newl clauses_opt catch_clauses_opt { $$ = cons(list(catch, $2, $5, nao), $6); } | FINALLY newl clauses_opt { $$ = cons(list(finally, nil, $3, nao), nil); } | { $$ = nil; } | CATCH ')' newl error { yybadtoken(yychar, "try clause"); } | CATCH exprs ')' newl error { yybadtoken(yychar, "try clause"); } | FINALLY newl error { yybadtoken(yychar, "try clause"); } ; output_clause : OUTPUT ')' o_elems '\n' out_clauses END newl { $$ = list(output, $5, $3, nao); } | OUTPUT ')' newl out_clauses END newl { $$ = list(output, $4, nao); } | OUTPUT exprs ')' newl out_clauses END newl { $$ = list(output, $5, nil, $2, nao); } | OUTPUT exprs ')' o_elems '\n' out_clauses END newl { yyerror("invalid combination of old and " "new syntax in output directive"); } | OUTPUT error { yybadtoken(yychar, "list expression"); } | OUTPUT ')' o_elems '\n' error { $$ = nil; yybadtoken(yychar, "output clause"); } | OUTPUT ')' newl error { $$ = nil; yybadtoken(yychar, "output clause"); } | OUTPUT exprs ')' o_elems '\n' error { $$ = nil; yybadtoken(yychar, "output clause"); } | OUTPUT exprs ')' newl error { $$ = nil; yybadtoken(yychar, "output clause"); } ; out_clauses : out_clause { $$ = cons($1, nil); } | out_clause out_clauses { $$ = cons($1, $2); } ; out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); } | o_line { $$ = $1; } | all_clause { $$ = nil; yyerror("match clause in output"); } | some_clause { $$ = nil; yyerror("match clause in output"); } | none_clause { $$ = nil; yyerror("match clause in output"); } | maybe_clause { $$ = nil; yyerror("match clause in output"); } | cases_clause { $$ = nil; yyerror("match clause in output"); } | collect_clause { $$ = nil; yyerror("match clause in output"); } | define_clause { $$ = nil; yyerror("match clause in output"); } | try_clause { $$ = nil; yyerror("match clause in output"); } | output_clause { $$ = nil; yyerror("match clause in output"); } ; repeat_clause : REPEAT newl out_clauses repeat_parts_opt END newl { $$ = repeat_rep_helper(repeat, $3, $4); } | REPEAT newl error { $$ = nil; yybadtoken(yychar, "repeat clause"); } ; repeat_parts_opt : SINGLE newl out_clauses_opt repeat_parts_opt { $$ = cons(cons(single, $3), $4); } | FIRST newl out_clauses_opt repeat_parts_opt { $$ = cons(cons(frst, $3), $4); } | LAST newl out_clauses_opt repeat_parts_opt { $$ = cons(cons(lst, $3), $4); } | EMPTY newl out_clauses_opt repeat_parts_opt { $$ = cons(cons(empty, $3), $4); } | /* empty */ { $$ = nil; } ; out_clauses_opt : out_clauses { $$ = $1; } | /* empty */ { $$ = null_list; } o_line : o_elems_opt '\n' { $$ = $1; } ; o_elems_opt : o_elems { $$ = cons(num(lineno - 1), $1); } | { $$ = nil; } ; o_elems_opt2 : o_elems { $$ = $1; } | { $$ = null_list; } ; o_elems : o_elem { $$ = cons($1, nil); } | o_elem o_elems { $$ = cons($1, $2); } ; o_elem : TEXT { $$ = string_own($1); } | var { $$ = $1; } | rep_elem { $$ = $1; } ; rep_elem : REP o_elems rep_parts_opt END { $$ = repeat_rep_helper(rep, $2, $3); } | REP error { $$ = nil; yybadtoken(yychar, "rep clause"); } ; rep_parts_opt : SINGLE o_elems_opt2 rep_parts_opt { $$ = cons(cons(single, $2), $3); } | FIRST o_elems_opt2 rep_parts_opt { $$ = cons(cons(frst, $2), $3); } | LAST o_elems_opt2 rep_parts_opt { $$ = cons(cons(lst, $2), $3); } | EMPTY o_elems_opt2 rep_parts_opt { $$ = cons(cons(empty, $2), $3); } | /* empty */ { $$ = nil; } ; /* This sucks, but factoring '*' into a nonterminal * that generates an empty phrase causes reduce/reduce conflicts. */ var : IDENT { $$ = list(var, intern(string_own($1)), nao); } | IDENT elem { $$ = list(var, intern(string_own($1)), $2, nao); } | '{' IDENT '}' { $$ = list(var, intern(string_own($2)), nao); } | '{' IDENT '}' elem { $$ = list(var, intern(string_own($2)), $4, nao); } | '{' IDENT regex '}' { $$ = list(var, intern(string_own($2)), nil, cons(regex_compile($3), $3), nao); } | '{' IDENT NUMBER '}' { $$ = list(var, intern(string_own($2)), nil, num($3), nao); } | var_op IDENT { $$ = list(var, intern(string_own($2)), nil, $1, nao); } | var_op IDENT elem { $$ = list(var, intern(string_own($2)), $3, $1, nao); } | var_op '{' IDENT '}' { $$ = list(var, intern(string_own($3)), nil, $1, nao); } | var_op '{' IDENT '}' elem { $$ = list(var, intern(string_own($3)), $5, $1, nao); } | var_op '{' IDENT regex '}' { yyerror("longest match " "not useable with regex"); } | var_op '{' IDENT NUMBER '}' { yyerror("longest match " "not useable with " "fixed width match"); } | IDENT error { $$ = nil; yybadtoken(yychar, "variable spec"); } | var_op error { $$ = nil; yybadtoken(yychar, "variable spec"); } ; var_op : '*' { $$ = t; } ; list : '(' exprs ')' { $$ = $2; } | '(' ')' { $$ = nil; } | '(' error { $$ = nil; yybadtoken(yychar, "list expression"); } ; exprs : expr { $$ = cons($1, nil); } | expr exprs { $$ = cons($1, $2); } | expr '.' expr { $$ = cons($1, $3); } ; expr : IDENT { $$ = intern(string_own($1)); } | NUMBER { $$ = num($1); } | list { $$ = $1; } | regex { $$ = cons(regex_compile($1), $1); } | chrlit { $$ = $1; } | strlit { $$ = $1; } | quasilit { $$ = $1; } ; regex : '/' regexpr '/' { $$ = $2; } | '/' '/' { $$ = nil; } | '/' error { $$ = nil; yybadtoken(yychar, "regex"); } ; regexpr : regbranch { $$ = $1; } | regbranch '|' regbranch { $$ = list(list(or, $1, $3, nao), nao); } ; regbranch : regterm { $$ = cons($1, nil); } | regterm regbranch { $$ = cons($1, $2); } ; regterm : '[' regclass ']' { $$ = cons(set, $2); } | '[' '^' regclass ']' { $$ = cons(cset, $3); } | '.' { $$ = wild; } | '^' { $$ = chr('^'); } | ']' { $$ = chr(']'); } | '-' { $$ = chr('-'); } | regterm '*' { $$ = list(zeroplus, $1, nao); } | regterm '+' { $$ = list(oneplus, $1, nao); } | regterm '?' { $$ = list(optional, $1, nao); } | REGCHAR { $$ = chr($1); } | '(' regexpr ')' { $$ = cons(compound, $2); } | '(' error { $$ = nil; yybadtoken(yychar, "regex subexpression"); } | '[' error { $$ = nil; yybadtoken(yychar, "regex character class"); } ; regclass : regclassterm { $$ = cons($1, nil); } | regclassterm regclass { $$ = cons($1, $2); } ; regclassterm : regrange { $$ = $1; } | regchar { $$ = chr($1); } ; regrange : regchar '-' regchar { $$ = cons(chr($1), chr($3)); } regchar : '?' { $$ = '?'; } | '.' { $$ = '.'; } | '*' { $$ = '*'; } | '+' { $$ = '+'; } | '(' { $$ = '('; } | ')' { $$ = ')'; } | '^' { $$ = '^'; } | '|' { $$ = '|'; } | REGCHAR { $$ = $1; } ; newl : '\n' | error '\n' { yyerror("newline expected after directive"); yyerrok; } ; strlit : '"' '"' { $$ = null_string; } | '"' litchars '"' { $$ = lit_char_helper($2); } | '"' error { yybadtoken(yychar, "string literal"); } ; chrlit : '\'' '\'' { yyerror("empty character literal"); } { $$ = nil; } | '\'' litchars '\'' { $$ = car($2); if (cdr($2)) yyerror("multiple characters in " "character literal"); } | '\'' error { $$ = nil; yybadtoken(yychar, "character literal"); } ; quasilit : '`' '`' { $$ = null_string; } | '`' quasi_items '`' { $$ = cons(quasi, $2); } | '`' error { $$ = nil; yybadtoken(yychar, "string literal"); } ; quasi_items : quasi_item { $$ = cons($1, nil); } | quasi_item quasi_items { $$ = cons($1, $2); } ; quasi_item : litchars { $$ = lit_char_helper($1); } | TEXT { $$ = string_own($1); } | var { $$ = $1; } | list { $$ = $1; } ; litchars : LITCHAR { $$ = cons(chr($1), nil); } | LITCHAR litchars { $$ = cons(chr($1), $2); } ; %% obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) { obj_t *single_parts = nil; obj_t *first_parts = nil; obj_t *last_parts = nil; obj_t *empty_parts = nil; obj_t *iter; for (iter = parts; iter != nil; iter = cdr(iter)) { obj_t *part = car(iter); obj_t *sym = car(part); obj_t *clauses = cdr(part); if (sym == single) single_parts = nappend2(single_parts, clauses); else if (sym == frst) first_parts = nappend2(first_parts, clauses); else if (sym == lst) last_parts = nappend2(last_parts, clauses); else if (sym == empty) empty_parts = nappend2(empty_parts, clauses); else abort(); } return list(sym, main, single_parts, first_parts, last_parts, empty_parts, nao); } obj_t *define_transform(obj_t *define_form) { obj_t *sym = first(define_form); obj_t *args = second(define_form); if (define_form == nil) return nil; assert (sym == define); if (args == nil) { yyerror("define requires arguments"); return define_form; } if (!consp(args) || !listp(cdr(args))) { yyerror("bad define argument syntax"); return define_form; } else { obj_t *name = first(args); obj_t *params = second(args); if (!symbolp(name)) { yyerror("function name must be a symbol"); return define_form; } if (!proper_listp(params)) { yyerror("invalid function parameter list"); return define_form; } if (!all_satisfy(params, func_n1(symbolp), nil)) yyerror("function parameters must be symbols"); } return define_form; } obj_t *lit_char_helper(obj_t *litchars) { obj_t *ret = nil; if (litchars) { obj_t *len = length(litchars), *iter, *ix; ret = mkustring(len); for (iter = litchars, ix = zero; iter; iter = cdr(iter), ix = plus(ix, one)) { chr_str_set(ret, ix, car(iter)); } } else { ret = nil; } return ret; } obj_t *get_spec(void) { return parsed_spec; }