diff options
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | parser.h | 1 | ||||
-rw-r--r-- | parser.y | 3 | ||||
-rw-r--r-- | txr.1 | 7 |
4 files changed, 17 insertions, 5 deletions
@@ -1,5 +1,16 @@ 2010-01-19 Kaz Kylheku <kkylheku@gmail.com> + * parser.y (regex): Getting rid of empty '/' '/' production + again. + (regexpr): Re-introducing empty production; this time using + %prec LOW trick to give this interpretation the lowest + possible precedence. Thus expressions like /&/ work again. + (regbranch): New production to allow R1~R2 to be valid. + + * txr.1: Documented. + +2010-01-19 Kaz Kylheku <kkylheku@gmail.com> + * parser.l (grammar): The ^ character is no longer considered a special regex token, just a regular character. @@ -36,3 +36,4 @@ void yyerror(const char *s); void yyerrorf(val s, ...); void yybadtoken(int tok, val context); void end_of_regex(void); +int yylex(void); @@ -451,7 +451,6 @@ expr : IDENT { $$ = intern(string_own($1), nil); } ; regex : '/' regexpr '/' { $$ = $2; end_of_regex(); } - | '/' '/' { $$ = nil; end_of_regex(); } | '/' error { $$ = nil; yybadtoken(yychar, lit("regex")); end_of_regex(); } @@ -463,10 +462,12 @@ regexpr : regbranch { $$ = if3(cdr($1), | regexpr '|' regexpr { $$ = list(or_s, $1, $3, nao); } | regexpr '&' regexpr { $$ = list(and_s, $1, $3, nao); } | '~' regexpr { $$ = list(compl_s, $2, nao); } + | /* empty */ %prec LOW { $$ = nil; } ; regbranch : regterm %prec LOW { $$ = cons($1, nil); } | regterm regbranch { $$ = cons($1, $2); } + | regterm '~' regexpr { $$ = list($1, list(compl_s, $3, nao), nao); } ; regterm : regterm '*' { $$ = list(zeroplus_s, $1, nao); } @@ -695,7 +695,7 @@ string, then R1%R2 is equivalent to R1*. .IP ~R match the complement of the following expression R; i.e. match those texts that R does not match. This operator is called complement, -or logical not. +or logical not. The form R1~R2 is permitted and means R1(~R2) .IP R1R2 Two consecutive regular expressions denote catenation: the left expression must match, and then the right. @@ -735,9 +735,8 @@ means ab((c*)%(d*ef)). The left argument of % is c*, but the right is the entire expression d*ef. The unary complement operator has the next lower precedence, so -that ~A* means the ~(A*): "match the all text that is not matched by zero -or more repetitions of A", not "match zero or more times the text -not matched by A". +that ~AB means ~(AB) not (~A)B. AB~CD means (AB)~(CD) where +the (CD) is complemented, and catenated to (AB). Catenation is on the next lower precedence rung, so that AB? means A(B?), or "match A, and then optionally B", not "match A and B, as one optional |