From 977d61d2229d3517226edfb3a32b929c6d95d19b Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 24 Sep 2015 21:16:56 -0700 Subject: regex: major optimization for complement operator. This change a huge improvement for expressions that use complement, directly or via the non-greedy % operator. * regex.c (reg_matches_all): New static function. (reg_derivative): When the dervative is applied to a complement expression, identify situations when the remaining expression cannot possibly match anything, and convert them to the t expression. --- regex.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'regex.c') diff --git a/regex.c b/regex.c index 9cfd60d8..cb490d40 100644 --- a/regex.c +++ b/regex.c @@ -1450,6 +1450,46 @@ static val reg_nullable(val exp) } } +static val reg_matches_all(val exp) +{ + if (atom(exp)) { + return nil; + } else { + val sym = first(exp), args = rest(exp); + + if (sym == set_s || sym == cset_s) { + return nil; + } else if (sym == compound_s) { + val am = nil; + for (; args; args = cdr(args)) { + val arg = car(args); + if (!reg_nullable(arg)) + return nil; + if (!am && reg_matches_all(arg)) + am = t; + } + return am; + } else if (sym == oneplus_s) { + return reg_matches_all(car(args)); + } else if (sym == zeroplus_s) { + val arg = car(args); + if (arg == wild_s || reg_matches_all(arg)) + return t; + return nil; + } else if (sym == optional_s) { + return reg_matches_all(car(args)); + } else if (sym == compl_s) { + return tnil(car(args)); + } else if (sym == or_s) { + return tnil(reg_matches_all(pop(&args)) || reg_matches_all(pop(&args))); + } else if (sym == and_s) { + return tnil(reg_matches_all(pop(&args)) && reg_matches_all(pop(&args))); + } else { + internal_error("bad operator in regex"); + } + } +} + static val flatten_or(val or_expr) { if (atom(or_expr) || car(or_expr) != or_s) { @@ -1588,7 +1628,12 @@ static val reg_derivative(val exp, val ch) return exp; return cons(compound_s, cons(d_arg, cons(exp, nil))); } else if (sym == compl_s) { - return cons(sym, cons(reg_derivative(first(args), ch), nil)); + val d_arg = reg_derivative(first(args), ch); + if (reg_matches_all(d_arg)) + return t; + if (d_arg == t) + return nil; + return cons(sym, cons(d_arg, nil)); } else if (sym == or_s) { val d_arg1 = reg_derivative(first(args), ch); val d_arg2 = reg_derivative(second(args), ch); -- cgit v1.2.3