diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2010-01-26 14:12:35 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2010-01-26 14:12:35 -0800 |
commit | a0d9dcfe97e824e25174a3bfc9116e505eac0a51 (patch) | |
tree | 3b6bd544c75344f0bdb81f0c34226be77519c129 /regex.c | |
parent | 16a6ea72db8469f64f31903bea7b223f0072bca6 (diff) | |
download | txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.tar.gz txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.tar.bz2 txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.zip |
Optimization in derivative-based regex engine.
Exponential memory consumption behavior was observed when
matching the input aaaaaa....
against the regex a?a?a?a?....aaaa....
The fix is to eliminate common subexpressions
from the derivative for the or operator.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 55 |
1 files changed, 54 insertions, 1 deletions
@@ -1237,6 +1237,59 @@ static val reg_nullable(val exp) } } +static val flatten_or(val or_expr) +{ + if (atom(or_expr) || car(or_expr) != or_s) { + return cons(or_expr, nil); + } else { + val left = second(or_expr); + val right = third(or_expr); + return nappend2(flatten_or(left), flatten_or(right)); + } +} + +static val unflatten_or(val exlist) +{ + val f = first(exlist); + val r = rest(exlist); + + if (r) { + return cons(or_s, cons(f, cons(unflatten_or(r), nil))); + } else { + return f; + } +} + +static val unique_first(val exlist) +{ + val f = first(exlist); + val r = rest(exlist); + + if (!memqual(f, r)) + return cons(first(exlist), nil); + return nil; +} + +static val reduce_or(val or_expr) +{ + val left = second(or_expr); + val right = third(or_expr); + + /* + * Do optimization only if this is an or of two or expressions. + */ + + if (consp(left) && first(left) == or_s && + consp(right) && first(right) == or_s) + { + val exlist = flatten_or(or_expr); + val repeats_removed = mapcon(func_n1(unique_first), exlist); + return unflatten_or(repeats_removed); + } else { + return or_expr; + } +} + static val reg_derivative(val, val); static val reg_derivative_list(val exp_list, val ch) @@ -1335,7 +1388,7 @@ static val reg_derivative(val exp, val ch) if (d_arg2 == t) return d_arg1; - return cons(or_s, cons(d_arg1, cons(d_arg2, nil))); + return reduce_or(cons(or_s, cons(d_arg1, cons(d_arg2, nil)))); } else if (sym == and_s) { val d_arg1 = reg_derivative(first(args), ch); val d_arg2 = nil; |