diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2012-03-01 01:05:05 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2012-03-01 01:05:05 -0800 |
commit | 90fd6a84d08ac95f4886e6ae7e42429f75561bfa (patch) | |
tree | 9e5df1c2c6f30fa2d296785fedd39231e4efd85d | |
parent | aec0b3b3645d241ea8ad1f06dd64716b7d9c3c00 (diff) | |
download | txr-90fd6a84d08ac95f4886e6ae7e42429f75561bfa.tar.gz txr-90fd6a84d08ac95f4886e6ae7e42429f75561bfa.tar.bz2 txr-90fd6a84d08ac95f4886e6ae7e42429f75561bfa.zip |
Fixing two instances of unintentional O(n*n) behavior and poor memory use
that occur in horizontal matching of basic text patterns.
* lib.c (match_str, match_str_tree): New functions.
* lib.h (match_str, match_str_tree): Declared.
* match.c (do_match_line): Use match_str_tree and match_str when matching
strings and string lists, respectively, rather than stupidly calling
search functions and then asserting that the match was found at the
starting position.
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | lib.c | 35 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | match.c | 11 |
4 files changed, 56 insertions, 6 deletions
@@ -1,3 +1,17 @@ +2012-03-01 Kaz Kylheku <kaz@kylheku.com> + + Fixing two instances of unintentional O(n*n) behavior and poor memory use + that occur in horizontal matching of basic text patterns. + + * lib.c (match_str, match_str_tree): New functions. + + * lib.h (match_str, match_str_tree): Declared. + + * match.c (do_match_line): Use match_str_tree and match_str when matching + strings and string lists, respectively, rather than stupidly calling + search functions and then asserting that the match was found at the + starting position. + 2012-02-28 Kaz Kylheku <kaz@kylheku.com> * match.c (do_match_line): Function takes new argument, "completely". @@ -1523,6 +1523,41 @@ val search_str_tree(val haystack, val tree, val start_num, val from_end) return nil; } +val match_str(val bigstr, val str, val pos) +{ + val i, p; + + for (i = zero; + length_str_gt(bigstr, p = plus(pos, i)) && length_str_gt(str, i); + i = plus(i, one)) + { + if (chr_str(bigstr, p) != chr_str(str, i)) + return nil; + } + + return length_str_le(str, i) ? t : nil; +} + +val match_str_tree(val bigstr, val tree, val pos) +{ + if (stringp(tree)) { + if (match_str(bigstr, tree, pos)) + return length_str(tree); + } else if (consp(tree)) { + val maxlen = nil; + + for (; tree; tree = cdr(tree)) { + val result = match_str_tree(bigstr, car(tree), pos); + if (result && (!maxlen || gt(result, maxlen))) + maxlen = result; + } + + return maxlen; + } + + return nil; +} + static val lazy_sub_str(val lstr, val from, val to) { val len = nil; @@ -425,6 +425,8 @@ val length_str(val str); const wchar_t *c_str(val str); val search_str(val haystack, val needle, val start_num, val from_end); val search_str_tree(val haystack, val tree, val start_num, val from_end); +val match_str(val bigstr, val str, val pos); +val match_str_tree(val bigstr, val tree, val pos); val replace_str(val str_in, val items, val from, val to); val sub_str(val str_in, val from_num, val to_num); val cat_str(val list, val sep); @@ -1121,15 +1121,15 @@ static val do_match_line(match_line_ctx *c, val completely) LOG_MATCH("regex", past); c->pos = past; } else if (consp(directive) || stringp(directive)) { - cons_bind (find, len, search_str_tree(c->dataline, elem, c->pos, nil)); + val len = match_str_tree(c->dataline, elem, c->pos); val newpos; - if (find == nil || !equal(find, c->pos)) { + if (!len) { LOG_MISMATCH("string tree"); debug_return (nil); } - newpos = plus(find, len); + newpos = plus(c->pos, len); LOG_MATCH("string tree", newpos); c->pos = newpos; } else { @@ -1182,13 +1182,12 @@ static val do_match_line(match_line_ctx *c, val completely) break; case STR: { - val find = search_str(c->dataline, elem, c->pos, nil); val newpos; - if (find == nil || !equal(find, c->pos)) { + if (!match_str(c->dataline, elem, c->pos)) { LOG_MISMATCH("string"); debug_return (nil); } - newpos = plus(find, length_str(elem)); + newpos = plus(c->pos, length_str(elem)); LOG_MATCH("string", newpos); c->pos = newpos; break; |