summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2012-03-01 01:05:05 -0800
committerKaz Kylheku <kaz@kylheku.com>2012-03-01 01:05:05 -0800
commit90fd6a84d08ac95f4886e6ae7e42429f75561bfa (patch)
tree9e5df1c2c6f30fa2d296785fedd39231e4efd85d
parentaec0b3b3645d241ea8ad1f06dd64716b7d9c3c00 (diff)
downloadtxr-90fd6a84d08ac95f4886e6ae7e42429f75561bfa.tar.gz
txr-90fd6a84d08ac95f4886e6ae7e42429f75561bfa.tar.bz2
txr-90fd6a84d08ac95f4886e6ae7e42429f75561bfa.zip
Fixing two instances of unintentional O(n*n) behavior and poor memory use
that occur in horizontal matching of basic text patterns. * lib.c (match_str, match_str_tree): New functions. * lib.h (match_str, match_str_tree): Declared. * match.c (do_match_line): Use match_str_tree and match_str when matching strings and string lists, respectively, rather than stupidly calling search functions and then asserting that the match was found at the starting position.
-rw-r--r--ChangeLog14
-rw-r--r--lib.c35
-rw-r--r--lib.h2
-rw-r--r--match.c11
4 files changed, 56 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index f8f120a4..dfc2da59 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2012-03-01 Kaz Kylheku <kaz@kylheku.com>
+
+ Fixing two instances of unintentional O(n*n) behavior and poor memory use
+ that occur in horizontal matching of basic text patterns.
+
+ * lib.c (match_str, match_str_tree): New functions.
+
+ * lib.h (match_str, match_str_tree): Declared.
+
+ * match.c (do_match_line): Use match_str_tree and match_str when matching
+ strings and string lists, respectively, rather than stupidly calling
+ search functions and then asserting that the match was found at the
+ starting position.
+
2012-02-28 Kaz Kylheku <kaz@kylheku.com>
* match.c (do_match_line): Function takes new argument, "completely".
diff --git a/lib.c b/lib.c
index b23b1598..6b80b848 100644
--- a/lib.c
+++ b/lib.c
@@ -1523,6 +1523,41 @@ val search_str_tree(val haystack, val tree, val start_num, val from_end)
return nil;
}
+val match_str(val bigstr, val str, val pos)
+{
+ val i, p;
+
+ for (i = zero;
+ length_str_gt(bigstr, p = plus(pos, i)) && length_str_gt(str, i);
+ i = plus(i, one))
+ {
+ if (chr_str(bigstr, p) != chr_str(str, i))
+ return nil;
+ }
+
+ return length_str_le(str, i) ? t : nil;
+}
+
+val match_str_tree(val bigstr, val tree, val pos)
+{
+ if (stringp(tree)) {
+ if (match_str(bigstr, tree, pos))
+ return length_str(tree);
+ } else if (consp(tree)) {
+ val maxlen = nil;
+
+ for (; tree; tree = cdr(tree)) {
+ val result = match_str_tree(bigstr, car(tree), pos);
+ if (result && (!maxlen || gt(result, maxlen)))
+ maxlen = result;
+ }
+
+ return maxlen;
+ }
+
+ return nil;
+}
+
static val lazy_sub_str(val lstr, val from, val to)
{
val len = nil;
diff --git a/lib.h b/lib.h
index 8c50eb9b..a143a128 100644
--- a/lib.h
+++ b/lib.h
@@ -425,6 +425,8 @@ val length_str(val str);
const wchar_t *c_str(val str);
val search_str(val haystack, val needle, val start_num, val from_end);
val search_str_tree(val haystack, val tree, val start_num, val from_end);
+val match_str(val bigstr, val str, val pos);
+val match_str_tree(val bigstr, val tree, val pos);
val replace_str(val str_in, val items, val from, val to);
val sub_str(val str_in, val from_num, val to_num);
val cat_str(val list, val sep);
diff --git a/match.c b/match.c
index 736e2bed..a65d6d00 100644
--- a/match.c
+++ b/match.c
@@ -1121,15 +1121,15 @@ static val do_match_line(match_line_ctx *c, val completely)
LOG_MATCH("regex", past);
c->pos = past;
} else if (consp(directive) || stringp(directive)) {
- cons_bind (find, len, search_str_tree(c->dataline, elem, c->pos, nil));
+ val len = match_str_tree(c->dataline, elem, c->pos);
val newpos;
- if (find == nil || !equal(find, c->pos)) {
+ if (!len) {
LOG_MISMATCH("string tree");
debug_return (nil);
}
- newpos = plus(find, len);
+ newpos = plus(c->pos, len);
LOG_MATCH("string tree", newpos);
c->pos = newpos;
} else {
@@ -1182,13 +1182,12 @@ static val do_match_line(match_line_ctx *c, val completely)
break;
case STR:
{
- val find = search_str(c->dataline, elem, c->pos, nil);
val newpos;
- if (find == nil || !equal(find, c->pos)) {
+ if (!match_str(c->dataline, elem, c->pos)) {
LOG_MISMATCH("string");
debug_return (nil);
}
- newpos = plus(find, length_str(elem));
+ newpos = plus(c->pos, length_str(elem));
LOG_MATCH("string", newpos);
c->pos = newpos;
break;