From 27ece3ffc30437f4a70228b96764e306bd5ed4f1 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 22 Jun 2021 07:15:58 -0700 Subject: lib: optimize mismatch, rmismatch for strings. * lib (mismatch, rmismatch): If the arguments are strings or literals, other than lazy strings, keyfun is identity, and equality is by character identity, the operation can be done with an efficient loop over the wchar_t strings. * tests/012/seq.tl: Tests for string case of mismatch, via starts-with function. Test mismatch via ends-with, and also directly for vectors and strings. --- lib.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- tests/012/seq.tl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/lib.c b/lib.c index 8df9757a..dce7b2c3 100644 --- a/lib.c +++ b/lib.c @@ -10882,10 +10882,29 @@ val mismatch(val left, val right, val testfun_in, val keyfun_in) case CONS: case LCONS: return mismatch(right, left, testfun, keyfun); - case VEC: - case LIT: case STR: + case LIT: + switch (type(left)) { + case STR: + case LIT: + if (keyfun == identity_f && (testfun == equal_f || + testfun == eql_f || + testfun == eq_f)) + { + const wchar_t *lft = c_str(left), *le = lft; + const wchar_t *rgt = c_str(right), *ri = rgt; + + while (*le && *ri && *le == *ri) + le++, ri++; + + return if3(*le || *ri, num(le - lft), nil); + } + default: + break; + } + /* fallthrough */ case LSTR: + case VEC: { val llen = length(left); val rlen = length(right); @@ -10913,6 +10932,7 @@ val mismatch(val left, val right, val testfun_in, val keyfun_in) val rmismatch(val left, val right, val testfun_in, val keyfun_in) { + val self = lit("rmismatch"); val testfun = default_arg(testfun_in, equal_f); val keyfun = default_arg(keyfun_in, identity_f); @@ -10980,9 +11000,32 @@ val rmismatch(val left, val right, val testfun_in, val keyfun_in) case CONS: case LCONS: return rmismatch(right, left, testfun, keyfun); - case VEC: case LIT: case STR: + switch (type(left)) { + case STR: + case LIT: + if (keyfun == identity_f && (testfun == equal_f || + testfun == eql_f || + testfun == eq_f)) + { + cnum ll = c_num(length(left), self), li = ll - 1; + cnum rl = c_num(length(right), self), ri = rl - 1; + const wchar_t *lft = c_str(left); + const wchar_t *rgt = c_str(right); + + for (; li >= 0 && ri >= 0; li--, ri--) { + if (lft[li] != rgt[ri]) + break; + } + + return if2(li >= 0 || ri >= 0, num(li - ll)); + } + default: + break; + } + /* fallthrough */ + case VEC: case LSTR: { val llen = length(left); diff --git a/tests/012/seq.tl b/tests/012/seq.tl index 95ba7b6e..936dc8c9 100644 --- a/tests/012/seq.tl +++ b/tests/012/seq.tl @@ -82,3 +82,52 @@ [reduce-left cons #(1) : (op * 10)] 10 [reduce-left cons #(1) 2 (op * 10)] (2 . 10) [reduce-left cons #(2 3) 10 (op * 10)] ((10 . 20) . 30)) + +(mtest + (starts-with "" "") t + (starts-with "" "a") t + (starts-with "a" "") nil + (starts-with "a" "a") t + (starts-with "" "abc") t + (starts-with "abc" "") nil + (starts-with "abc" "abc") t + (starts-with "ab" "abc") t + (starts-with "bc" "abc") nil + ) + +(mtest + (ends-with "" "") t + (ends-with "" "a") t + (ends-with "a" "") nil + (ends-with "a" "a") t + (ends-with "" "abc") t + (ends-with "abc" "") nil + (ends-with "abc" "abc") t + (ends-with "ab" "abc") nil + (ends-with "bc" "abc") t) + +(mtest + (rmismatch #() #()) nil + (rmismatch #(1) #()) -1 + (rmismatch #() #(1)) -1 + (rmismatch #(1) #(1)) nil + (rmismatch #(1 2) #(1 2)) nil + (rmismatch #(2 2) #(1 2)) -2 + (rmismatch #(1 2) #(2 2)) -2 + (rmismatch #(3 2 1) #(1 1)) -2 + (rmismatch #(1 1) #(3 2 1)) -2 + (rmismatch #(3 2 1) #(2 1)) -3 + (rmismatch #(2 1) #(3 2 1)) -3) + +(mtest + (rmismatch "" "") nil + (rmismatch "1" "") -1 + (rmismatch "" "1") -1 + (rmismatch "1" "1") nil + (rmismatch "12" "12") nil + (rmismatch "22" "12") -2 + (rmismatch "12" "22") -2 + (rmismatch "321" "11") -2 + (rmismatch "11" "321") -2 + (rmismatch "321" "21") -3 + (rmismatch "21" "321") -3) -- cgit v1.2.3