summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--regex.c62
-rw-r--r--txr.150
2 files changed, 90 insertions, 22 deletions
diff --git a/regex.c b/regex.c
index 8368a8b8..090bcbdd 100644
--- a/regex.c
+++ b/regex.c
@@ -2506,7 +2506,7 @@ val match_regex_len(val str, val regex, val pos)
}
}
-val match_regex_right(val str, val regex, val end)
+static val match_regex_right_old(val str, val regex, val end)
{
val pos = zero;
val slen = length(str);
@@ -2531,6 +2531,52 @@ val match_regex_right(val str, val regex, val end)
return nil;
}
+/* Returns the length of the longest substring of str that ends just before
+ * end and matches regex in its entirety, or nil when there is none. A null or
+ * missing end means length(str); a negative end is relative to the length. */
+val match_regex_right(val str, val regex, val end)
+{
+  val pos = zero;
+  val len = length(str);
+
+  if (null_or_missing_p(end)) {
+    end = len;
+  } else if (minusp(end)) {
+    end = plus(end, len);
+    if (lt(end, zero))
+      return nil;
+  } else if (gt(end, len)) {
+    return nil;
+  }
+
+  /* le rather than lt: the final pass (pos == end) tests the empty substring,
+   * so a regex that can match nothing yields 0 instead of nil. */
+  while (le(pos, end)) {
+    regex_machine_t regm;
+    val i;
+    regm_result_t last_res = REGM_INCOMPLETE;
+
+    regex_machine_init(&regm, regex);
+
+    for (i = pos; lt(i, end); i = plus(i, one)) {
+      last_res = regex_machine_feed(&regm, c_chr(chr_str(str, i)));
+      if (last_res == REGM_FAIL)
+        break;
+    }
+
+    /* The result of feeding 0 after the characters decides this attempt. */
+    last_res = regex_machine_feed(&regm, 0);
+    regex_machine_cleanup(&regm);
+
+    if (last_res == REGM_MATCH)
+      return minus(end, pos);
+
+    pos = succ(pos);
+  }
+
+  return nil;
+}
+
val regsub(val regex, val repl, val str)
{
val isfunc = functionp(repl);
@@ -2594,6 +2640,14 @@ val match_regst(val str, val regex, val pos_in)
return if2(new_pos, sub_str(str, pos, new_pos));
}
+static val match_regst_right_old(val str, val regex, val end)
+{ /* Legacy (compat <= 150) variant: yields the matched text, not its length. */
+  val mlen = match_regex_right_old(str, regex, end); /* nil on no match */
+  val from = if2(mlen, if3(null_or_missing_p(end),
+                           neg(mlen), minus(end, mlen)));
+  return if2(mlen, sub_str(str, from, if3(null_or_missing_p(end), t, end)));
+}
+
val match_regst_right(val str, val regex, val end)
{
val len = match_regex_right(str, regex, end);
@@ -2766,9 +2820,11 @@ void regex_init(void)
match_regex : match_regex_len, 2));
reg_fun(intern(lit("match-regst"), user_package), func_n3o(match_regst, 2));
reg_fun(intern(lit("match-regex-right"), user_package),
- func_n3o(match_regex_right, 2));
+ func_n3o((opt_compat && opt_compat <= 150) ?
+ match_regex_right_old : match_regex_right, 2));
reg_fun(intern(lit("match-regst-right"), user_package),
- func_n3o(match_regst_right, 2));
+ func_n3o((opt_compat && opt_compat <= 150) ?
+ match_regst_right_old : match_regst_right, 2));
reg_fun(intern(lit("regsub"), user_package), func_n3(regsub));
reg_fun(intern(lit("regex-parse"), user_package), func_n2o(regex_parse, 1));
diff --git a/txr.1 b/txr.1
index 60cbee60..91b1a4a0 100644
--- a/txr.1
+++ b/txr.1
@@ -31869,17 +31869,19 @@ matching substring of
.syne
.desc
The
-.code match-regex
-function tests whether
+.code match-regex-right
+function tests whether some substring of
.meta string
-contains a match which ends
-precisely on the character just before
-.metn end-position .
+which terminates at the character position just before
+.meta end-position
+matches
+.metn regex .
If
.meta end-position
is not specified, it defaults to the length of the string, and the function
performs a right-anchored regex match.
+
The
.meta end-position
argument can be a negative integer, in which case it denotes
@@ -31890,23 +31892,26 @@ of the string, then
.code nil
is returned.
+If
+.meta end-position
+is a positive value beyond the length of
+.metn string ,
+then, likewise,
+.code nil
+is returned.
+
If a match is found, then the length of the match is returned.
-The match must terminate just before
-.meta end-position
-in the sense that
-additional characters at
+A more precise way of articulating the role of
.meta end-position
-and beyond can no longer satisfy the
-regular expression. More formally, the function searches, starting from
-position zero, for positions where there occurs a match for the regular
-expression, taking the longest possible match. The length of first such a match
-which terminates on the character just before
+is that for the purposes of matching,
+.meta string
+is considered to terminate just before
+.metn end-position :
+in other words, that
.meta end-position
-is returned.
-If no such a match is found, then
-.code nil
-is returned.
+is the length of the string. The match is then anchored to the
+end of this effective string.
The
.code match-regst-right
@@ -31914,7 +31919,7 @@ differs from
.code match-regex-right
in the representation of the return value in the matching case.
Rather than returning the length of the match, it returns
-matching substring of
+the matching substring of
.metn string .
.TP* Examples:
@@ -45715,6 +45720,13 @@ the behavior. The
function was also affected by this issue; however, since it returned a nonsense
result not corresponding to the matching text, it was repaired without
backward compatibility.
+Also affected by version 150 compatibility are the
+.code match-regex-right
+and
+.code match-regst-right
+functions. These functions worked as documented; however, their
+specification changes after version 150 to a semantics which is
+more useful and less surprising to the programmer.
.IP 148
Up until version 148, the
.code :postinit