diff options
-rw-r--r-- | regex.c | 62 | ||||
-rw-r--r-- | txr.1 | 50 |
2 files changed, 90 insertions, 22 deletions
@@ -2506,7 +2506,7 @@ val match_regex_len(val str, val regex, val pos) } } -val match_regex_right(val str, val regex, val end) +static val match_regex_right_old(val str, val regex, val end) { val pos = zero; val slen = length(str); @@ -2531,6 +2531,52 @@ val match_regex_right(val str, val regex, val end) return nil; } +val match_regex_right(val str, val regex, val end) +{ + val pos = zero; + val len = length(str); + + if (null_or_missing_p(end)) { + end = len; + } else if (minusp(end)) { + end = plus(end, len); + if (lt(end, zero)) + return nil; + } else if (gt(end, len)) { + return nil; + } + + while (lt(pos, end)) { + regex_machine_t regm; + val i ; + regm_result_t last_res = REGM_INCOMPLETE; + + regex_machine_init(&regm, regex); + + for (i = pos; lt(i, end); i = plus(i, one)) { + last_res = regex_machine_feed(&regm, c_chr(chr_str(str, i))); + if (last_res == REGM_FAIL) + break; + } + + last_res = regex_machine_feed(&regm, 0); + + switch (last_res) { + case REGM_MATCH: + regex_machine_cleanup(&regm); + return minus(end, pos); + case REGM_INCOMPLETE: + case REGM_FAIL: + regex_machine_cleanup(&regm); + break; + } + + pos = succ(pos); + } + + return nil; +} + val regsub(val regex, val repl, val str) { val isfunc = functionp(repl); @@ -2594,6 +2640,14 @@ val match_regst(val str, val regex, val pos_in) return if2(new_pos, sub_str(str, pos, new_pos)); } +static val match_regst_right_old(val str, val regex, val end) +{ + val len = match_regex_right_old(str, regex, end); + return if2(len, if3(null_or_missing_p(end), + sub_str(str, neg(len), t), + sub_str(str, minus(end, len), end))); +} + val match_regst_right(val str, val regex, val end) { val len = match_regex_right(str, regex, end); @@ -2766,9 +2820,11 @@ void regex_init(void) match_regex : match_regex_len, 2)); reg_fun(intern(lit("match-regst"), user_package), func_n3o(match_regst, 2)); reg_fun(intern(lit("match-regex-right"), user_package), - func_n3o(match_regex_right, 2)); + func_n3o((opt_compat && opt_compat <= 150) ? 
+ match_regex_right_old : match_regex_right, 2)); reg_fun(intern(lit("match-regst-right"), user_package), - func_n3o(match_regst_right, 2)); + func_n3o((opt_compat && opt_compat <= 150) ? + match_regst_right_old : match_regst_right, 2)); reg_fun(intern(lit("regsub"), user_package), func_n3(regsub)); reg_fun(intern(lit("regex-parse"), user_package), func_n2o(regex_parse, 1)); @@ -31869,17 +31869,19 @@ matching substring of .syne .desc The -.code match-regex -function tests whether +.code match-regex-right +function tests whether some substring of .meta string -contains a match which ends -precisely on the character just before -.metn end-position . +which terminates at the character position just before +.meta end-position +matches +.metn regex . If .meta end-position is not specified, it defaults to the length of the string, and the function performs a right-anchored regex match. + The .meta end-position argument can be a negative integer, in which case it denotes @@ -31890,23 +31892,26 @@ of the string, then .code nil is returned. +If +.meta end-position +is a positive value beyond the length of +.metn string , +then, likewise, +.code nil +is returned. + If a match is found, then the length of the match is returned. -The match must terminate just before -.meta end-position -in the sense that -additional characters at +A more precise way of articulating the role of .meta end-position -and beyond can no longer satisfy the -regular expression. More formally, the function searches, starting from -position zero, for positions where there occurs a match for the regular -expression, taking the longest possible match. The length of first such a match -which terminates on the character just before +is that for the purposes of matching, +.code string +is considered to terminate just before +.metn end-position : +in other words, that .meta end-position -is returned. -If no such a match is found, then -.code nil -is returned. +is the length of the string. 
The match is then anchored to the +end of this effective string. The .code match-regst-right @@ -31914,7 +31919,7 @@ differs from .code match-regst-right in the representation of the return value in the matching case. Rather than returning the length of the match, it returns -matching substring of +the matching substring of .metn string . .TP* Examples: @@ -45715,6 +45720,13 @@ the behavior. The function was also affected by this issue; however, since it returned nonsense result not corresponding to the matching text, it was repaired without backward compatibility. +Also affected by version 150 compatibility are the +.code match-regex-right +and +.code match-regst-right +functions. These functions worked as documented; however, their +specification changes after version 150 to a semantics which is +more useful and less surprising to the programmer. .IP 148 Up until version 148, the .code :postinit |