diff options
-rw-r--r-- | regex.c | 92 | ||||
-rw-r--r-- | txr.1 | 66 |
2 files changed, 110 insertions, 48 deletions
@@ -2394,62 +2394,74 @@ static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch) val search_regex(val haystack, val needle_regex, val start, val from_end) { + val slen = nil; start = default_arg(start, zero); from_end = default_bool_arg(from_end); - if (length_str_lt(haystack, start)) { - return nil; + if (minusp(start)) { + slen = length_str(haystack); + start = plus(start, slen); + if (minusp(start)) + start = zero; + } + + if (from_end) { + cnum i; + cnum s = c_num(start); + const wchar_t *h = c_str(haystack); + + slen = (slen ? slen : length_str(haystack)); + + if (regex_run(needle_regex, L"") >= 0) + return cons(slen, zero); + + for (i = c_num(slen) - 1; i >= s; i--) { + cnum span = regex_run(needle_regex, h + i); + if (span >= 0) + return cons(num(i), num(span)); + } + + gc_hint(haystack); } else { - if (from_end) { - cnum i; - cnum s = c_num(start); - const wchar_t *h = c_str(haystack); - - for (i = c_num(length_str(haystack)) - 1; i >= s; i--) { - cnum span = regex_run(needle_regex, h + i); - if (span >= 0) - return cons(num(i), num(span)); - } + regex_machine_t regm; + val i, pos = start, retval; + regm_result_t last_res = REGM_INCOMPLETE; - gc_hint(haystack); - } else { - regex_machine_t regm; - val i, pos = start, retval; - regm_result_t last_res = REGM_INCOMPLETE; + if (length_str_lt(haystack, pos)) + return nil; - regex_machine_init(&regm, needle_regex); + regex_machine_init(&regm, needle_regex); again: - for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) { - last_res = regex_machine_feed(&regm, c_chr(chr_str(haystack, i))); + for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) { + last_res = regex_machine_feed(&regm, c_chr(chr_str(haystack, i))); + if (last_res == REGM_FAIL) { + last_res = regex_machine_feed(&regm, 0); if (last_res == REGM_FAIL) { - last_res = regex_machine_feed(&regm, 0); - if (last_res == REGM_FAIL) { - regex_machine_reset(&regm); - pos = plus(pos, one); - goto again; - } - break; + regex_machine_reset(&regm); + pos =
plus(pos, one); + goto again; } + break; } + } - last_res = regex_machine_feed(&regm, 0); + last_res = regex_machine_feed(&regm, 0); - switch (last_res) { - case REGM_INCOMPLETE: - case REGM_MATCH: - retval = cons(pos, num(regex_machine_match_span(&regm))); - regex_machine_cleanup(&regm); - return retval; - case REGM_FAIL: - regex_machine_cleanup(&regm); - return nil; - } + switch (last_res) { + case REGM_INCOMPLETE: + case REGM_MATCH: + retval = cons(pos, num(regex_machine_match_span(&regm))); + regex_machine_cleanup(&regm); + return retval; + case REGM_FAIL: + regex_machine_cleanup(&regm); + return nil; } - - return nil; } + + return nil; } val range_regex(val haystack, val needle_regex, val start, @@ -32162,6 +32162,7 @@ at position .meta start for a match for .metn regex . + If .meta start is omitted, the search starts at position 0. If @@ -32169,9 +32170,37 @@ is omitted, the search starts at position 0. If is specified and has a .cod2 non- nil value, the search -proceeds in reverse, from the last position in the string, toward +proceeds in reverse, from the position just beyond the last character of +.metn string , +toward .metn start . -This function returns + +If +.meta start +exceeds the length of the string, then +.code search-regex +returns +.codn nil . + +If +.meta start +is negative then it indicates positions from the end of the string, +such that -1 is the last character, -2 the second last and so forth. +If the value is so negative that it refers beyond the start of +the string, then the starting position is deemed to be zero. + +If +.meta start +is equal to the length of +.metn string , +and thus refers to the position one character past its +length, then a match occurs at that position if +.meta regex +admits such a match. + +The +.code search-regex +function returns .code nil if no match is found, otherwise it returns a cons, whose @@ -32180,6 +32209,18 @@ indicates the position of the match, and whose .code cdr indicates the length of the match. 
+If +.meta regex +is capable of matching empty strings, and no other kind of match +is found within +.metn string , +then search-regex reports a zero-length match. If +.meta from-end +is false, then this match is reported at +.metn start , +otherwise it is reported at the position one character beyond +the end of the string. + The .code range-regex function is similar to @@ -32733,13 +32774,22 @@ argument defaults to the length of so that the end position coincides with the end of the string. -A value in either parameter which is excessively -negative or positive, such that it indexes before -the start of the string or exceeds its length -results in a failed match and consequently -.code nil +With one exception, a value in either parameter which is excessively negative +or positive, such that it indexes before the start of the string or exceeds its +length, results in a failed match and consequently +.codn nil +being returned. The exception is that the +.code rr +function permits a negative +.meta position +value which refers before the start of the string; this is effectively +treated as zero. + +The +.meta from-end +argument defaults to +.codn nil . -being returned. The .code r^$ function tests whether the entire portion of |