diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2016-10-03 06:47:29 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2016-10-03 06:47:29 -0700 |
commit | 4fbc51dadaeb3d25887cec5bf824b8992a960b02 (patch) | |
tree | 2036769ba4ae18dc90aea7ec3a70f93fd61d649d /regex.c | |
parent | 51c3df60e45f40335904ce6255896ee0f661d856 (diff) | |
download | txr-4fbc51dadaeb3d25887cec5bf824b8992a960b02.tar.gz txr-4fbc51dadaeb3d25887cec5bf824b8992a960b02.tar.bz2 txr-4fbc51dadaeb3d25887cec5bf824b8992a960b02.zip |
search-regex improvement: negative start and more.
* regex.c (search_regex): Handle negative starting positions
according to the convention elsewhere and fail excessively
negative ones. Consistently fail on starting positions
exceeding the length of the string. Handle zero length
matches by reporting them against the start position
or position one past the last character, based on the
value of from-end.
* txr.1: search-regex documentation updated.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 92 |
1 files changed, 52 insertions, 40 deletions
@@ -2394,62 +2394,74 @@ static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch)
 
 val search_regex(val haystack, val needle_regex, val start, val from_end)
 {
+  val slen = nil;
   start = default_arg(start, zero);
   from_end = default_bool_arg(from_end);
 
-  if (length_str_lt(haystack, start)) {
-    return nil;
+  if (minusp(start)) {
+    slen = length_str(haystack);
+    start = plus(start, slen);
+    if (minusp(start))
+      start = zero;
+  }
+
+  if (from_end) {
+    cnum i;
+    cnum s = c_num(start);
+    const wchar_t *h = c_str(haystack);
+
+    slen = (slen ? slen : length_str(haystack));
+
+    if (regex_run(needle_regex, L"") >= 0)
+      return cons(slen, zero);
+
+    for (i = c_num(slen) - 1; i >= s; i--) {
+      cnum span = regex_run(needle_regex, h + i);
+      if (span >= 0)
+        return cons(num(i), num(span));
+    }
+
+    gc_hint(haystack);
   } else {
-    if (from_end) {
-      cnum i;
-      cnum s = c_num(start);
-      const wchar_t *h = c_str(haystack);
-
-      for (i = c_num(length_str(haystack)) - 1; i >= s; i--) {
-        cnum span = regex_run(needle_regex, h + i);
-        if (span >= 0)
-          return cons(num(i), num(span));
-      }
+    regex_machine_t regm;
+    val i, pos = start, retval;
+    regm_result_t last_res = REGM_INCOMPLETE;
 
-      gc_hint(haystack);
-    } else {
-      regex_machine_t regm;
-      val i, pos = start, retval;
-      regm_result_t last_res = REGM_INCOMPLETE;
+    if (length_str_lt(haystack, pos))
+      return nil;
 
-      regex_machine_init(&regm, needle_regex);
+    regex_machine_init(&regm, needle_regex);
 
 again:
-      for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) {
-        last_res = regex_machine_feed(&regm, c_chr(chr_str(haystack, i)));
+    for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) {
+      last_res = regex_machine_feed(&regm, c_chr(chr_str(haystack, i)));
 
+      if (last_res == REGM_FAIL) {
+        last_res = regex_machine_feed(&regm, 0);
         if (last_res == REGM_FAIL) {
-          last_res = regex_machine_feed(&regm, 0);
-          if (last_res == REGM_FAIL) {
-            regex_machine_reset(&regm);
-            pos = plus(pos, one);
-            goto again;
-          }
-          break;
+          regex_machine_reset(&regm);
+          pos = plus(pos, one);
+          goto again;
         }
+        break;
       }
+    }
 
-      last_res = regex_machine_feed(&regm, 0);
+    last_res = regex_machine_feed(&regm, 0);
 
-      switch (last_res) {
-      case REGM_INCOMPLETE:
-      case REGM_MATCH:
-        retval = cons(pos, num(regex_machine_match_span(&regm)));
-        regex_machine_cleanup(&regm);
-        return retval;
-      case REGM_FAIL:
-        regex_machine_cleanup(&regm);
-        return nil;
-      }
+    switch (last_res) {
+    case REGM_INCOMPLETE:
+    case REGM_MATCH:
+      retval = cons(pos, num(regex_machine_match_span(&regm)));
+      regex_machine_cleanup(&regm);
+      return retval;
+    case REGM_FAIL:
+      regex_machine_cleanup(&regm);
+      return nil;
     }
-
-    return nil;
   }
+
+  return nil;
 }
 
 val range_regex(val haystack, val needle_regex, val start,