summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- regex.c 92
-rw-r--r-- txr.1 66
2 files changed, 110 insertions, 48 deletions
diff --git a/regex.c b/regex.c
index 80923521..ed2fc2d5 100644
--- a/regex.c
+++ b/regex.c
@@ -2394,62 +2394,74 @@ static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch)
val search_regex(val haystack, val needle_regex, val start,
val from_end)
{
+ val slen = nil;
start = default_arg(start, zero);
from_end = default_bool_arg(from_end);
- if (length_str_lt(haystack, start)) {
- return nil;
+ if (minusp(start)) {
+ slen = length_str(haystack);
+ start = plus(start, slen);
+ if (minusp(start))
+ start = zero;
+ }
+
+ if (from_end) {
+ cnum i;
+ cnum s = c_num(start);
+ const wchar_t *h = c_str(haystack);
+
+ slen = (slen ? slen : length_str(haystack));
+
+ if (regex_run(needle_regex, L"") >= 0)
+ return cons(slen, zero);
+
+ for (i = c_num(slen) - 1; i >= s; i--) {
+ cnum span = regex_run(needle_regex, h + i);
+ if (span >= 0)
+ return cons(num(i), num(span));
+ }
+
+ gc_hint(haystack);
} else {
- if (from_end) {
- cnum i;
- cnum s = c_num(start);
- const wchar_t *h = c_str(haystack);
-
- for (i = c_num(length_str(haystack)) - 1; i >= s; i--) {
- cnum span = regex_run(needle_regex, h + i);
- if (span >= 0)
- return cons(num(i), num(span));
- }
+ regex_machine_t regm;
+ val i, pos = start, retval;
+ regm_result_t last_res = REGM_INCOMPLETE;
- gc_hint(haystack);
- } else {
- regex_machine_t regm;
- val i, pos = start, retval;
- regm_result_t last_res = REGM_INCOMPLETE;
+ if (length_str_lt(haystack, pos))
+ return nil;
- regex_machine_init(&regm, needle_regex);
+ regex_machine_init(&regm, needle_regex);
again:
- for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) {
- last_res = regex_machine_feed(&regm, c_chr(chr_str(haystack, i)));
+ for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) {
+ last_res = regex_machine_feed(&regm, c_chr(chr_str(haystack, i)));
+ if (last_res == REGM_FAIL) {
+ last_res = regex_machine_feed(&regm, 0);
if (last_res == REGM_FAIL) {
- last_res = regex_machine_feed(&regm, 0);
- if (last_res == REGM_FAIL) {
- regex_machine_reset(&regm);
- pos = plus(pos, one);
- goto again;
- }
- break;
+ regex_machine_reset(&regm);
+ pos = plus(pos, one);
+ goto again;
}
+ break;
}
+ }
- last_res = regex_machine_feed(&regm, 0);
+ last_res = regex_machine_feed(&regm, 0);
- switch (last_res) {
- case REGM_INCOMPLETE:
- case REGM_MATCH:
- retval = cons(pos, num(regex_machine_match_span(&regm)));
- regex_machine_cleanup(&regm);
- return retval;
- case REGM_FAIL:
- regex_machine_cleanup(&regm);
- return nil;
- }
+ switch (last_res) {
+ case REGM_INCOMPLETE:
+ case REGM_MATCH:
+ retval = cons(pos, num(regex_machine_match_span(&regm)));
+ regex_machine_cleanup(&regm);
+ return retval;
+ case REGM_FAIL:
+ regex_machine_cleanup(&regm);
+ return nil;
}
-
- return nil;
}
+
+ return nil;
}
val range_regex(val haystack, val needle_regex, val start,
diff --git a/txr.1 b/txr.1
index 37c5a4a8..ba023aa2 100644
--- a/txr.1
+++ b/txr.1
@@ -32162,6 +32162,7 @@ at position
.meta start
for a match for
.metn regex .
+
If
.meta start
is omitted, the search starts at position 0. If
@@ -32169,9 +32170,37 @@ is omitted, the search starts at position 0. If
is specified and has a
.cod2 non- nil
value, the search
-proceeds in reverse, from the last position in the string, toward
+proceeds in reverse, from the position just beyond the last character of
+.metn string ,
+toward
.metn start .
-This function returns
+
+If
+.meta start
+exceeds the length of the string, then
+.code search-regex
+returns
+.codn nil .
+
+If
+.meta start
+is negative, then it indicates a position counted from the end of the string,
+such that -1 is the last character, -2 the second last, and so forth.
+If the value is so negative that it refers beyond the start of
+the string, then the starting position is deemed to be zero.
+
+If
+.meta start
+is equal to the length of
+.metn string ,
+and thus refers to the position one character past its
+last character, then a match occurs at that position if
+.meta regex
+admits such a match.
+
+The
+.code search-regex
+function returns
.code nil
if no match is found, otherwise it returns
a cons, whose
@@ -32180,6 +32209,18 @@ indicates the position of the match, and whose
.code cdr
indicates the length of the match.
+If
+.meta regex
+is capable of matching empty strings, and no other kind of match
+is found within
+.metn string ,
+then search-regex reports a zero-length match. If
+.meta from-end
+is false, then this match is reported at
+.metn start ,
+otherwise it is reported at the position one character beyond
+the end of the string.
+
The
.code range-regex
function is similar to
@@ -32733,13 +32774,22 @@ argument defaults to the length of
so that the end position coincides with the end of the
string.
-A value in either parameter which is excessively
-negative or positive, such that it indexes before
-the start of the string or exceeds its length
-results in a failed match and consequently
-.code nil
+With one exception, a value in either parameter which is excessively negative
+or positive, such that it indexes before the start of the string or exceeds its
+length, results in a failed match and consequently
+.code nil
+being returned. The exception is that the
+.code rr
+function permits a negative
+.meta position
+value which refers before the start of the string; this is effectively
+treated as zero.
+
+The
+.meta from-end
+argument defaults to
+.codn nil .
-being returned.
The
.code r^$
function tests whether the entire portion of