summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-09-23 06:39:56 -0700
committerKaz Kylheku <kaz@kylheku.com>2016-09-23 06:39:56 -0700
commit83f7efdc4127c9807bdc46708ef5036d5fdafc51 (patch)
treed39d6b99503a98909ecb27db2fe5b6c703bcc24d
parent6cb0284cc3fe66c4c20a09a651ba897ed6e2f71e (diff)
downloadtxr-83f7efdc4127c9807bdc46708ef5036d5fdafc51.tar.gz
txr-83f7efdc4127c9807bdc46708ef5036d5fdafc51.tar.bz2
txr-83f7efdc4127c9807bdc46708ef5036d5fdafc51.zip
New regex functions: m^$, m^, m$, and others.
* regex.c (do_match_full, do_match_full_offs, do_match_left, do_match_left_offs, do_match_right, do_match_right_offs): New static functions. (regex_match_full_fun, regex_match_right_fun, regex_match_full, regex_match_left, regex_match_right, regex_range_full, regex_range_left, regex_range_right): New functions. (regex_init): Register f^$, f^, f$, m^$, m^, m$, r^$, r^ and r$ intrinsics. * regex.h (regex_match_full_fun, regex_match_right_fun, regex_match_full, regex_match_left, regex_match_right, regex_range_full, regex_range_left, regex_range_right): Declared. * txr.1: Documented new functions.
-rw-r--r--regex.c130
-rw-r--r--regex.h9
-rw-r--r--txr.1233
3 files changed, 372 insertions, 0 deletions
diff --git a/regex.c b/regex.c
index 090bcbdd..fc31394c 100644
--- a/regex.c
+++ b/regex.c
@@ -2655,6 +2655,127 @@ val match_regst_right(val str, val regex, val end)
sub_str(str, neg(len), t),
sub_str(str, minus(end, len), end)));
}
+/*
+ * Worker for the function object produced by regex_match_full_fun when
+ * no position is supplied. Matches regex against str from position zero
+ * and yields str only if the match result equals the length of str.
+ */
+static val do_match_full(val regex, val str)
+{
+  val reached = match_regex(str, regex, zero);
+  val total = length_str(str);
+  return if2(eql(reached, total), str);
+}
+
+/*
+ * Worker for the function object produced by regex_match_full_fun when a
+ * position is supplied. env is a cons of (regex . position). A negative
+ * position is offset by the length of str before matching. The portion of
+ * str from that position onward is yielded only if the match result
+ * equals the length of str.
+ */
+static val do_match_full_offs(val env, val str)
+{
+  cons_bind (rx, start_in, env);
+  val total = length_str(str);
+  val start = if3(minusp(start_in), plus(start_in, total), start_in);
+  val reached = match_regex(str, rx, start);
+  return if2(eql(reached, total), sub_str(str, start, t));
+}
+
+/*
+ * Implementation of the f^$ intrinsic: builds a one-argument function
+ * which applies the full-match test to the string it is given. The
+ * closure environment is the bare regex when pos is nil or missing, and
+ * a (regex . pos) cons otherwise.
+ */
+val regex_match_full_fun(val regex, val pos)
+{
+  return null_or_missing_p(pos)
+         ? func_f1(regex, do_match_full)
+         : func_f1(cons(regex, pos), do_match_full_offs);
+}
+
+/*
+ * Worker for the function object produced by regex_match_left_fun when
+ * no position is supplied: delegates to match_regst at position zero.
+ */
+static val do_match_left(val regex, val str)
+{
+  val prefix = match_regst(str, regex, zero);
+  return prefix;
+}
+
+/*
+ * Worker for the function object produced by regex_match_left_fun when a
+ * position is supplied. env is a cons of (regex . position); both are
+ * handed to match_regst together with str.
+ */
+static val do_match_left_offs(val env, val str)
+{
+  cons_bind (rx, start, env);
+  val prefix = match_regst(str, rx, start);
+  return prefix;
+}
+
+/*
+ * Implementation of the f^ intrinsic: builds a one-argument function
+ * which performs a left-anchored match on the string it is given. The
+ * closure environment is the bare regex when pos is nil or missing, and
+ * a (regex . pos) cons otherwise.
+ */
+val regex_match_left_fun(val regex, val pos)
+{
+  return null_or_missing_p(pos)
+         ? func_f1(regex, do_match_left)
+         : func_f1(cons(regex, pos), do_match_left_offs);
+}
+
+/*
+ * Worker for the function object produced by regex_match_right_fun when
+ * no end position is supplied: delegates to match_regst_right with a nil
+ * end argument.
+ */
+static val do_match_right(val regex, val str)
+{
+  val suffix = match_regst_right(str, regex, nil);
+  return suffix;
+}
+
+/*
+ * Worker for the function object produced by regex_match_right_fun when
+ * an end position is supplied. env is a cons of (regex . end); both are
+ * handed to match_regst_right together with str.
+ */
+static val do_match_right_offs(val env, val str)
+{
+  cons_bind (rx, limit, env);
+  val suffix = match_regst_right(str, rx, limit);
+  return suffix;
+}
+
+/*
+ * Implementation of the f$ intrinsic: builds a one-argument function
+ * which performs a right-anchored match on the string it is given. The
+ * closure environment is the bare regex when end is nil or missing, and
+ * a (regex . end) cons otherwise.
+ */
+val regex_match_right_fun(val regex, val end)
+{
+  return null_or_missing_p(end)
+         ? func_f1(regex, do_match_right)
+         : func_f1(cons(regex, end), do_match_right_offs);
+}
+
+/*
+ * Implementation of the m^$ intrinsic: (m^$ regex [position] string).
+ * Because the optional argument sits in the middle, the meaning of arg1
+ * depends on whether arg2 is present:
+ *   - arg2 nil/missing: arg1 is the string; the whole string is tested.
+ *   - arg2 present: arg1 is the position and arg2 is the string. A
+ *     negative position is offset by the string length.
+ * On success the tested portion of the string is returned.
+ */
+val regex_match_full(val regex, val arg1, val arg2)
+{
+  val whole, reached;
+
+  if (!null_or_missing_p(arg2)) {
+    val text = arg2;
+    val total = length_str(text);
+    val start = if3(minusp(arg1), plus(total, arg1), arg1);
+    val stop = match_regex(text, regex, start);
+    return if2(eql(stop, total), sub_str(text, start, t));
+  }
+
+  whole = arg1;
+  reached = match_regex(arg1, regex, arg2);
+  return if2(eql(reached, length_str(whole)), whole);
+}
+
+/*
+ * Implementation of the m^ intrinsic: (m^ regex [position] string).
+ * With arg2 nil/missing, arg1 is the string and arg2 is passed through
+ * as the position. Otherwise arg1 is the position and arg2 the string.
+ * The work is done by match_regst.
+ */
+val regex_match_left(val regex, val arg1, val arg2)
+{
+  return null_or_missing_p(arg2)
+         ? match_regst(arg1, regex, arg2)
+         : match_regst(arg2, regex, arg1);
+}
+
+/*
+ * Implementation of the m$ intrinsic: (m$ regex [end-position] string).
+ * With arg2 nil/missing, arg1 is the string and arg2 is passed through
+ * as the end position. Otherwise arg1 is the end position and arg2 the
+ * string. The work is done by match_regst_right.
+ */
+val regex_match_right(val regex, val arg1, val arg2)
+{
+  return null_or_missing_p(arg2)
+         ? match_regst_right(arg1, regex, arg2)
+         : match_regst_right(arg2, regex, arg1);
+}
+
+/*
+ * Implementation of the r^$ intrinsic: (r^$ regex [position] string).
+ * Performs the same test as regex_match_full, but on success returns an
+ * rcons of the start position and the string length instead of a
+ * substring.
+ *   - arg2 nil/missing: arg1 is the string; the start is zero.
+ *   - arg2 present: arg1 is the position (offset by the string length
+ *     when negative) and arg2 is the string.
+ */
+val regex_range_full(val regex, val arg1, val arg2)
+{
+  val text, total, start;
+
+  if (null_or_missing_p(arg2)) {
+    text = arg1;
+    total = length_str(text);
+    start = zero;
+  } else {
+    text = arg2;
+    total = length_str(text);
+    start = if3(minusp(arg1), plus(total, arg1), arg1);
+  }
+
+  return if2(eql(match_regex(text, regex, start), total),
+             rcons(start, total));
+}
+
+/*
+ * Implementation of the r^ intrinsic: (r^ regex [position] string).
+ * On a successful left-anchored match, returns an rcons of the start
+ * position and the value produced by match_regex.
+ *   - arg2 nil/missing: arg1 is the string; the range starts at zero.
+ *   - arg2 present: arg1 is the position (offset by the string length
+ *     when negative) and arg2 is the string.
+ */
+val regex_range_left(val regex, val arg1, val arg2)
+{
+  val start, stop;
+
+  if (null_or_missing_p(arg2)) {
+    stop = match_regex(arg1, regex, arg2);
+    return if2(stop, rcons(zero, stop));
+  }
+
+  start = if3(lt(arg1, zero), plus(arg1, length_str(arg2)), arg1);
+  stop = match_regex(arg2, regex, start);
+  return if2(stop, rcons(start, stop));
+}
+
+/*
+ * Implementation of the r$ intrinsic: (r$ regex [end-position] string).
+ * On a successful right-anchored match, returns an rcons describing where
+ * the matching suffix lies within the string.
+ *   - arg2 nil/missing: arg1 is the string and the match is anchored at
+ *     the end of the string.
+ *   - arg2 present: arg1 is the end position (offset by the string length
+ *     when negative) and arg2 is the string.
+ *
+ * match_regex_right yields the length of the matching suffix, not a
+ * position, so in both cases the range is (end - len)..end. In the
+ * default case the end is the string length; anchoring the range at zero
+ * there would describe a prefix rather than the suffix that matched.
+ */
+val regex_range_right(val regex, val arg1, val arg2)
+{
+  if (null_or_missing_p(arg2)) {
+    val str = arg1;
+    val end = length_str(str);
+    val len = match_regex_right(str, regex, arg2);
+    return if2(len, rcons(minus(end, len), end));
+  } else {
+    val end = if3(lt(arg1, zero), plus(arg1, length_str(arg2)), arg1);
+    val len = match_regex_right(arg2, regex, end);
+    return if2(len, rcons(minus(end, len), end));
+  }
+}
val read_until_match(val regex, val stream_in, val include_match_in)
{
@@ -2832,6 +2953,15 @@ void regex_init(void)
func_n1(reg_expand_nongreedy));
reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize));
reg_fun(intern(lit("read-until-match"), user_package), func_n3o(read_until_match, 1));
+ reg_fun(intern(lit("f^$"), user_package), func_n2o(regex_match_full_fun, 1));
+ reg_fun(intern(lit("f^"), user_package), func_n2o(regex_match_left_fun, 1));
+ reg_fun(intern(lit("f$"), user_package), func_n2o(regex_match_right_fun, 1));
+ reg_fun(intern(lit("m^$"), user_package), func_n3o(regex_match_full, 2));
+ reg_fun(intern(lit("m^"), user_package), func_n3o(regex_match_left, 2));
+ reg_fun(intern(lit("m$"), user_package), func_n3o(regex_match_right, 2));
+ reg_fun(intern(lit("r^$"), user_package), func_n3o(regex_range_full, 2));
+ reg_fun(intern(lit("r^"), user_package), func_n3o(regex_range_left, 2));
+ reg_fun(intern(lit("r$"), user_package), func_n3o(regex_range_right, 2));
init_special_char_sets();
}
diff --git a/regex.h b/regex.h
index 80494860..cad2df13 100644
--- a/regex.h
+++ b/regex.h
@@ -41,6 +41,15 @@ val match_regst(val str, val regex, val pos);
val match_regst_right(val str, val regex, val end);
val regsub(val regex, val repl, val str);
val read_until_match(val regex, val stream, val keep_match);
+val regex_match_full(val regex, val arg1, val arg2);
+val regex_match_full_fun(val regex, val pos);
+val regex_match_left_fun(val regex, val pos);
+val regex_match_right_fun(val regex, val end);
+val regex_match_left(val regex, val arg1, val arg2);
+val regex_match_right(val regex, val arg1, val arg2);
+val regex_range_full(val regex, val arg1, val arg2);
+val regex_range_left(val regex, val arg1, val arg2);
+val regex_range_right(val regex, val arg1, val arg2);
int wide_display_char_p(wchar_t ch);
void regex_init(void);
void regex_free_all(void);
diff --git a/txr.1 b/txr.1
index e049b259..3a6bf27a 100644
--- a/txr.1
+++ b/txr.1
@@ -32216,6 +32216,239 @@ removed from the stream. If
is true, that matching text is included in
the returned string. Otherwise, it is discarded.
+.coNP Functions @, m^$ @ m^ and @ m$
+.synb
+.mets (m^$ < regex <> [ position ] << string )
+.mets (m^ < regex <> [ position ] << string )
+.mets (m$ < regex <> [ end-position ] << string )
+.syne
+.desc
+These functions provide functionality similar to the
+.code match-regst
+and
+.code match-regst-right
+functions, but under alternative interfaces which are more
+convenient.
+
+The
+.code ^
+and
+.code $
+notation used in their names is an allusion to the
+regular expression search anchoring operators found in
+familiar POSIX utilities such as
+.codn grep .
+
+The
+.meta position
+argument, if omitted,
+defaults to zero, so that the
+entire
+.meta string
+is operated upon.
+
+The
+.meta end-position
+argument defaults to the length of
+.metn string ,
+so that the end position coincides with the end of the
+string.
+
+If the
+.meta position
+or
+.meta end-position
+arguments are negative, they index backwards
+from the length of
+.meta string
+so that -1 denotes the last character.
+
+A value in either parameter which is excessively
+negative or positive, such that it indexes before
+the start of the string or exceeds its length
+results in a failed match and consequently
+.code nil
+being returned.
+
+
+The
+.code m^$
+function tests whether the entire portion of
+.meta string
+starting at
+.meta position
+through to the end of the string is in the set of strings
+matched by
+.metn regex .
+If this is true, then that portion of the string is
+returned. Otherwise
+.code nil
+is returned.
+
+The
+.code m^
+function tests whether the portion of the
+.meta string
+starting at
+.meta position
+has a prefix which matches
+.metn regex .
+If so, then this matching prefix is returned.
+Otherwise
+.code nil
+is returned.
+
+The
+.code m$
+function tests whether the portion of
+.meta string
+ending just before
+.meta end-position
+has a suffix which matches
+.metn regex .
+If so, then this matching suffix is returned.
+Otherwise
+.code nil
+is returned.
+
+.coNP Functions @, r^$ @ r^ and @ r$
+.synb
+.mets (r^$ < regex <> [ position ] << string )
+.mets (r^ < regex <> [ position ] << string )
+.mets (r$ < regex <> [ end-position ] << string )
+.syne
+.desc
+These functions perform the same operations as,
+respectively,
+.codn m^$ ,
+.code m^
+and
+.codn m$ ,
+with the same argument conventions. They differ
+in return value. When a match is found, they
+return a range value indicating the extent of
+the matching substring within
+.meta string
+rather than the matching substring itself.
+
+The
+.meta position
+argument, if omitted,
+defaults to zero, so that the
+entire
+.meta string
+is operated upon.
+
+The
+.meta end-position
+argument defaults to the length of
+.metn string ,
+so that the end position coincides with the end of the
+string.
+
+A value in either parameter which is excessively
+negative or positive, such that it indexes before
+the start of the string or exceeds its length
+results in a failed match and consequently
+.code nil
+being returned.
+
+The
+.code r^$
+function tests whether the entire portion of
+.meta string
+starting at
+.meta position
+through to the end of the string is in the set of strings
+matched by
+.metn regex .
+If this is true, then the matching range is returned,
+as a range object. Otherwise
+.code nil
+is returned.
+
+The
+.code r^
+function tests whether the portion of the
+.meta string
+starting at
+.meta position
+has a prefix which matches
+.metn regex .
+If so, then the matching range is returned, as a range object.
+Otherwise
+.code nil
+is returned.
+
+The
+.code r$
+function tests whether the portion of
+.meta string
+ending just before
+.meta end-position
+has a suffix which matches
+.metn regex .
+If so, then the matching range is returned.
+Otherwise
+.code nil
+is returned.
+
+.coNP Functions @, f^$ @ f^ and @ f$
+.synb
+.mets (f^$ < regex <> [ position ])
+.mets (f^ < regex <> [ position ])
+.mets (f$ < regex <> [ end-position ])
+.syne
+.desc
+These regular expression functions do not directly
+perform regex operations. Rather, they each return
+a function of one argument which performs a regex
+operation.
+
+The returned functions perform the same operations as,
+respectively,
+.codn m^$ ,
+.code m^
+and
+.codn m$ .
+
+The following equivalences nearly hold, except that the functions
+on the right side produced by
+.code op
+can accept two arguments when only
+.code r
+is curried, whereas the functions on the left take only
+one argument:
+
+.cblk
+ [f^$ r] <--> (op m^$ r)
+ [f^$ r p] <--> (op m^$ r p)
+ [f^ r] <--> (op m^ r)
+ [f^ r p] <--> (op m^ r p)
+ [f$ r] <--> (op m$ r)
+ [f$ r p] <--> (op m$ r p)
+.cble
+
+That is to say,
+.code f^$
+returns a function which binds
+.meta regex
+and possibly the optional
+.metn position .
+When this function is invoked, it must be given an argument
+which is a string. It performs the same operation as
+.code m^$
+being called on
+.meta regex
+and possibly
+.metn position .
+The same holds between
+.code f^
+and
+.codn m^ ,
+and between
+.code f$
+and
+.codn m$ .
+
.SS* Hashing Library
.coNP Functions @ make-hash and @ hash
.synb