summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2021-06-27 20:35:41 -0700
committerKaz Kylheku <kaz@kylheku.com>2021-06-27 20:35:41 -0700
commit76ab4a2923919f837817e63f86dca9cd6d4ed82c (patch)
treeb9728b0b78d54737cf535ec35f6809f686c5f30f
parent5d2ef0c1daf3d44db1acea0d201712a7b45875ea (diff)
downloadtxr-76ab4a2923919f837817e63f86dca9cd6d4ed82c.tar.gz
txr-76ab4a2923919f837817e63f86dca9cd6d4ed82c.tar.bz2
txr-76ab4a2923919f837817e63f86dca9cd6d4ed82c.zip
regex: exposing optimization pass as regex-optimize
* regex.c (regex_optimize): New static function, capturing the three optimization passes. (regex_compile): Code moved into regex_optimize. (regex_init): Remove sys:reg-optimize function. Register regex-optimize. * txr.1: Documented. * stdlib/doc-syms.tl: Updated.
-rw-r--r--regex.c9
-rw-r--r--stdlib/doc-syms.tl1
-rw-r--r--txr.145
3 files changed, 53 insertions, 2 deletions
diff --git a/regex.c b/regex.c
index 41c912a8..0033c2b9 100644
--- a/regex.c
+++ b/regex.c
@@ -2209,6 +2209,11 @@ static val regex_requires_dv(val exp)
}
}
+static val regex_optimize(val regex_sexp)
+{
+ return reg_optimize(reg_expand_nongreedy(reg_nary_to_bin(regex_sexp)));
+}
+
val regex_compile(val regex_sexp, val error_stream)
{
val regex_source = regex_sexp;
@@ -2218,7 +2223,7 @@ val regex_compile(val regex_sexp, val error_stream)
return if2(regex_sexp, regex_compile(regex_sexp, error_stream));
}
- regex_sexp = reg_optimize(reg_expand_nongreedy(reg_nary_to_bin(regex_sexp)));
+ regex_sexp = regex_optimize(regex_sexp);
if (opt_derivative_regex || regex_requires_dv(regex_sexp)) {
regex_t *regex = coerce(regex_t *, chk_malloc(sizeof *regex));
@@ -3358,7 +3363,7 @@ void regex_init(void)
reg_fun(intern(lit("reg-expand-nongreedy"), system_package),
func_n1(reg_expand_nongreedy));
- reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize));
+ reg_fun(intern(lit("regex-optimize"), user_package), func_n1(regex_optimize));
reg_fun(intern(lit("read-until-match"), user_package), func_n3o(read_until_match, 1));
reg_fun(intern(lit("scan-until-match"), user_package), func_n2(scan_until_match));
reg_fun(intern(lit("count-until-match"), user_package), func_n2(count_until_match));
diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl
index 5bf473ee..cce921a1 100644
--- a/stdlib/doc-syms.tl
+++ b/stdlib/doc-syms.tl
@@ -1497,6 +1497,7 @@
("refset" "N-01A419FB")
("regex-compile" "N-0168C611")
("regex-from-trie" "N-00E48912")
+ ("regex-optimize" "N-008430D8")
("regex-parse" "N-01C9C361")
("regex-prefix-match" "N-02CE60DF")
("regex-source" "N-0218BD2B")
diff --git a/txr.1 b/txr.1
index 643b0e9d..7b6d2693 100644
--- a/txr.1
+++ b/txr.1
@@ -50137,6 +50137,51 @@ The double backslash in the string literal produces a single backslash
in the resulting string object that is processed by
.codn regex-parse .
+.coNP Function @ regex-optimize
+.synb
+.mets (regex-optimize << regex-tree-syntax )
+.syne
+.desc
+The
+.code regex-optimize
+function accepts the source code of a regular expression,
+expressed as a Lisp data structure representing an abstract syntax tree,
+and calculates an equivalent structure in which certain simplifications
+have been performed, or in some cases substitutions which eliminate the
+dependence on derivative-based processing.
+
+The
+.meta regex-tree-syntax
+is assumed to be correct, as if it were produced by the
+.code regex-parse
+or
+.code regex-from-trie
+functions. Incorrect syntax produces unspecified results; an exception may be
+thrown, or some object may appear to be successfully returned.
+
+Note: it is unnecessary to call this function to prepare the input for
+.code regex-compile
+because that function optimizes internally. However, the source code attached
+to a compiled regular expression object is the original unoptimized syntax
+tree, and that is used for rendering the
+.code #/.../
+notation when the object is printed. If the syntax is passed through
+.code regex-optimize
+before
+.codn regex-compile ,
+the resulting object will have the optimized code attached to it, and
+subsequently render that way in printed form.
+
+.TP* Examples:
+
+.verb
+ ;; a|b|c -> [abc]
+ (regex-optimize '(or #\ea (or #\eb #\ec))) -> (set #\ea #\eb #\ec)
+
+ ;; (a|) -> a?
+ (regex-optimize '(or #\ea nil)) -> (? #\ea)
+.brev
+
.coNP Function @ read-until-match
.synb
.mets (read-until-match < regex >> [ stream <> [ include-match ]])