diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2021-06-27 20:35:41 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2021-06-27 20:35:41 -0700 |
commit | 76ab4a2923919f837817e63f86dca9cd6d4ed82c (patch) | |
tree | b9728b0b78d54737cf535ec35f6809f686c5f30f | |
parent | 5d2ef0c1daf3d44db1acea0d201712a7b45875ea (diff) | |
download | txr-76ab4a2923919f837817e63f86dca9cd6d4ed82c.tar.gz txr-76ab4a2923919f837817e63f86dca9cd6d4ed82c.tar.bz2 txr-76ab4a2923919f837817e63f86dca9cd6d4ed82c.zip |
regex: exposing optimization pass as regex-optimize
* regex.c (regex_optimize): New static function, capturing the
three optimization passes.
(regex_compile): Code moved into regex_optimize.
(regex_init): Remove sys:reg-optimize function. Register
regex-optimize.
* txr.1: Documented.
* stdlib/doc-syms.tl: Updated.
-rw-r--r-- | regex.c | 9 | ||||
-rw-r--r-- | stdlib/doc-syms.tl | 1 | ||||
-rw-r--r-- | txr.1 | 45 |
3 files changed, 53 insertions, 2 deletions
@@ -2209,6 +2209,11 @@ static val regex_requires_dv(val exp) } } +static val regex_optimize(val regex_sexp) +{ + return reg_optimize(reg_expand_nongreedy(reg_nary_to_bin(regex_sexp))); +} + val regex_compile(val regex_sexp, val error_stream) { val regex_source = regex_sexp; @@ -2218,7 +2223,7 @@ val regex_compile(val regex_sexp, val error_stream) return if2(regex_sexp, regex_compile(regex_sexp, error_stream)); } - regex_sexp = reg_optimize(reg_expand_nongreedy(reg_nary_to_bin(regex_sexp))); + regex_sexp = regex_optimize(regex_sexp); if (opt_derivative_regex || regex_requires_dv(regex_sexp)) { regex_t *regex = coerce(regex_t *, chk_malloc(sizeof *regex)); @@ -3358,7 +3363,7 @@ void regex_init(void) reg_fun(intern(lit("reg-expand-nongreedy"), system_package), func_n1(reg_expand_nongreedy)); - reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize)); + reg_fun(intern(lit("regex-optimize"), user_package), func_n1(regex_optimize)); reg_fun(intern(lit("read-until-match"), user_package), func_n3o(read_until_match, 1)); reg_fun(intern(lit("scan-until-match"), user_package), func_n2(scan_until_match)); reg_fun(intern(lit("count-until-match"), user_package), func_n2(count_until_match)); diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl index 5bf473ee..cce921a1 100644 --- a/stdlib/doc-syms.tl +++ b/stdlib/doc-syms.tl @@ -1497,6 +1497,7 @@ ("refset" "N-01A419FB") ("regex-compile" "N-0168C611") ("regex-from-trie" "N-00E48912") + ("regex-optimize" "N-008430D8") ("regex-parse" "N-01C9C361") ("regex-prefix-match" "N-02CE60DF") ("regex-source" "N-0218BD2B") @@ -50137,6 +50137,51 @@ The double backslash in the string literal produces a single backslash in the resulting string object that is processed by .codn regex-parse . 
+.coNP Function @ regex-optimize +.synb +.mets (regex-optimize << regex-tree-syntax ) +.syne +.desc +The +.code regex-optimize +function accepts the source code of a regular expression, +expressed as a Lisp data structure representing an abstract syntax tree, +and calculates an equivalent structure in which certain simplifications +have been performed, or in some cases substitutions which eliminate the +dependence on derivative-based processing. + +The +.meta regex-tree-syntax +is assumed to be correct, as if it were produced by the +.code regex-parse +or +.code regex-from-trie +functions. Incorrect syntax produces unspecified results; an exception may be +thrown, or some object may appear to be successfully returned. + +Note: it is unnecessary to call this function to prepare the input for +.code regex-compile +because that function optimizes internally. However, the source code attached +to a compiled regular expression object is the original unoptimized syntax +tree, and that is used for rendering the +.code #/.../ +notation when the object is printed. If the syntax is passed through +.code regex-optimize +before +.codn regex-compile , +the resulting object will have the optimized code attached to it, and +subsequently render that way in printed form. + +.TP* Examples: + +.verb + ;; a|b|c -> [abc] + (regex-optimize '(or #\ea (or #\eb #\ec))) -> (set #\ea #\eb #\ec) + + ;; (a|) -> a? + (regex-optimize '(or #\ea nil)) -> (? #\ea) +.brev + .coNP Function @ read-until-match .synb .mets (read-until-match < regex >> [ stream <> [ include-match ]]) |