summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-10-25 18:54:40 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-10-25 18:54:40 -0700
commitd01991e9b250ca65d9afbfd7e5efd9ed4f0ef923 (patch)
tree69c443ab1377e4d8a14e5ca97f1eea40a3108a8e
parentb72c9309c8d8f1af320dce616a69412510531b48 (diff)
downloadtxr-d01991e9b250ca65d9afbfd7e5efd9ed4f0ef923.tar.gz
txr-d01991e9b250ca65d9afbfd7e5efd9ed4f0ef923.tar.bz2
txr-d01991e9b250ca65d9afbfd7e5efd9ed4f0ef923.zip
awk: five new range operators.
* share/txr/stdlib/awk.tl (sys;awk-mac-let): Provide the implementation for the local macros --rng, --rng-, rng+, -rng+ and --rng+. * tests/015/awk-rng.tl: New file. * tests/015/awk-rng.expected: New file. * txr.1: Documented.
-rw-r--r--share/txr/stdlib/awk.tl75
-rw-r--r--tests/015/awk-rng.expected25
-rw-r--r--tests/015/awk-rng.tl18
-rw-r--r--txr.1242
4 files changed, 301 insertions, 59 deletions
diff --git a/share/txr/stdlib/awk.tl b/share/txr/stdlib/awk.tl
index 6b2f6ee2..57e7c271 100644
--- a/share/txr/stdlib/awk.tl
+++ b/share/txr/stdlib/awk.tl
@@ -297,13 +297,17 @@
(next-file () '(return-from :awk-file))
(sys:rng (form from-expr to-expr :env e)
(let ((style (car form))
+ (need-mid (member (car form) '(--rng --rng- --rng+)))
+ (need-end (member (car form) '(rng+ -rng+ --rng+)))
(ix (pinc (qref ,awc nranges)))
(rng-temp (gensym))
(from-expr-ex (sys:expand from-expr e))
(to-expr-ex (sys:expand to-expr e))
(flag-old (gensym))
(flag-act (gensym))
- (flag-deact (gensym)))
+ (flag-deact (gensym))
+ (flag-mid (gensym))
+ (from-expr-val (gensym)))
(tree-bind ((from-expr-ex fe-fv fe-ff fe-ev fe-ef)
(to-expr-ex te-fv te-ff te-ev te-ef)
(from-expr-orig to-expr-orig))
@@ -331,21 +335,48 @@
'functions)
(push rng-temp (qref ,awc rng-expr-temps))
(push ^(placelet ((flag (vecref (qref ,',aws-sym rng-vec) ,ix)))
- (let ((,flag-old flag) ,flag-act ,flag-deact)
- (when (or ,flag-old ,from-expr-ex)
- (set ,flag-act t))
- (when (and ,flag-act ,to-expr-ex)
- (set ,flag-act nil)
- (set ,flag-deact t))
+ (let ((,flag-old flag) ,flag-act ,flag-deact
+ ,*(if need-mid ^(,flag-mid (,from-expr-val ,from-expr-ex))))
+ ,*(if need-mid
+ ^((when (and ,flag-old (not ,from-expr-val))
+ (set ,flag-mid t))
+ (cond
+ (,flag-old (set ,flag-act ,flag-old))
+ (,from-expr-val (set ,flag-act t))))
+ ^((cond
+ (,flag-old (set ,flag-act ,flag-old))
+ (,from-expr-ex (set ,flag-act t)))))
+ ,(if need-end
+ ^(caseq ,flag-act
+ ((t) (when ,to-expr-ex
+ (set ,flag-act :end)
+ (set ,flag-deact t)
+ ,*(if need-mid
+ ^((set ,flag-mid nil)))))
+ (:end (cond
+ (,to-expr-ex (set ,flag-deact t))
+ (,(if need-mid from-expr-val from-expr-ex)
+ (set ,flag-act t ,flag-old nil))
+ (t (set ,flag-act nil)))
+ ,*(if need-mid
+ ^((set ,flag-mid nil)))))
+ ^(when (and ,flag-act ,to-expr-ex)
+ (set ,flag-act nil)
+ (set ,flag-deact t)
+ ,*(if need-mid
+ ^((set ,flag-mid nil)))))
,*(caseq style
- (rng ^((or (set flag ,flag-act)
- ,(if (and (plusp sys:compat)
+ ((rng rng+) ^((or (set flag ,flag-act)
+ ,(if (and (plusp sys:compat)
(<= sys:compat 177))
- flag-old
- flag-deact))))
+ flag-old
+ flag-deact))))
(-rng- ^((and (set flag ,flag-act) ,flag-old)))
(rng- ^((set flag ,flag-act)))
- (-rng ^((set flag ,flag-act) ,flag-old)))))
+ (-rng ^((set flag ,flag-act) ,flag-old))
+ (-rng+ ^((set flag ,flag-act) (if ,flag-act ,flag-old)))
+ (--rng- ^((set flag ,flag-act) ,flag-mid))
+ ((--rng --rng+) ^((set flag ,flag-act) (or ,flag-mid ,flag-deact))))))
(qref ,awc rng-exprs))
rng-temp)))
(rng (:form form from-expr to-expr)
@@ -364,6 +395,26 @@
^(sys:rng ,form
(sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp))
(sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp))))
+ (--rng (:form form from-expr to-expr)
+ ^(sys:rng ,form
+ (sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp))
+ (sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp))))
+ (--rng- (:form form from-expr to-expr)
+ ^(sys:rng ,form
+ (sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp))
+ (sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp))))
+ (rng+ (:form form from-expr to-expr)
+ ^(sys:rng ,form
+ (sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp))
+ (sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp))))
+ (-rng+ (:form form from-expr to-expr)
+ ^(sys:rng ,form
+ (sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp))
+ (sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp))))
+ (--rng+ (:form form from-expr to-expr)
+ ^(sys:rng ,form
+ (sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp))
+ (sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp))))
(ff (. opip-args)
^(symacrolet ((f (rslot ,',aws-sym 'fields 'f-to-rec)))
(set f [(opip ,*opip-args) f])))
diff --git a/tests/015/awk-rng.expected b/tests/015/awk-rng.expected
new file mode 100644
index 00000000..ad33b30b
--- /dev/null
+++ b/tests/015/awk-rng.expected
@@ -0,0 +1,25 @@
+rec rng -rng rng- -rng- --rng- --rng rng+ -rng+ --rng+ Y1Y2
+X1 t t t
+a t t t t t t
+b t t t t t t
+X1 t t t t t t
+c t t t t t t
+X2 t t t t t t
+1 t t t t t t t t t
+2 t t t t t t t t t
+3 t t t t t t t t t
+Y1 t t t end t t t
+r end end t t
+s end end t t
+Y2 end end t t
+t
+Y2
+X1X2 t t t
+Y1Y2 t t t end t t t
+X1X2 t t t
+Y1 t t t end t t t
+a end end t t
+Y2 end end t t
+X1 t t t
+b t t t t t t
+X2 t t t t t t
diff --git a/tests/015/awk-rng.tl b/tests/015/awk-rng.tl
new file mode 100644
index 00000000..8bf2fa3c
--- /dev/null
+++ b/tests/015/awk-rng.tl
@@ -0,0 +1,18 @@
+(awk (:inputs '#"X1 a b X1 c X2 1 2 3 Y1 r s Y2 t Y2"
+ '#"X1X2 Y1Y2"
+ '#"X1X2 Y1 a Y2 X1 b X2")
+ (:set ofs "\t")
+ (:begin
+ (prn 'rec 'rng '-rng 'rng- '-rng- '--rng- '--rng 'rng+ '-rng+ '--rng+ 'Y1Y2))
+ (t (prn rec
+ (rng (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (-rng (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (rng- (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (-rng- (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (--rng- (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (--rng (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (rng+ (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (-rng+ (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (--rng+ (rng #/X1/ #/X2/) (rng #/Y1/ #/Y2/))
+ (rng #/Y1/ #/Y2/))))
+
diff --git a/txr.1 b/txr.1
index 220259f8..92aa7bb6 100644
--- a/txr.1
+++ b/txr.1
@@ -47037,56 +47037,98 @@ next one. If there is no next input source,
.code awk
terminates.
-.coNP Macros @, rng @, -rng @ rng- and @ -rng-
+.coNP Macros @, rng @, -rng @ rng- @, -rng- @, --rng @, --rng- @, rng+ @ -rng+ and @ --rng+
.synb
.mets (rng < from-condition << to-condition )
.mets (-rng < from-condition << to-condition )
.mets (rng- < from-condition << to-condition )
.mets (-rng- < from-condition << to-condition )
+.mets (--rng < from-condition << to-condition )
+.mets (--rng- < from-condition << to-condition )
+.mets (rng+ < from-condition << to-condition )
+.mets (-rng+ < from-condition << to-condition )
+.mets (--rng+ < from-condition << to-condition )
.syne
.desc
-The awk macro
+The nine awk macros in the
.code rng
-may be used anywhere within an ordinary condition-pattern
+family may be used anywhere within an ordinary condition-pattern
.code awk
clause.
-It provides a Boolean test which is true if the current record lands within
-a range of records, delimited by conditions.
-The range begins when
+
+Each provides a Boolean test which is true if the current record lands within
+a range of records delimited by conditions. Each provides its own
+distinct, useful nuance, which is identified by the mnemonic characters
+prefixed or suffixed to the name.
+
+The basic
+.code rng
+macro inclusively matches ranges of records. Each such range begins with a record
+for which
.meta from-condition
-is found to be true, and ends when
+yields true, and ends on the record for which
.meta to-condition
-is true. Over this interval, range is said to be
-.IR active .
-
-The
+is true. What it means to match is that the
.code rng
-macro's result value is true for the starting record which matches
-.metn from-condition ,
-the final record which matches
+expression yields a Boolean true value when it is evaluated in the context
+of processing any of the records which are included in the range.
+
+The table below summarizes the semantic variations of these nine
+range macro operators. The leftmost column represents the file of records
+being processed. The remaining columns indicate, using the character
+.codn X ,
+those rows for which each of the nine range operators yields true. Each operator
+is assumed to be invoked with the arguments
+.code #/H/
+and
+.code #/T/
+as its
+.meta from-condition
+and
.metn to-condition ,
-and all records in between, if any.
-The remaining macros work exactly like
-.codn rng ,
-identifying the active range in the same manner. They differ
-in that the Boolean value they produce excludes the starting record,
-final record, or both, depending on which macro. The
-.code -rng
-macro yields
-.code nil
-for the initial record. The
-.code rng-
-macro yields
-.code nil
-for the final record. The
-.code -rng-
-macro yields
-.code nil
-for both the initial and final record.
+respectively: for example,
+.code "(rng #/H/ #/T/)"
+in the case of
+.codn rng :
-Ranges expressed using
+.cblk
+ DATA rng -rng rng- -rng- --rng --rng- rng+ -rng+ --rng+
+ ----------------------------------------------------------
+ PROLOG
+ H1 X X X
+ H2 X X X X X X
+ H3 X X X X X X
+ B1 X X X X X X X X X
+ B2 X X X X X X X X
+ T1 X X X X X X
+ T2 X X X
+ T3 X X X
+ EPILOG
+.cble
+
+The prefix or suffix characters are mnemonic. A single
+.code -
+(dash) indicates the exclusion of one record. A double
+.code --
+(dash dash)
+indicates the exclusion of all leading records which match
+.metn from-condition ;
+this appears on the left side only.
+The
+.code +
+character, appearing on the right only, indicates that
+all consecutive records which match
+.meta to-condition
+are included in the range, not only the first one.
+
+Ranges are oblivious to the division between successive sources of input; a
+range can start in one file of records and terminate in another.
+To prevent a range from spanning input transitions, additional complexity
+is required in the expression.
+
+Ranges expressed using the
.code rng
-may combine with other expressions, including
+family macros may combine with other expressions, including
other ranges, and allow arbitrary nesting: the
.meta from-condition
or
@@ -47167,19 +47209,125 @@ or
.codn kfs .
It is not recommended to place any side effects into range expressions.
-Evaluation of ranges obeys the following logic, which is applied to
-each range, prior to the processing of condition-action clauses.
-If a range is not currently active, its
-.meta from-condition
-is evaluated. If it yields true, the range is activated.
-If a range is currently active (either already so, from a previous
-record-processing pass, or because it was just activated by
-.metn from-condition )
-then the
-.meta to-condition
-is evaluated. If it is true, then the range stays active for
-the current record, but is deactivated when the processing of
-the record completes.
+A more detailed description of the range operators follows.
+.RS
+.meIP (rng < from << to )
+This type of range becomes active when a record is encountered for which the
+.meta from
+expression yields true. While the range is active, the expression evaluates
+true. If, when the range is active, a record is encountered for which the
+.meta to
+expression yields true, the range remains active for that record and is
+deactivated after the completion of processing for that record. If
+the range is inactive and a record is encountered for which both
+.meta from
+and
+.meta to
+are true, then the range is activated for that record and then deactivated
+when that record is processed.
+Records for which
+.meta from
+and
+.meta to
+are not true do not affect the range's activation state.
+.meIP (-rng < from << to )
+This type of range is active under the same conditions as the
+.code rng
+type. However, the expression yields a Boolean false value for the
+first record which begins a range. That is to say, when the range is
+inactive, and a record is scanned for which
+.meta from
+is true, the range activates, but the range expression yields
+.codn nil .
+This is true regardless of whether the
+.meta to
+expression yields true for that record. If there are additional records
+in the range, the expression yields a true value for those records.
+.meIP (rng- < from << to )
+This type of range is active under the same conditions as the
+.code rng
+type. However, the range expression yields
+.code nil
+for the record for which
+.meta to
+yields true which terminates the range. This occurs even if that is
+the same record which activated the range by triggering the
+.meta from
+condition. Note that if a range terminates abruptly due to no more records
+being available, the range expression still yields true for the last record.
+.meIP (-rng- < from << to )
+This type of range is active under the same conditions as the
+.code rng
+type. However, the range expression yields
+.code nil
+for the first record which activates the range, and for the last
+record which deactivates the range by activating the
+.meta to
+condition. If the range is active over fewer than three records, then
+the expression never yields true for that range. If the range terminates
+abruptly due to no more records being available, and if the last record
+processed isn't the one which activated the range due to triggering the
+.meta from
+condition, the expression yields true for that record.
+.meIP (--rng < from << to )
+This type of range is active under the same conditions as
+.codn rng .
+However, the range expression yields
+.code nil
+for the entire leading sequence of consecutive records for which
+.meta from
+is true. Regardless of this, it yields true for the last record
+for which
+.meta to
+is true.
+.meIP (--rng- < from << to )
+This type of range is active under the same conditions as
+.codn rng .
+However, the range expression yields
+.code nil
+for the entire leading sequence of consecutive records for which
+.meta from
+is true, and also yields
+.code nil
+for the last record which triggers the
+.meta to
+condition.
+.meIP (rng+ < from << to )
+This range is active under different conditions compared to
+.codn rng .
+Though it becomes active in the same way, when the
+.meta from
+expression yields true, the deactivation logic is different.
+The range is deactivated when a record for which
+.meta to
+is true is followed by a record for which
+.meta to
+is not true. That record is excluded from the range; if the
+.meta from
+expression happens to be true for that record, a new range begins
+at that record. Thus, effectively, the range is terminated not
+by a single record which triggers
+.meta to
+but by a sequence of one or more such consecutive records.
+.meIP (-rng+ < from << to )
+This range is active under the same conditions as
+.codn rng+ .
+However, the range expression yields
+.code nil
+for the first record in the range. If the range contains only one record, then
+it returns
+.code nil
+for that record.
+.meIP (--rng+ < from << to )
+This range is active under the same conditions as
+.codn rng+ .
+However, the range expression yields
+.code nil
+for the entire leading sequence of consecutive records for which
+.meta from
+is true, and for which
+.meta to
+is false. For the terminating records for which
+.meta to
+is true, it yields true.
+.RE
.coNP Macro @ ff
.synb