summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-10-29 19:58:52 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-10-29 19:58:52 -0700
commit78a77b00d40befdacd4f5044f504ed132755609e (patch)
tree1f8b89a8f005d299ddd6d850d9b43f0a0ba16e6f
parentd7a93957e27bbfe6eaebc25b9d539f82dd9e4df3 (diff)
downloadtxr-78a77b00d40befdacd4f5044f504ed132755609e.tar.gz
txr-78a77b00d40befdacd4f5044f504ed132755609e.tar.bz2
txr-78a77b00d40befdacd4f5044f504ed132755609e.zip
awk: implement ranges right using functions.
* share/txr/stdlib/awk.tl (sys:awk%--rng, sys:awk%--rng-, sys:awk%rng+, sys:awk%-rng+, sys:awk%--rng+): New functions. (sys:awk-mac-let): Rewritten range expander. The four basic ranges rng, rng-, -rng and -rng- are handled with in-line expansion, because by doing that we avoid unnecessarily evaluating the from-expression. The remaining cases expand to function calls to the new functions, which receive the flag vector, the index position in that vector and the values of the from and to expressions. The behavior change is that that the -- forms now do the right thing: they hide all leading records that satisfy the from-expression, right to the last record of the range if necessary. * tests/015/awk-rng.expected: Updated. * txr.1: Revise semantic description the -- range types, plus minor fixes.
-rw-r--r--share/txr/stdlib/awk.tl164
-rw-r--r--tests/015/awk-rng.expected26
-rw-r--r--txr.117
3 files changed, 133 insertions, 74 deletions
diff --git a/share/txr/stdlib/awk.tl b/share/txr/stdlib/awk.tl
index c31d8d0f..2d8c4bc6 100644
--- a/share/txr/stdlib/awk.tl
+++ b/share/txr/stdlib/awk.tl
@@ -208,6 +208,80 @@
((regex fun) (call val rec))
(t val)))
+(defun sys:awk%--rng (rng-vec idx from-val to-val)
+ (placelet ((state (vecref rng-vec idx)))
+ (caseq state
+ (nil (cond
+ ((and from-val to-val) nil)
+ (from-val (set state :mid) nil)))
+ (:mid (cond
+ (to-val (set state nil) (not from-val))
+ (from-val nil)
+ (t (set state t))))
+ (t (cond
+ (to-val (set (vecref rng-vec idx) nil) t)
+ (t t))))))
+
+(defun sys:awk%--rng- (rng-vec idx from-val to-val)
+ (placelet ((state (vecref rng-vec idx)))
+ (caseq state
+ (nil (cond
+ ((and from-val to-val) nil)
+ (from-val (set state :mid) nil)))
+ (:mid (cond
+ (to-val (set state nil))
+ (from-val nil)
+ (t (set state t))))
+ (t (cond
+ (to-val (set (vecref rng-vec idx) nil))
+ (t t))))))
+
+(defun sys:awk%rng+ (rng-vec idx from-val to-val)
+ (placelet ((state (vecref rng-vec idx)))
+ (caseq state
+ (nil (cond
+ ((and from-val to-val) (set state :end) t)
+ (from-val (set state t))))
+ (:end (cond
+ (to-val t)
+ (from-val (set state t))
+ (t (set state nil) nil)))
+ (t (cond
+ (to-val (set state :end) t)
+ (t t))))))
+
+(defun sys:awk%-rng+ (rng-vec idx from-val to-val)
+ (placelet ((state (vecref rng-vec idx)))
+ (caseq state
+ (nil (cond
+ ((and from-val to-val) (set state :end) nil)
+ (from-val (set state t) nil)))
+ (:end (cond
+ (to-val t)
+ (from-val (set state t) nil)
+ (t (set state nil) nil)))
+ (t (cond
+ (to-val (set state :end) t)
+ (t t))))))
+
+(defun sys:awk%--rng+ (rng-vec idx from-val to-val)
+ (placelet ((state (vecref rng-vec idx)))
+ (caseq state
+ (nil (cond
+ ((and from-val to-val) (set state :mid) nil)
+ (from-val (set state :mid) nil)))
+ (:mid (cond
+ (to-val (set state :end) (not from-val))
+ (from-val nil)
+ (t (set state t))))
+ (:end (cond
+ (to-val t)
+ (from-val (set state t) nil)
+ (t (set state nil) nil)))
+ (t (cond
+ (to-val (set state :end) t)
+ (t t))))))
+
(defmacro sys:awk-redir (aws-sym stream-var kind mode path body)
(with-gensyms (res-sym)
^(let ((,stream-var (qref ,aws-sym (ensure-stream ,kind ,path, mode))))
@@ -297,19 +371,23 @@
(again () '(return-from :awk-rec :awk-again))
(next-file () '(return-from :awk-file))
(sys:rng (form from-expr to-expr :env e)
- (let ((style (car form))
- (need-mid (member (car form) '(--rng --rng- --rng+)))
- (need-end (member (car form) '(rng+ -rng+ --rng+)))
- (ix (pinc (qref ,awc nranges)))
- (rng-temp (gensym))
- (from-expr-ex (sys:expand from-expr e))
- (to-expr-ex (sys:expand to-expr e))
- (flag (gensym))
- (flag-old (gensym))
- (flag-act (gensym))
- (flag-deact (gensym))
- (flag-mid (gensym))
- (from-expr-val (gensym)))
+ (let* ((style (car form))
+ (ix (pinc (qref ,awc nranges)))
+ (rng-temp (gensym))
+ (from-expr-ex (sys:expand from-expr e))
+ (from-expr-val (gensym))
+ (to-expr-ex (sys:expand to-expr e))
+ (to-expr-val (gensym))
+ (vec-temp (qref ,awc rng-vec-temp))
+ (emul-broken (and (plusp sys:compat) (<= sys:compat 177)))
+ (rng-fun
+ (caseq style
+ (--rng 'sys:awk%--rng)
+ (--rng- 'sys:awk%--rng-)
+ (rng+ 'sys:awk%rng+)
+ (-rng+ 'sys:awk%-rng+)
+ (--rng+ 'sys:awk%--rng+)))
+ (state (gensym)))
(tree-bind ((from-expr-ex fe-fv fe-ff fe-ev fe-ef)
(to-expr-ex te-fv te-ff te-ev te-ef)
(from-expr-orig to-expr-orig))
@@ -336,46 +414,26 @@
(set-diff te-ef te-ff)
'functions)
(push rng-temp (qref ,awc rng-expr-temps))
- (push ^(placelet ((,flag (vecref ,(qref ,awc rng-vec-temp) ,ix)))
- (let ((,flag-old ,flag) ,flag-act ,flag-deact
- ,*(if need-mid ^((,from-expr-val ,from-expr-ex))))
- ,*(if need-mid
- ^((set ,flag-act
- (caseq ,flag-old
- (nil (set ,flag-act (true ,from-expr-val)))
- (:end :end)
- (:mid :mid)
- ((t) (if ,from-expr-val t :mid)))))
- ^((cond
- (,flag-old (set ,flag-act ,flag-old))
- (,from-expr-ex (set ,flag-act t)))))
- ,(if need-end
- ^(caseq ,flag-act
- ((t ,*(if need-mid '(:mid))) (when ,to-expr-ex
- (set ,flag-act :end)
- (set ,flag-deact t)))
- (:end (cond
- (,to-expr-ex (set ,flag-deact t))
- (,(if need-mid from-expr-val from-expr-ex)
- (set ,flag-act t ,flag-old nil))
- (t (set ,flag-act nil)))))
- ^(when (and ,flag-act ,to-expr-ex)
- (set ,flag-act nil)
- (set ,flag-deact t)))
- ,*(caseq style
- ((rng rng+) ^((or (set ,flag ,flag-act)
- ,(if (and (plusp sys:compat)
- (<= sys:compat 177))
- flag-old
- flag-deact))))
- (-rng- ^((and (set ,flag ,flag-act) ,flag-old)))
- (rng- ^((set ,flag ,flag-act)))
- (-rng ^((set ,flag ,flag-act) ,flag-old))
- (-rng+ ^((set ,flag ,flag-act) (if ,flag-act ,flag-old)))
- (--rng- ^((set ,flag ,flag-act) (eq ,flag-act :mid)))
- ((--rng --rng+) ^((set ,flag ,flag-act)
- (or (eq ,flag-act :mid) ,flag-deact))))))
- (qref ,awc rng-exprs))
+ (caseq style
+ ((--rng --rng- rng+ -rng+ --rng+)
+ (push
+ ^(,rng-fun ,vec-temp ,ix ,from-expr-ex ,to-expr-ex)
+ (qref ,awc rng-exprs)))
+ (t (push
+ ^(placelet ((,state (vecref ,(qref ,awc rng-vec-temp) ,ix)))
+ (let ((,to-expr-val ,to-expr-ex))
+ (caseq ,state
+ (nil (let ((,from-expr-val ,from-expr-ex))
+ (cond
+ ((and ,from-expr-val ,to-expr-val)
+ ,(if (and (eq style 'rng) (not emul-broken)) t))
+ (,from-expr-val (set ,state t)
+ ,(if (memq style '(rng rng-)) t)))))
+ (t (cond
+ (,to-expr-val (set ,state nil)
+ ,(if (memq style '(rng -rng)) t))
+ (t t))))))
+ (qref ,awc rng-exprs))))
rng-temp)))
(rng (:form form from-expr to-expr)
^(sys:rng ,form
diff --git a/tests/015/awk-rng.expected b/tests/015/awk-rng.expected
index aa79a932..25c4626b 100644
--- a/tests/015/awk-rng.expected
+++ b/tests/015/awk-rng.expected
@@ -8,22 +8,22 @@ X2 t t t t t t
1 t t t t t t t t t
2 t t t t t t t t t
3 t t t t t t t t t
-Y1 t t t end t t t
-r end end t t
-s end end t t
-Y2 end end t t
+Y1 t t t t t t t
+r t t t t
+s t t t t
+Y2 t t t t
t
Y2
X1X2 t t t
-Y1Y2 t t t end t t t
+Y1Y2 t t t t t t t
X1X2 t t t
-Y1 t t t end t t t
-a end end t t
-Y2 end end t t
-X1X2Y1Y2 t t end end t t
+Y1 t t t t t t t
+a t t t t
+Y2 t t t t
+X1X2Y1Y2 t t t t t
a
-X1X2Y1Y2 t t end t t
-X1X2Y1Y2 t t end end t t
+X1X2Y1Y2 t t t
+X1X2Y1Y2 t t t t
X1 t t t
-b t t t t t t
-X2 t t t t t t
+b t t t t t t t
+X2 t t t t t t t
diff --git a/txr.1 b/txr.1
index 53b0836b..6eedd5cc 100644
--- a/txr.1
+++ b/txr.1
@@ -47280,17 +47280,20 @@ However, the range expression yields
.code nil
for the entire leading sequence of consecutive records for which
.meta from
-is true. Regardless of this, it yields true for the last record
-for which
+is true. If
+.meta from
+is true of the
.meta to
-is true.
+record which terminates the range,
+.code nil
+is returned for that record also.
.meIP (--rng- < from << to )
This type of range is active under the same conditions as
However, the range expression yields
.code nil
for the entire leading sequence of consecutive records for which
.meta from
-is true, and also yields nil for the last record which trigger the
+is true, and also yields nil for the last record which triggers the
.meta to
condition.
.meIP (rng+ < from << to )
@@ -47326,11 +47329,9 @@ However, the range expression yields
.code nil
for the entire leading sequence of consecutive records for which
.meta from
-is true, and for which
-.meta to
-is false. For the terminating records for which
+is true. This is the case even for those for which the
.meta to
-is true, it yields true.
+expression is true.
.RE
.coNP Macro @ ff