From bb71dcaa3ce19ec48d8b352566a21cb1f5f26c2f Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 28 Jan 2025 20:25:37 -0800 Subject: awk: use prepared lambdas for field separation. Handle field separations with lambdas, similarly to record separation. The idea is that we replace the rec-to-f method, which contains a cond statement checking the variables for which field separation discipline applies, with a lambda which is updated whenever any of those ariables change. * awk.tl (awk-state): New instance slot, rec-to-f. (awk-state :postinit): Call new upd-rec-to-f method so that rec-to-f is populated with the default field separating lambda. (awk-state rec-to-f): Method removed. (awk-state upd-rec-to-f): New method, based on rec-to-f. This doesn't perform the field separation, but returns a lambda which will perform it. (awk-state loop): We must call upd-rec-to-f whenever we change par-mode, because it influences field separation. (awk-mac-let): Replace the symbol macros fs, ft, fw and kfs with new implementations that use the reactive slot mechanism provided by rslot. Whenever the awk macro assigns any of these, the upd-rec-to-f method will be called. * tests/015/awk-basic.tl: New file. These basic tests of field separation pass before and after this change. * tests/common.tl (otest, motest): New macros. --- stdlib/awk.tl | 143 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 79 insertions(+), 64 deletions(-) (limited to 'stdlib') diff --git a/stdlib/awk.tl b/stdlib/awk.tl index be1c4758..50121736 100644 --- a/stdlib/awk.tl +++ b/stdlib/awk.tl @@ -42,11 +42,13 @@ rec orig-rec fields nf rng-vec (rng-n 0) par-mode par-mode-fs par-mode-prev-fs + rec-to-f (streams (hash :equal-based)) (:fini (self) (dohash (#:k v self.streams) (close-stream v))) (:postinit (self) + self.(upd-rec-to-f) (set self.inputs (or self.inputs (zap *args*) (list *stdin*))) (if (plusp self.rng-n) (set self.rng-vec (vector self.rng-n))) @@ -68,52 +70,62 @@ rng-exprs outer-env) -(defmeth sys:awk-state rec-to-f (self) - (cond - (self.fw - (unless (eq self.fw-prev self.fw) - (let ((ranges (reduce-left - (tb ((list . sum) item) - (let ((ns (+ sum item))) - ^((,*list #R(,sum ,ns)) . ,ns))) - self.fw '(nil . 0)))) - (set self.fw-prev self.fw - self.fw-ranges (car ranges)))) - (let ((i 0) end - (l (length self.rec))) - (set self.fields - (build (each ((r self.fw-ranges)) - (set end (to r)) - (if (>= (from r) l) - (return nil)) - (add [self.rec r]) - (inc i)) - (if (< end l) - (add [self.rec end..:]))) - self.nf i))) - (self.fs - (when self.ft - (awk-error "both fs and ft set")) - (if (and (not self.kfs) (equal self.rec "")) - (set self.fields nil - self.nf 0) - (let ((eff-fs (if self.par-mode - (if (equal self.fs self.par-mode-prev-fs) - self.par-mode-fs - (set self.par-mode-prev-fs self.fs - self.par-mode-fs - (regex-compile ^(or ,(if (regexp self.fs) - (regex-source self.fs) - self.fs) - "\n")))) - self.fs))) - (set self.fields (split-str self.rec eff-fs self.kfs) - self.nf (length self.fields))))) - (self.ft - (set self.fields (tok-str self.rec self.ft self.kfs) - self.nf (length self.fields))) - ((set self.fields (tok-str self.rec #/[^ \t\n]+/ self.kfs) - self.nf (length self.fields))))) +(defmeth sys:awk-state upd-rec-to-f (self) + (set self.rec-to-f + (cond + (self.fw + (unless (eq self.fw-prev self.fw) + (let ((ranges (reduce-left + (tb ((list . sum) item) + (let ((ns (+ sum item))) + ^((,*list #R(,sum ,ns)) . ,ns))) + self.fw '(nil . 0)))) + (set self.fw-prev self.fw + self.fw-ranges (car ranges)))) + (lambda (self) + (let ((i 0) end + (l (length self.rec))) + (set self.fields + (build (each ((r self.fw-ranges)) + (set end (to r)) + (if (>= (from r) l) + (return nil)) + (add [self.rec r]) + (inc i)) + (if (< end l) + (add [self.rec end..:]))) + self.nf i)))) + (self.fs + (when self.ft + (awk-error "both fs and ft set")) + (let ((eff-fs (if self.par-mode + (if (equal self.fs self.par-mode-prev-fs) + self.par-mode-fs + (set self.par-mode-prev-fs self.fs + self.par-mode-fs + (regex-compile ^(or ,(if (regexp self.fs) + (regex-source self.fs) + self.fs) + "\n")))) + self.fs))) + (if self.kfs + (lambda (self) + (set self.fields (split-str self.rec eff-fs t) + self.nf (length self.fields))) + (lambda (self) + (if (equal self.rec "") + (set self.fields nil + self.nf 0) + (set self.fields (split-str self.rec eff-fs nil) + self.nf (length self.fields))))))) + (self.ft + (lambda (self) + (set self.fields (tok-str self.rec self.ft self.kfs) + self.nf (length self.fields)))) + (t + (lambda (self) + (set self.fields (tok-str self.rec #/[^ \t\n]+/ self.kfs) + self.nf (length self.fields))))))) (defmeth sys:awk-state f-to-rec (self) (set self.rec `@{self.fields self.ofs}`)) @@ -147,23 +159,26 @@ (set cached-rr (cond ((and (equal aws.rs "\n") (not aws.krs)) - (set aws.par-mode nil) - (lambda () (get-line *stdin*))) + (set aws.par-mode nil) + aws.(upd-rec-to-f) + (lambda () (get-line *stdin*))) ((null aws.rs) - (set aws.par-mode t) - (let ((rin (record-adapter #/\n[ \n\t]*\n/)) - (flag t)) - (lambda () - (let ((r (get-line rin))) - (cond - (flag - (set flag nil) - (if (equal r "") - (get-line rin) - r)) - (t r)))))) + (set aws.par-mode t) + aws.(upd-rec-to-f) + (let ((rin (record-adapter #/\n[ \n\t]*\n/)) + (flag t)) + (lambda () + (let ((r (get-line rin))) + (cond + (flag + (set flag nil) + (if (equal r "") + (get-line rin) + r)) + (t r)))))) (t (set aws.par-mode nil) + aws.(upd-rec-to-f) (let ((rin (record-adapter (if (regexp aws.rs) aws.rs (regex-compile ^(compound, aws.rs))) @@ -377,10 +392,10 @@ (fname (qref ,aws-sym file-name)) (rs (qref ,aws-sym rs)) (krs (qref ,aws-sym krs)) - (fs (qref ,aws-sym fs)) - (ft (qref ,aws-sym ft)) - (fw (qref ,aws-sym fw)) - (kfs (qref ,aws-sym kfs)) + (fs (usr:rslot ,aws-sym 'fs 'upd-rec-to-f)) + (ft (usr:rslot ,aws-sym 'ft 'upd-rec-to-f)) + (fw (usr:rslot ,aws-sym 'fw 'upd-rec-to-f)) + (kfs (usr:rslot ,aws-sym 'kfs 'upd-rec-to-f)) (ofs (qref ,aws-sym ofs)) (ors (qref ,aws-sym ors))) (macrolet ((next () '(return-from :awk-rec)) -- cgit v1.2.3