summaryrefslogtreecommitdiffstats
path: root/stdlib
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-01-28 20:25:37 -0800
committerKaz Kylheku <kaz@kylheku.com>2025-01-28 20:25:37 -0800
commitbb71dcaa3ce19ec48d8b352566a21cb1f5f26c2f (patch)
treed5c9c53cac417f0607929a3204fb8375c4f39a3b /stdlib
parent66f3e6b72dad5a2641bf42fc3c1b32f3c5dfaadd (diff)
downloadtxr-bb71dcaa3ce19ec48d8b352566a21cb1f5f26c2f.tar.gz
txr-bb71dcaa3ce19ec48d8b352566a21cb1f5f26c2f.tar.bz2
txr-bb71dcaa3ce19ec48d8b352566a21cb1f5f26c2f.zip
awk: use prepared lambdas for field separation.
Handle field separations with lambdas, similarly to record separation. The idea is that we replace the rec-to-f method, which contains a cond statement checking the variables for which field separation discipline applies, with a lambda which is updated whenever any of those ariables change. * awk.tl (awk-state): New instance slot, rec-to-f. (awk-state :postinit): Call new upd-rec-to-f method so that rec-to-f is populated with the default field separating lambda. (awk-state rec-to-f): Method removed. (awk-state upd-rec-to-f): New method, based on rec-to-f. This doesn't perform the field separation, but returns a lambda which will perform it. (awk-state loop): We must call upd-rec-to-f whenever we change par-mode, because it influences field separation. (awk-mac-let): Replace the symbol macros fs, ft, fw and kfs with new implementations that use the reactive slot mechanism provided by rslot. Whenever the awk macro assigns any of these, the upd-rec-to-f method will be called. * tests/015/awk-basic.tl: New file. These basic tests of field separation pass before and after this change. * tests/common.tl (otest, motest): New macros.
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/awk.tl143
1 files changed, 79 insertions, 64 deletions
diff --git a/stdlib/awk.tl b/stdlib/awk.tl
index be1c4758..50121736 100644
--- a/stdlib/awk.tl
+++ b/stdlib/awk.tl
@@ -42,11 +42,13 @@
rec orig-rec fields nf
rng-vec (rng-n 0)
par-mode par-mode-fs par-mode-prev-fs
+ rec-to-f
(streams (hash :equal-based))
(:fini (self)
(dohash (#:k v self.streams)
(close-stream v)))
(:postinit (self)
+ self.(upd-rec-to-f)
(set self.inputs (or self.inputs (zap *args*) (list *stdin*)))
(if (plusp self.rng-n)
(set self.rng-vec (vector self.rng-n)))
@@ -68,52 +70,62 @@
rng-exprs
outer-env)
-(defmeth sys:awk-state rec-to-f (self)
- (cond
- (self.fw
- (unless (eq self.fw-prev self.fw)
- (let ((ranges (reduce-left
- (tb ((list . sum) item)
- (let ((ns (+ sum item)))
- ^((,*list #R(,sum ,ns)) . ,ns)))
- self.fw '(nil . 0))))
- (set self.fw-prev self.fw
- self.fw-ranges (car ranges))))
- (let ((i 0) end
- (l (length self.rec)))
- (set self.fields
- (build (each ((r self.fw-ranges))
- (set end (to r))
- (if (>= (from r) l)
- (return nil))
- (add [self.rec r])
- (inc i))
- (if (< end l)
- (add [self.rec end..:])))
- self.nf i)))
- (self.fs
- (when self.ft
- (awk-error "both fs and ft set"))
- (if (and (not self.kfs) (equal self.rec ""))
- (set self.fields nil
- self.nf 0)
- (let ((eff-fs (if self.par-mode
- (if (equal self.fs self.par-mode-prev-fs)
- self.par-mode-fs
- (set self.par-mode-prev-fs self.fs
- self.par-mode-fs
- (regex-compile ^(or ,(if (regexp self.fs)
- (regex-source self.fs)
- self.fs)
- "\n"))))
- self.fs)))
- (set self.fields (split-str self.rec eff-fs self.kfs)
- self.nf (length self.fields)))))
- (self.ft
- (set self.fields (tok-str self.rec self.ft self.kfs)
- self.nf (length self.fields)))
- ((set self.fields (tok-str self.rec #/[^ \t\n]+/ self.kfs)
- self.nf (length self.fields)))))
+(defmeth sys:awk-state upd-rec-to-f (self)
+ (set self.rec-to-f
+ (cond
+ (self.fw
+ (unless (eq self.fw-prev self.fw)
+ (let ((ranges (reduce-left
+ (tb ((list . sum) item)
+ (let ((ns (+ sum item)))
+ ^((,*list #R(,sum ,ns)) . ,ns)))
+ self.fw '(nil . 0))))
+ (set self.fw-prev self.fw
+ self.fw-ranges (car ranges))))
+ (lambda (self)
+ (let ((i 0) end
+ (l (length self.rec)))
+ (set self.fields
+ (build (each ((r self.fw-ranges))
+ (set end (to r))
+ (if (>= (from r) l)
+ (return nil))
+ (add [self.rec r])
+ (inc i))
+ (if (< end l)
+ (add [self.rec end..:])))
+ self.nf i))))
+ (self.fs
+ (when self.ft
+ (awk-error "both fs and ft set"))
+ (let ((eff-fs (if self.par-mode
+ (if (equal self.fs self.par-mode-prev-fs)
+ self.par-mode-fs
+ (set self.par-mode-prev-fs self.fs
+ self.par-mode-fs
+ (regex-compile ^(or ,(if (regexp self.fs)
+ (regex-source self.fs)
+ self.fs)
+ "\n"))))
+ self.fs)))
+ (if self.kfs
+ (lambda (self)
+ (set self.fields (split-str self.rec eff-fs t)
+ self.nf (length self.fields)))
+ (lambda (self)
+ (if (equal self.rec "")
+ (set self.fields nil
+ self.nf 0)
+ (set self.fields (split-str self.rec eff-fs nil)
+ self.nf (length self.fields)))))))
+ (self.ft
+ (lambda (self)
+ (set self.fields (tok-str self.rec self.ft self.kfs)
+ self.nf (length self.fields))))
+ (t
+ (lambda (self)
+ (set self.fields (tok-str self.rec #/[^ \t\n]+/ self.kfs)
+ self.nf (length self.fields)))))))
(defmeth sys:awk-state f-to-rec (self)
(set self.rec `@{self.fields self.ofs}`))
@@ -147,23 +159,26 @@
(set cached-rr
(cond
((and (equal aws.rs "\n") (not aws.krs))
- (set aws.par-mode nil)
- (lambda () (get-line *stdin*)))
+ (set aws.par-mode nil)
+ aws.(upd-rec-to-f)
+ (lambda () (get-line *stdin*)))
((null aws.rs)
- (set aws.par-mode t)
- (let ((rin (record-adapter #/\n[ \n\t]*\n/))
- (flag t))
- (lambda ()
- (let ((r (get-line rin)))
- (cond
- (flag
- (set flag nil)
- (if (equal r "")
- (get-line rin)
- r))
- (t r))))))
+ (set aws.par-mode t)
+ aws.(upd-rec-to-f)
+ (let ((rin (record-adapter #/\n[ \n\t]*\n/))
+ (flag t))
+ (lambda ()
+ (let ((r (get-line rin)))
+ (cond
+ (flag
+ (set flag nil)
+ (if (equal r "")
+ (get-line rin)
+ r))
+ (t r))))))
(t
(set aws.par-mode nil)
+ aws.(upd-rec-to-f)
(let ((rin (record-adapter
(if (regexp aws.rs) aws.rs
(regex-compile ^(compound, aws.rs)))
@@ -377,10 +392,10 @@
(fname (qref ,aws-sym file-name))
(rs (qref ,aws-sym rs))
(krs (qref ,aws-sym krs))
- (fs (qref ,aws-sym fs))
- (ft (qref ,aws-sym ft))
- (fw (qref ,aws-sym fw))
- (kfs (qref ,aws-sym kfs))
+ (fs (usr:rslot ,aws-sym 'fs 'upd-rec-to-f))
+ (ft (usr:rslot ,aws-sym 'ft 'upd-rec-to-f))
+ (fw (usr:rslot ,aws-sym 'fw 'upd-rec-to-f))
+ (kfs (usr:rslot ,aws-sym 'kfs 'upd-rec-to-f))
(ofs (qref ,aws-sym ofs))
(ors (qref ,aws-sym ors)))
(macrolet ((next () '(return-from :awk-rec))