;; Copyright 2016-2023 ;; Kaz Kylheku ;; Vancouver, Canada ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without ;; modification, are permitted provided that the following conditions are met: ;; ;; 1. Redistributions of source code must retain the above copyright notice, ;; this list of conditions and the following disclaimer. ;; ;; 2. Redistributions in binary form must reproduce the above copyright notice, ;; this list of conditions and the following disclaimer in the documentation ;; and/or other materials provided with the distribution. ;; ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE ;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ;; POSSIBILITY OF SUCH DAMAGE. (load "conv") (defstruct sys:awk-state () (rs "\n") krs fs ft kfs fw fw-prev fw-ranges (ofs " ") (ors "\n") (inputs) (output *stdout*) (file-num 0) file-name (file-rec-num 0) (rec-num 0) rec orig-rec fields nf rng-vec (rng-n 0) par-mode par-mode-fs par-mode-prev-fs (streams (hash :equal-based)) (:fini (self) (dohash (#:k v self.streams) (close-stream v))) (:postinit (self) (set self.inputs (or self.inputs (zap *args*) (list *stdin*))) (if (plusp self.rng-n) (set self.rng-vec (vector self.rng-n))) (unless (streamp self.output) (let ((stream (open-file self.output "w"))) (set [self.streams ^(:outf ,self.output)] stream self.output stream))))) (defstruct sys:awk-compile-time () inputs output name lets funs begin-file-actions end-file-actions begin-actions end-actions cond-actions field-name-conv (nranges 0) (rng-rec-temp (gensym)) (rng-vec-temp (gensym)) rng-expr-temps rng-exprs outer-env) (defmeth sys:awk-state rec-to-f (self) (cond (self.fw (unless (eq self.fw-prev self.fw) (let ((ranges (reduce-left (tb ((list . sum) item) (let ((ns (+ sum item))) ^((,*list #R(,sum ,ns)) . ,ns))) self.fw '(nil . 0)))) (set self.fw-prev self.fw self.fw-ranges (car ranges)))) (let ((i 0) end (l (length self.rec))) (set self.fields (build (each ((r self.fw-ranges)) (set end (to r)) (if (>= (from r) l) (return nil)) (add [self.rec r]) (inc i)) (if (< end l) (add [self.rec end..:]))) self.nf i))) (self.fs (when self.ft (awk-error "both fs and ft set")) (if (and (not self.kfs) (equal self.rec "")) (set self.fields nil self.nf 0) (let ((eff-fs (if self.par-mode (if (equal self.fs self.par-mode-prev-fs) self.par-mode-fs (set self.par-mode-prev-fs self.fs self.par-mode-fs (regex-compile ^(or ,(if (regexp self.fs) (regex-source self.fs) self.fs) "\n")))) self.fs))) (set self.fields (split-str self.rec eff-fs self.kfs) self.nf (length self.fields))))) (self.ft (set self.fields (tok-str self.rec self.ft self.kfs) self.nf (length self.fields))) ((set self.fields (tok-str self.rec #/[^ \t\n]+/ self.kfs) self.nf (length self.fields))))) (defmeth sys:awk-state f-to-rec (self) (set self.rec `@{self.fields self.ofs}`)) (defmeth sys:awk-state nf-to-f (self) (set self.fields (take self.nf (append self.fields (repeat '(""))))) self.(f-to-rec)) (defmeth sys:awk-state loop (aws func beg-file-func end-file-func) (whilet ((in (pop aws.inputs))) (block :awk-file (inc aws.file-num) (set aws.file-name (if (streamp in) (stream-get-prop in :name) in)) (when beg-file-func [beg-file-func aws]) (let* ((*stdin* (cond ((streamp in) in) ((listp in) (make-strlist-input-stream in)) ((open-file in)))) (noted-rs (not aws.rs)) (noted-krs (not aws.krs)) (cached-rr nil)) (flet ((get-rec-reader (*stdin*) (cond ((and (equal noted-rs aws.rs) (eq noted-krs aws.krs)) cached-rr) (t (set noted-rs aws.rs noted-krs aws.krs) (set cached-rr (cond ((and (equal aws.rs "\n") (not aws.krs)) (set aws.par-mode nil) (lambda () (get-line *stdin*))) ((null aws.rs) (set aws.par-mode t) (let ((rin (record-adapter #/\n[ \n\t]*\n/)) (flag t)) (lambda () (let ((r (get-line rin))) (cond (flag (set flag nil) (if (equal r "") (get-line rin) r)) (t r)))))) (t (set aws.par-mode nil) (let ((rin (record-adapter (if (regexp aws.rs) aws.rs (regex-compile ^(compound, aws.rs))) *stdin* aws.krs))) (lambda () (get-line rin)))))))))) (set aws.file-rec-num 0) (unwind-protect (whilet ((rr (get-rec-reader *stdin*)) (rec (call rr))) (set aws.rec rec aws.orig-rec rec) (inc aws.rec-num) (inc aws.file-rec-num) (while* (eq :awk-again (block* :awk-rec [func aws])) aws.(rec-to-f))) (when end-file-func [end-file-func aws]))))))) (defmeth sys:awk-state prn (self . args) (cond (args (for ((a args) next) (a) ((set a next)) (put-string `@(car a)`) (put-string (if (set next (cdr a)) self.ofs self.ors)))) (t (put-string self.rec) (put-string self.ors) nil))) (defmeth sys:awk-state ensure-stream (self kind path mode) (hash-update-1 self.streams ^(,kind ,path) (do or @1 (caseq kind ((:inf :outf :apf) (open-file path mode)) ((:inp :outp) (open-command path mode)))) nil)) (defmeth sys:awk-state close-or-flush (self stream kind path val) (cond ((eq val :close) (whenlet ((s (del [self.streams ^(,kind ,path)]))) (close-stream s))) ((memq kind '(:outf outp)) (flush-stream stream) val) (val))) (defun awk-error (msg . args) (throwf 'eval-error `~s: @msg` 'awk . args)) (defun sys:awk-test (val rec) (caseq (typeof val) ((regex fun) (call val rec)) (t val))) (defun sys:awk%--rng (rng-vec idx from-val to-val) (placelet ((state (vecref rng-vec idx))) (caseq state (nil (cond ((and from-val to-val) nil) (from-val (set state :mid) nil))) (:mid (cond (to-val (set state nil) (not from-val)) (from-val nil) (t (set state t)))) (t (cond (to-val (set (vecref rng-vec idx) nil) t) (t t)))))) (defun sys:awk%--rng- (rng-vec idx from-val to-val) (placelet ((state (vecref rng-vec idx))) (caseq state (nil (cond ((and from-val to-val) nil) (from-val (set state :mid) nil))) (:mid (cond (to-val (set state nil)) (from-val nil) (t (set state t)))) (t (cond (to-val (set (vecref rng-vec idx) nil)) (t t)))))) (defun sys:awk%rng+ (rng-vec idx from-val to-val) (placelet ((state (vecref rng-vec idx))) (caseq state (nil (cond ((and from-val to-val) (set state :end) t) (from-val (set state t)))) (:end (cond (to-val t) (from-val (set state t)) (t (set state nil) nil))) (t (cond (to-val (set state :end) t) (t t)))))) (defun sys:awk%-rng+ (rng-vec idx from-val to-val) (placelet ((state (vecref rng-vec idx))) (caseq state (nil (cond ((and from-val to-val) (set state :end) nil) (from-val (set state t) nil))) (:end (cond (to-val t) (from-val (set state t) nil) (t (set state nil) nil))) (t (cond (to-val (set state :end) t) (t t)))))) (defun sys:awk%--rng+ (rng-vec idx from-val to-val) (placelet ((state (vecref rng-vec idx))) (caseq state (nil (cond ((and from-val to-val) (set state :mid) nil) (from-val (set state :mid) nil))) (:mid (cond (to-val (set state :end) (not from-val)) (from-val nil) (t (set state t)))) (:end (cond (to-val t) (from-val (set state t) nil) (t (set state nil) nil))) (t (cond (to-val (set state :end) t) (t t)))))) (defmacro sys:awk-redir (aws-sym stream-var kind mode path body) (with-gensyms (res-sym) ^(let* ((,res-sym ,path) (,stream-var (qref ,aws-sym (ensure-stream ,kind ,res-sym ,mode)))) ,(if body ^(qref ,aws-sym (close-or-flush ,stream-var ,kind ,res-sym (progn ,*body))) stream-var)))) (defun sys:awk-expander (outer-env clauses) (let ((awc (new sys:awk-compile-time outer-env outer-env))) (each ((cl clauses)) (tree-case cl ((pattern . actions) (caseql pattern (:inputs (when awc.inputs (awk-error "duplicate :input clauses")) (set awc.inputs actions)) (:output (when awc.output (awk-error "duplicate :output clauses")) (when (or (atom actions) (cdr actions)) (awk-error "bad :output syntax")) (set awc.output (car actions))) (:name (when awc.name (awk-error "duplicate :name clauses")) (when (or (atom actions) (cdr actions)) (awk-error "bad :name syntax")) (set awc.name (car actions))) (:let (push actions awc.lets)) (:fun (push actions awc.funs)) (:begin (push actions awc.begin-actions)) (:set (push ^((set ,*actions)) awc.begin-actions)) (:end (push actions awc.end-actions)) (:begin-file (push actions awc.begin-file-actions)) (:set-file (push ^((set ,*actions)) awc.begin-actions)) (:end-file (push actions awc.end-file-actions)) (:fields (when awc.field-name-conv (awk-error "duplicate :fields clauses")) (let ((fnames (collect-each ((fn actions)) (match-case fn (@(bindable @sym) (list sym)) ((@(bindable @sym) @(bindable)) (if (eq sym '-) (awk-error "type given for unnamed field")) fn) ((@(bindable) @type) (awk-error "bad fconv function: ~s" type)) (@else (awk-error "bad :fields item: ~s" else)))))) (let ((nodash [remq '- fnames car])) (unless (equal nodash [unique nodash car]) (awk-error "duplicate field names"))) (set awc.field-name-conv fnames))) (t (push (if actions cl ^(,pattern (prn))) awc.cond-actions)))) (junk (awk-error "bad clause syntax ~s" junk)))) (set awc.lets [apply append (nreverse awc.lets)] awc.funs [apply append (nreverse awc.funs)] awc.begin-actions [apply append (nreverse awc.begin-actions)] awc.end-actions [apply append (nreverse awc.end-actions)] awc.begin-file-actions [apply append (nreverse awc.begin-file-actions)] awc.end-file-actions [apply append (nreverse awc.end-file-actions)] awc.cond-actions (nreverse awc.cond-actions)) awc)) (defun sys:awk-code-move-check (mainform subform suspicious-vars kind) (when suspicious-vars (compile-warning mainform "~!form ~s\n\ is moved out of the apparent scope\n\ and thus cannot refer to ~a ~s" subform kind suspicious-vars))) (defmacro sys:awk-mac-let (awc aws-sym . body) ^(symacrolet ((rec (usr:rslot ,aws-sym 'rec 'rec-to-f)) (orec (usr:rslot ,aws-sym 'orig-rec 'rec-to-f)) (f (usr:rslot ,aws-sym 'fields 'f-to-rec)) (nf (usr:rslot ,aws-sym 'nf 'nf-to-f)) (nr (qref ,aws-sym rec-num)) (fnr (qref ,aws-sym file-rec-num)) (arg (qref ,aws-sym file-num)) (fname (qref ,aws-sym file-name)) (rs (qref ,aws-sym rs)) (krs (qref ,aws-sym krs)) (fs (qref ,aws-sym fs)) (ft (qref ,aws-sym ft)) (fw (qref ,aws-sym fw)) (kfs (qref ,aws-sym kfs)) (ofs (qref ,aws-sym ofs)) (ors (qref ,aws-sym ors))) (macrolet ((next () '(return-from :awk-rec)) (again () '(return-from :awk-rec :awk-again)) (next-file () '(return-from :awk-file)) (sys:rng-if (form from-expr to-expr :env e) ^(sys:rng-impl ,form (sys:awk-test ,from-expr ,(qref ,awc rng-rec-temp)) (sys:awk-test ,to-expr ,(qref ,awc rng-rec-temp)))) (sys:rng-impl (form from-expr to-expr :env e) (let* ((style (car form)) (ix (pinc (qref ,awc nranges))) (rng-temp (gensym)) (from-expr-ex (expand from-expr e)) (from-expr-val (gensym)) (to-expr-ex (expand to-expr e)) (to-expr-val (gensym)) (vec-temp (qref ,awc rng-vec-temp)) (emul-broken (and (plusp sys:compat) (<= sys:compat 177))) (rng-fun (caseq style (--rng 'sys:awk%--rng) (--rng- 'sys:awk%--rng-) (rng+ 'sys:awk%rng+) (-rng+ 'sys:awk%-rng+) (--rng+ 'sys:awk%--rng+))) (state (gensym))) (tree-bind ((from-expr-ex fe-fv fe-ff fe-ev fe-ef) (to-expr-ex te-fv te-ff te-ev te-ef) (from-expr-orig to-expr-orig)) (list (expand-with-free-refs from-expr e ,awc.outer-env) (expand-with-free-refs to-expr e ,awc.outer-env) (list (cadr form) (caddr form))) (sys:awk-code-move-check form from-expr-orig (diff fe-ev fe-fv) 'variables) (sys:awk-code-move-check form from-expr-orig (diff fe-ef fe-ff) 'functions) (sys:awk-code-move-check form to-expr-orig (diff te-ev te-fv) 'variables) (sys:awk-code-move-check form to-expr-orig (diff te-ef te-ff) 'functions) (push rng-temp (qref ,awc rng-expr-temps)) (caseq style ((--rng --rng- rng+ -rng+ --rng+) (push ^(,rng-fun ,vec-temp ,ix ,from-expr-ex ,to-expr-ex) (qref ,awc rng-exprs))) (t (push ^(placelet ((,state (vecref ,(qref ,awc rng-vec-temp) ,ix))) (let ((,to-expr-val ,to-expr-ex)) (caseq ,state (nil (let ((,from-expr-val ,from-expr-ex)) (cond ((and ,from-expr-val ,to-expr-val) ,(if (and (eq style 'rng) (not emul-broken)) t)) (,from-expr-val (set ,state t) ,(if (memq style '(rng rng-)) t))))) (t (cond (,to-expr-val (set ,state nil) ,(if (memq style '(rng -rng)) t)) (t t)))))) (qref ,awc rng-exprs)))) rng-temp))) (rng (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (-rng (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (rng- (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (-rng- (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (--rng (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (--rng- (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (rng+ (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (-rng+ (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (--rng+ (:form form from-expr to-expr) ^(sys:rng-if ,form ,from-expr ,to-expr)) (ff (. opip-args) ^(symacrolet ((f (usr:rslot ,',aws-sym 'fields 'f-to-rec))) (set f [(opip ,*opip-args) f]))) (mf (. opip-args) ^(symacrolet ((f (usr:rslot ,',aws-sym 'fields 'f-to-rec))) (set f (mapcar (opip ,*opip-args) f)))) (fconv (. conv-args) ^(set f (sys:conv (,*conv-args) f)))) ,*body))) (defmacro sys:awk-mac-let-outer (aws-sym . body) ^(macrolet ((-> (path . body) ^(sys:awk-redir ,',aws-sym *stdout* :outf "w" ,path ,body)) (->> (path . body) ^(sys:awk-redir ,',aws-sym *stdout* :apf "a" ,path ,body)) (<- (path . body) ^(sys:awk-redir ,',aws-sym *stdin* :inf "r" ,path ,body)) (!> (path . body) ^(sys:awk-redir ,',aws-sym *stdout* :outp "w" ,path ,body)) (