diff options
-rw-r--r-- | stdlib/awk.tl | 77 | ||||
-rw-r--r-- | stdlib/conv.tl | 46 | ||||
-rw-r--r-- | tests/015/awk-fields.tl | 21 | ||||
-rw-r--r-- | txr.1 | 115 |
4 files changed, 185 insertions, 74 deletions
diff --git a/stdlib/awk.tl b/stdlib/awk.tl index 497b9e17..5eb409d3 100644 --- a/stdlib/awk.tl +++ b/stdlib/awk.tl @@ -59,7 +59,7 @@ begin-file-actions end-file-actions begin-actions end-actions cond-actions - field-names + field-name-conv (nranges 0) (rng-rec-temp (gensym)) (rng-vec-temp (gensym)) @@ -324,14 +324,24 @@ (:set-file (push ^((set ,*actions)) awc.begin-actions)) (:end-file (push actions awc.end-file-actions)) (:fields - (when awc.field-names + (when awc.field-name-conv (awk-error "duplicate :fields clauses")) - (whenlet ((fn (member-if [notf bindable] actions))) - (awk-error "~s isn't a valid field name" (car fn))) - (let ((syms (remq '- actions))) - (unless (equal syms (uniq syms)) - (awk-error "duplicate field names"))) - (set awc.field-names actions)) + (let ((fnames + (collect-each ((fn actions)) + (match-case fn + (@(bindable @sym) (list sym)) + ((@(bindable @sym) @(bindable @fun)) + (if (eq sym '-) + (awk-error "type given for unnamed field")) + fn) + ((@(bindable) @type) + (awk-error "bad fconv function: ~s" type)) + (@else (awk-error "bad :fields item: ~s" + else)))))) + (let ((nodash [remq '- fnames car])) + (unless (equal nodash [unique nodash car]) + (awk-error "duplicate field names"))) + (set awc.field-name-conv fnames))) (t (push (if actions cl ^(,pattern (prn))) @@ -475,12 +485,34 @@ ,*body)) (defmacro sys:awk-symac-let (awc . body) - ^(symacrolet ,(append-each ((fn awc.field-names) + ^(symacrolet ,(append-each ((fn awc.field-name-conv) (ix 0)) - (if (neq fn '-) - (list ^(,fn [f ,ix])))) + (if (neq (car fn) '-) + (list ^(,(car fn) [f ,ix])))) ,*body)) +(defun sys:awk-field-name-code (awc aws-sym) + (with-gensyms (fiter) + (let* ((nf 0) + (code (append-each ((fnc awc.field-name-conv) + (i 0)) + (set nf (succ i)) + (if (cadr fnc) + ^((rplaca ,fiter + (sys:conv-expand-sym ,(cadr fnc) + (car ,fiter))) + (set ,fiter (cdr ,fiter))) + ^((set ,fiter (cdr ,fiter))))))) + (while-match @(end ((set . @nil))) code + (upd code butlast)) + ^(let ((,fiter (qref ,aws-sym fields))) + (if (< (len ,fiter) ,nf) + (set ,fiter (take ,nf (append ,fiter (repeat '("")))) + (qref ,aws-sym fields) ,fiter + (qref ,aws-sym nf) ,nf)) + ,*code + (qref ,aws-sym (f-to-rec)))))) + (defun sys:awk-fun-shadowing-env (up-env) (make-env nil '((prn . sys:special)) up-env)) @@ -494,6 +526,20 @@ ^(sys:awk-mac-let ,awc ,aws-sym ,*p-actions-xform-unex) (sys:awk-fun-shadowing-env outer-env)))) + (if awc.rng-exprs + (set p-actions-xform + ^(let* ((,awc.rng-rec-temp rec) + (,awc.rng-vec-temp (qref ,aws-sym rng-vec)) + ,*(nreverse + (zip awc.rng-expr-temps + awc.rng-exprs))) + ,p-actions-xform))) + (if (and awc.field-name-conv + [some awc.field-name-conv cdr]) + (set p-actions-xform + ^(progn + ,(sys:awk-field-name-code awc aws-sym) + ,p-actions-xform))) ^(block ,(or awc.name 'awk) (let* (,*awc.lets ,awk-retval (,aws-sym (new sys:awk-state @@ -514,14 +560,7 @@ ,*(if (or awc.cond-actions awc.begin-file-actions awc.end-file-actions awc.end-actions) ^((,awk-fun (lambda (,aws-sym) - ,(if awc.rng-exprs - ^(let* ((,awc.rng-rec-temp rec) - (,awc.rng-vec-temp (qref ,aws-sym rng-vec)) - ,*(nreverse - (zip awc.rng-expr-temps - awc.rng-exprs))) - ,p-actions-xform) - p-actions-xform)))))) + ,p-actions-xform))))) ,*awc.begin-actions (unwind-protect ,(if (or awc.cond-actions awc.begin-file-actions diff --git a/stdlib/conv.tl b/stdlib/conv.tl index f2f5bd4d..ef1088cc 100644 --- a/stdlib/conv.tl +++ b/stdlib/conv.tl @@ -24,31 +24,29 @@ ;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ;; POSSIBILITY OF SUCH DAMAGE. + + +(defmacro sys:conv-expand-sym (sym arg-expr) + (caseq sym + (usr:i ^(toint ,arg-expr)) + (usr:o ^(toint ,arg-expr 8)) + (usr:x ^(toint ,arg-expr 16)) + (usr:b ^(toint ,arg-expr 2)) + (usr:c ^(toint ,arg-expr #\c)) + (usr:r ^(tofloat ,arg-expr)) + (usr:iz ^(tointz ,arg-expr)) + (usr:oz ^(tointz ,arg-expr 8)) + (usr:xz ^(tointz ,arg-expr 16)) + (usr:bz ^(tointz ,arg-expr 2)) + (usr:cz ^(tointz ,arg-expr #\c)) + (usr:rz ^(tofloatz ,arg-expr)) + (t ^(,sym ,arg-expr)))) + (defun sys:conv-let (. body) - ^(flet ((usr:i (arg : radix) - (toint arg radix)) - (usr:o (arg) - (toint arg 8)) - (usr:x (arg) - (toint arg 16)) - (usr:b (arg) - (toint arg 2)) - (usr:c (arg) - (toint arg #\c)) - (usr:r (arg) - (tofloat arg)) - (usr:iz (arg : radix) - (tointz arg radix)) - (usr:oz (arg) - (tointz arg 8)) - (usr:xz (arg) - (tointz arg 16)) - (usr:bz (arg) - (tointz arg 2)) - (usr:cz (arg) - (tointz arg #\c)) - (usr:rz (arg) - (tofloatz arg))) + ^(flet ,(collect-each ((sym '(usr:i usr:o usr:x usr:b usr:c + usr:r usr:iz usr:oz usr:xz + usr:bz usr:cz usr:rz))) + ^(,sym (arg) (sys:conv-expand-sym ,sym arg))) ,*body)) (defun sys:do-conv (lfl mfl tfl nm list) diff --git a/tests/015/awk-fields.tl b/tests/015/awk-fields.tl index 7f27e9e4..7bb2f599 100644 --- a/tests/015/awk-fields.tl +++ b/tests/015/awk-fields.tl @@ -6,7 +6,10 @@ (awk (:fields nil) (:begin (return-from awk))) :error (awk (:fields - -) (:begin (return-from awk))) nil (awk (:fields a - - b) (:begin (return-from awk))) nil - (awk (:fields a - - a) (:begin (return-from awk))) :error + (awk (:fields (a foo) - - (a bar)) (:begin (return-from awk))) :error + (awk (:fields (a foo) - - (b bar)) (:begin (return-from awk))) nil + (awk (:fields (a foo) (-) - (b bar)) (:begin (return-from awk))) :error + (awk (:fields (a foo) (- i) - (b bar)) (:begin (return-from awk))) :error (awk (:fields a) (:fields b) (:begin (return-from awk))) :error) (test @@ -16,3 +19,19 @@ (:fields h n - c) (t (add h n c)))) ("How" "now" "cow")) + +(test + (build + (awk + (:inputs '("1 x 2" "a x 1" "4 x b")) + (:fields (foo iz) - (bar iz)) + (t (add (+ foo bar))))) + (3 1 4)) + +(test + (build + (awk + (:inputs '("1")) + (:fields (a iz) - (c iz)) + (t (add (+ a c) nf)))) + (1 3)) @@ -64062,23 +64062,29 @@ clause performs a nonlocal transfer, processing is not triggered, because the processing of the input source is deemed not to have taken place. -.meIP (:fields << sym *) +.meIP (:fields >> { sym | >> ( sym <> [ fun ]) | -}*) The .code :fields -clause may be used to give symbolic names to fields. Every -.meta sym -argument must be either a bindable symbol. The symbol +clause may be specified in order to give symbolic names to fields, +and optionally specify conversions for them. +Every argument must be one of three expressions. It may be +a bindable symbol other than +.code - +(minus). It may be a list whose first element is +a symbol other than +.code - +optionally followed the name of a function. +Or else it may be the .code - -has a special meaning. Symbols other than +symbol, which has a special meaning. +Symbols other than .code - may not be repeated, and the .code :fields clause may appear at most once in a given instance of the .code awk macro. -Each entry in the -.meta sym -list is understood to correspond to a field expression for a successive field, +Each argument is understood to correspond to a field expression for a successive field, starting with the leftmost .meta sym corresponding with the first field, @@ -64094,6 +64100,36 @@ macro. The .code - symbol is a place holder which doesn't bind a symbol macro to the corresponding field. +Additionally, every two-element entry which associates the field symbol +.meta sym +with a function name +.meta fun +specifies a field conversion. After each record is read and divided into +fields, those fields for which +.meta fun +is specified are updated by passing their value to this function +and replacing them by the returned value. +The +.meta fun +symbol may also be one of the short-hand symbols available in the +.code fconv +macro, such as +.codn i , +.code x +and others. +If at least one such conversion is specified in a +.code :fields +clause, then the value of +.code rec +is updated from the converted fields in the usual manner, as if +the fields had been assigned. +Furthermore, it is ensured that every field for which a +.code :fields +clause specifies a conversion exists. Fields with an empty string +value are automatically added so that a field exists for the +rightmost conversion, and the value of +.code nf +is updated to include these fields. .meIP >> ( condition << action *) Clauses which do not have one of the specially recognized keywords @@ -65322,25 +65358,29 @@ effectively providing a shorthand for commonly-needed conversions: .coIP i Provides conversion to integer. It is identical to the .code toint -function. +function, with the default radix. .coIP o Converts a string value holding an octal representation -to the integer which it denotes. The expression -.code "(o str)" -is equivalent to -.codn "(toint str 8)" . +to the integer which it denotes. It is equivalent to +.code toint +with a +.meta radix +argument of 8. .coIP x Converts a string value holding a hexadecimal representation -to the integer which it denotes. The expression -.code "(x str)" +to the integer which it denotes. It is equivalent to +.code toint is equivalent to -.codn "(toint str 16)" . +with a +.meta radix +argument of 16. .coIP b Converts a string value holding a binary (base two) representation -to the integer which it denotes. The expression -.code "(b str)" -is equivalent to -.codn "(toint str 2)" . +to the integer which it denotes. It is equivalent to +.code toint +with a +.meta radix +argument of 2. .coIP c Converts a string value holding a C-language-style representation to the integer which it denotes, meaning that the @@ -65351,16 +65391,18 @@ decimal. These prefixes follow the or .code - sign, if present. -The expression -.code "(c str)" -is equivalent to -.codn "(toint str #\ec)" . +The +.code c +function is equivalent to +.code toint +invoked with a +.meta radix +argument of +.codn #\ec . .coIP r Converts a string holding a floating-point representation to -the floating-point value which it denotes. The expression -.code "(r str)" -is equivalent to -.codn "(tofloat str)" . +the floating-point value which it denotes. It is equivalent to +.codn tofloat . .ccIP @, iz @, oz @, xz @, bz @ cz and @ rz Conversion similar to .codn i , @@ -65370,7 +65412,7 @@ Conversion similar to .code c and .codn r , -but using +but equivalent to using the functions .code tointz and .codn tofloatz . @@ -65407,7 +65449,6 @@ expression can be used as the condition in an clause which triggers the action if one or more fields have been extracted, and performs conversions on them. - Note: although .code fconv is intended for converting textual fields, and the semantic descriptions below @@ -65421,6 +65462,20 @@ means of the .code toint function. +Note: a somewhat less flexible mechanism for converting fields, related to +.codn fconv , +is present in the +.code :fields +clause of the +.code awk +macro, which can specify names for the positional fields, along with +conversion functions. The +.code :fields +clause has different syntax, and doesn't support the +.code : +(colon) separator, instead assuming a fixed number of fields +enumerated from the left. + .TP* Examples: .verb |