summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--stdlib/awk.tl77
-rw-r--r--stdlib/conv.tl46
-rw-r--r--tests/015/awk-fields.tl21
-rw-r--r--txr.1115
4 files changed, 185 insertions, 74 deletions
diff --git a/stdlib/awk.tl b/stdlib/awk.tl
index 497b9e17..5eb409d3 100644
--- a/stdlib/awk.tl
+++ b/stdlib/awk.tl
@@ -59,7 +59,7 @@
begin-file-actions end-file-actions
begin-actions end-actions
cond-actions
- field-names
+ field-name-conv
(nranges 0)
(rng-rec-temp (gensym))
(rng-vec-temp (gensym))
@@ -324,14 +324,24 @@
(:set-file (push ^((set ,*actions)) awc.begin-actions))
(:end-file (push actions awc.end-file-actions))
(:fields
- (when awc.field-names
+ (when awc.field-name-conv
(awk-error "duplicate :fields clauses"))
- (whenlet ((fn (member-if [notf bindable] actions)))
- (awk-error "~s isn't a valid field name" (car fn)))
- (let ((syms (remq '- actions)))
- (unless (equal syms (uniq syms))
- (awk-error "duplicate field names")))
- (set awc.field-names actions))
+ (let ((fnames
+ (collect-each ((fn actions))
+ (match-case fn
+ (@(bindable @sym) (list sym))
+ ((@(bindable @sym) @(bindable @fun))
+ (if (eq sym '-)
+ (awk-error "type given for unnamed field"))
+ fn)
+ ((@(bindable) @type)
+ (awk-error "bad fconv function: ~s" type))
+ (@else (awk-error "bad :fields item: ~s"
+ else))))))
+ (let ((nodash [remq '- fnames car]))
+ (unless (equal nodash [unique nodash car])
+ (awk-error "duplicate field names")))
+ (set awc.field-name-conv fnames)))
(t (push (if actions
cl
^(,pattern (prn)))
@@ -475,12 +485,34 @@
,*body))
(defmacro sys:awk-symac-let (awc . body)
- ^(symacrolet ,(append-each ((fn awc.field-names)
+ ^(symacrolet ,(append-each ((fn awc.field-name-conv)
(ix 0))
- (if (neq fn '-)
- (list ^(,fn [f ,ix]))))
+ (if (neq (car fn) '-)
+ (list ^(,(car fn) [f ,ix]))))
,*body))
+(defun sys:awk-field-name-code (awc aws-sym)
+ (with-gensyms (fiter)
+ (let* ((nf 0)
+ (code (append-each ((fnc awc.field-name-conv)
+ (i 0))
+ (set nf (succ i))
+ (if (cadr fnc)
+ ^((rplaca ,fiter
+ (sys:conv-expand-sym ,(cadr fnc)
+ (car ,fiter)))
+ (set ,fiter (cdr ,fiter)))
+ ^((set ,fiter (cdr ,fiter)))))))
+ (while-match @(end ((set . @nil))) code
+ (upd code butlast))
+ ^(let ((,fiter (qref ,aws-sym fields)))
+ (if (< (len ,fiter) ,nf)
+ (set ,fiter (take ,nf (append ,fiter (repeat '(""))))
+ (qref ,aws-sym fields) ,fiter
+ (qref ,aws-sym nf) ,nf))
+ ,*code
+ (qref ,aws-sym (f-to-rec))))))
+
(defun sys:awk-fun-shadowing-env (up-env)
(make-env nil '((prn . sys:special)) up-env))
@@ -494,6 +526,20 @@
^(sys:awk-mac-let ,awc ,aws-sym
,*p-actions-xform-unex)
(sys:awk-fun-shadowing-env outer-env))))
+ (if awc.rng-exprs
+ (set p-actions-xform
+ ^(let* ((,awc.rng-rec-temp rec)
+ (,awc.rng-vec-temp (qref ,aws-sym rng-vec))
+ ,*(nreverse
+ (zip awc.rng-expr-temps
+ awc.rng-exprs)))
+ ,p-actions-xform)))
+ (if (and awc.field-name-conv
+ [some awc.field-name-conv cdr])
+ (set p-actions-xform
+ ^(progn
+ ,(sys:awk-field-name-code awc aws-sym)
+ ,p-actions-xform)))
^(block ,(or awc.name 'awk)
(let* (,*awc.lets ,awk-retval
(,aws-sym (new sys:awk-state
@@ -514,14 +560,7 @@
,*(if (or awc.cond-actions awc.begin-file-actions
awc.end-file-actions awc.end-actions)
^((,awk-fun (lambda (,aws-sym)
- ,(if awc.rng-exprs
- ^(let* ((,awc.rng-rec-temp rec)
- (,awc.rng-vec-temp (qref ,aws-sym rng-vec))
- ,*(nreverse
- (zip awc.rng-expr-temps
- awc.rng-exprs)))
- ,p-actions-xform)
- p-actions-xform))))))
+ ,p-actions-xform)))))
,*awc.begin-actions
(unwind-protect
,(if (or awc.cond-actions awc.begin-file-actions
diff --git a/stdlib/conv.tl b/stdlib/conv.tl
index f2f5bd4d..ef1088cc 100644
--- a/stdlib/conv.tl
+++ b/stdlib/conv.tl
@@ -24,31 +24,29 @@
;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;; POSSIBILITY OF SUCH DAMAGE.
+
+
+(defmacro sys:conv-expand-sym (sym arg-expr)
+ (caseq sym
+ (usr:i ^(toint ,arg-expr))
+ (usr:o ^(toint ,arg-expr 8))
+ (usr:x ^(toint ,arg-expr 16))
+ (usr:b ^(toint ,arg-expr 2))
+ (usr:c ^(toint ,arg-expr #\c))
+ (usr:r ^(tofloat ,arg-expr))
+ (usr:iz ^(tointz ,arg-expr))
+ (usr:oz ^(tointz ,arg-expr 8))
+ (usr:xz ^(tointz ,arg-expr 16))
+ (usr:bz ^(tointz ,arg-expr 2))
+ (usr:cz ^(tointz ,arg-expr #\c))
+ (usr:rz ^(tofloatz ,arg-expr))
+ (t ^(,sym ,arg-expr))))
+
(defun sys:conv-let (. body)
- ^(flet ((usr:i (arg : radix)
- (toint arg radix))
- (usr:o (arg)
- (toint arg 8))
- (usr:x (arg)
- (toint arg 16))
- (usr:b (arg)
- (toint arg 2))
- (usr:c (arg)
- (toint arg #\c))
- (usr:r (arg)
- (tofloat arg))
- (usr:iz (arg : radix)
- (tointz arg radix))
- (usr:oz (arg)
- (tointz arg 8))
- (usr:xz (arg)
- (tointz arg 16))
- (usr:bz (arg)
- (tointz arg 2))
- (usr:cz (arg)
- (tointz arg #\c))
- (usr:rz (arg)
- (tofloatz arg)))
+ ^(flet ,(collect-each ((sym '(usr:i usr:o usr:x usr:b usr:c
+ usr:r usr:iz usr:oz usr:xz
+ usr:bz usr:cz usr:rz)))
+ ^(,sym (arg) (sys:conv-expand-sym ,sym arg)))
,*body))
(defun sys:do-conv (lfl mfl tfl nm list)
diff --git a/tests/015/awk-fields.tl b/tests/015/awk-fields.tl
index 7f27e9e4..7bb2f599 100644
--- a/tests/015/awk-fields.tl
+++ b/tests/015/awk-fields.tl
@@ -6,7 +6,10 @@
(awk (:fields nil) (:begin (return-from awk))) :error
(awk (:fields - -) (:begin (return-from awk))) nil
(awk (:fields a - - b) (:begin (return-from awk))) nil
- (awk (:fields a - - a) (:begin (return-from awk))) :error
+ (awk (:fields (a foo) - - (a bar)) (:begin (return-from awk))) :error
+ (awk (:fields (a foo) - - (b bar)) (:begin (return-from awk))) nil
+ (awk (:fields (a foo) (-) - (b bar)) (:begin (return-from awk))) :error
+ (awk (:fields (a foo) (- i) - (b bar)) (:begin (return-from awk))) :error
(awk (:fields a) (:fields b) (:begin (return-from awk))) :error)
(test
@@ -16,3 +19,19 @@
(:fields h n - c)
(t (add h n c))))
("How" "now" "cow"))
+
+(test
+ (build
+ (awk
+ (:inputs '("1 x 2" "a x 1" "4 x b"))
+ (:fields (foo iz) - (bar iz))
+ (t (add (+ foo bar)))))
+ (3 1 4))
+
+(test
+ (build
+ (awk
+ (:inputs '("1"))
+ (:fields (a iz) - (c iz))
+ (t (add (+ a c) nf))))
+ (1 3))
diff --git a/txr.1 b/txr.1
index afe30fa2..9bfd25cd 100644
--- a/txr.1
+++ b/txr.1
@@ -64062,23 +64062,29 @@ clause performs a nonlocal transfer,
processing is not triggered, because the processing of the input
source is deemed not to have taken place.
-.meIP (:fields << sym *)
+.meIP (:fields >> { sym | >> ( sym <> [ fun ]) | -}*)
The
.code :fields
-clause may be used to give symbolic names to fields. Every
-.meta sym
-argument must be either a bindable symbol. The symbol
+clause may be specified in order to give symbolic names to fields,
+and optionally specify conversions for them.
+Every argument must be one of three expressions. It may be
+a bindable symbol other than
+.code -
+(minus). It may be a list whose first element is
+a symbol other than
+.code -
+optionally followed the name of a function.
+Or else it may be the
.code -
-has a special meaning. Symbols other than
+symbol, which has a special meaning.
+Symbols other than
.code -
may not be repeated, and the
.code :fields
clause may appear at most once in a given instance of the
.code awk
macro.
-Each entry in the
-.meta sym
-list is understood to correspond to a field expression for a successive field,
+Each argument is understood to correspond to a field expression for a successive field,
starting with the leftmost
.meta sym
corresponding with the first field,
@@ -64094,6 +64100,36 @@ macro. The
.code -
symbol is a place holder which doesn't bind a symbol macro to the
corresponding field.
+Additionally, every two-element entry which associates the field symbol
+.meta sym
+with a function name
+.meta fun
+specifies a field conversion. After each record is read and divided into
+fields, those fields for which
+.meta fun
+is specified are updated by passing their value to this function
+and replacing them by the returned value.
+The
+.meta fun
+symbol may also be one of the short-hand symbols available in the
+.code fconv
+macro, such as
+.codn i ,
+.code x
+and others.
+If at least one such conversion is specified in a
+.code :fields
+clause, then the value of
+.code rec
+is updated from the converted fields in the usual manner, as if
+the fields had been assigned.
+Furthermore, it is ensured that every field for which a
+.code :fields
+clause specifies a conversion exists. Fields with an empty string
+value are automatically added so that a field exists for the
+rightmost conversion, and the value of
+.code nf
+is updated to include these fields.
.meIP >> ( condition << action *)
Clauses which do not have one of the specially recognized keywords
@@ -65322,25 +65358,29 @@ effectively providing a shorthand for commonly-needed conversions:
.coIP i
Provides conversion to integer. It is identical to the
.code toint
-function.
+function, with the default radix.
.coIP o
Converts a string value holding an octal representation
-to the integer which it denotes. The expression
-.code "(o str)"
-is equivalent to
-.codn "(toint str 8)" .
+to the integer which it denotes. It is equivalent to
+.code toint
+with a
+.meta radix
+argument of 8.
.coIP x
Converts a string value holding a hexadecimal representation
-to the integer which it denotes. The expression
-.code "(x str)"
+to the integer which it denotes. It is equivalent to
+.code toint
is equivalent to
-.codn "(toint str 16)" .
+with a
+.meta radix
+argument of 16.
.coIP b
Converts a string value holding a binary (base two) representation
-to the integer which it denotes. The expression
-.code "(b str)"
-is equivalent to
-.codn "(toint str 2)" .
+to the integer which it denotes. It is equivalent to
+.code toint
+with a
+.meta radix
+argument of 2.
.coIP c
Converts a string value holding a C-language-style representation
to the integer which it denotes, meaning that the
@@ -65351,16 +65391,18 @@ decimal. These prefixes follow the
or
.code -
sign, if present.
-The expression
-.code "(c str)"
-is equivalent to
-.codn "(toint str #\ec)" .
+The
+.code c
+function is equivalent to
+.code toint
+invoked with a
+.meta radix
+argument of
+.codn #\ec .
.coIP r
Converts a string holding a floating-point representation to
-the floating-point value which it denotes. The expression
-.code "(r str)"
-is equivalent to
-.codn "(tofloat str)" .
+the floating-point value which it denotes. It is equivalent to
+.codn tofloat .
.ccIP @, iz @, oz @, xz @, bz @ cz and @ rz
Conversion similar to
.codn i ,
@@ -65370,7 +65412,7 @@ Conversion similar to
.code c
and
.codn r ,
-but using
+but equivalent to using the functions
.code tointz
and
.codn tofloatz .
@@ -65407,7 +65449,6 @@ expression can be used as the condition in an
clause which triggers the action if one or more fields have been
extracted, and performs conversions on them.
-
Note: although
.code fconv
is intended for converting textual fields, and the semantic descriptions below
@@ -65421,6 +65462,20 @@ means of the
.code toint
function.
+Note: a somewhat less flexible mechanism for converting fields, related to
+.codn fconv ,
+is present in the
+.code :fields
+clause of the
+.code awk
+macro, which can specify names for the positional fields, along with
+conversion functions. The
+.code :fields
+clause has different syntax, and doesn't support the
+.code :
+(colon) separator, instead assuming a fixed number of fields
+enumerated from the left.
+
.TP* Examples:
.verb