summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--share/txr/stdlib/awk.tl20
-rw-r--r--txr.155
2 files changed, 58 insertions, 17 deletions
diff --git a/share/txr/stdlib/awk.tl b/share/txr/stdlib/awk.tl
index e74a0a18..62311fe2 100644
--- a/share/txr/stdlib/awk.tl
+++ b/share/txr/stdlib/awk.tl
@@ -25,7 +25,7 @@
(defstruct sys:awk-state ()
(rs "\n")
- (fs #/[ \t\n]+/)
+ (fs)
(ofs " ")
(ors "\n")
(inputs (or *args* (list *stdin*)))
@@ -52,11 +52,19 @@
(defmeth sys:awk-state rec-to-f (self)
(cond
((equal self.rec "")
- (set self.fields nil)
- (set self.nf 0))
- (t
- (set self.fields (split-str self.rec self.fs))
- (set self.nf (length self.fields)))))
+ (set self.fields nil
+ self.nf 0))
+ (self.fs
+ (set self.fields (split-str self.rec self.fs)
+ self.nf (length self.fields)))
+ ((let ((trimmed (trim-str self.rec)))
+ (cond
+ ((equal trimmed "")
+ (set self.fields nil
+ self.nf 0))
+ (t
+ (set self.fields (split-str trimmed #/[ \t\n]+/)
+ self.nf (length self.fields))))))))
(defmeth sys:awk-state f-to-rec (self)
(set self.rec (cat-str self.fields self.ofs)))
diff --git a/txr.1 b/txr.1
index 23c7bc1f..8447e2cd 100644
--- a/txr.1
+++ b/txr.1
@@ -37618,15 +37618,45 @@ The awk variable
specifies a string or regular expression which is used for
delimiting records into fields.
+The
+.code fs
+variable is initially
+.codn nil .
+
+If
+.code fs
+is nil, then, prior to field splitting, leading and trailing
+whitespace is trimmed from the value of
+.codn rec ,
+using the
+.code trim-str
+function. The subsequent field splitting operates on this
+trimmed value, which isn't stored back into
+.codn rec .
+
Regardless of the value of
.codn fs ,
-an empty record produces no fields:
+a record which is empty (tested after the trimming described above, if that
+takes place) produces no fields:
.code f
is the empty list, and
.code nf
is zero.
-When a record is not empty, matches for the
+If
+.code fs
+is nil, then, in this special case, the splitting is performed using
+the regular expression
+.codn "#/[ \et\en]+/" .
+This means that, by default, fields are separated by one or more consecutive
+whitespace characters, which can be any mixture of spaces, tabs or newlines.
+Newlines are included because they can occur in a record when the value of the
+record separator
+.code rs
+is customized.
+
+When a record is not empty,
+matches for the
.code fs
pattern are identified in it, and those matching parts separate fields:
the fields are the possibly empty non-matching parts between the matches.
@@ -37636,16 +37666,19 @@ If
is not found in the record, then the entire record is taken as a single
field.
-The initial value of
+If
.code fs
-is the regular expression
-.codn "#/[ \et\en]+/" .
-This means that, by default, fields are separated by one or more consecutive
-whitespace characters, which can be any mixture of spaces, tabs or newlines.
-Newlines are included because they can occur in a record when the value of the
-record separator
-.code rs
-is customized.
+is not
+.codn nil ,
+it must specify a string, or a regular expression.
+If it is
+.codn nil ,
+then the regular expression
+.code "#/[ \et\en]+/"
+is used. A string value of
+.code fs
+denotes an exact match for that string; it isn't treated
+as a regular expression.
.coNP Variable @ ofs
.desc