summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-09-25 08:49:04 -0700
committerKaz Kylheku <kaz@kylheku.com>2016-09-25 08:49:04 -0700
commit1b8e26d7e59d9b76ee88f9135470cb3f11d399cb (patch)
tree6bb570fffbb954fc6751aeb8ebfdb3e7f7c87e0f
parente4ad31de61b548238cb53b6a572dc7f16d93d78f (diff)
downloadtxr-1b8e26d7e59d9b76ee88f9135470cb3f11d399cb.tar.gz
txr-1b8e26d7e59d9b76ee88f9135470cb3f11d399cb.tar.bz2
txr-1b8e26d7e59d9b76ee88f9135470cb3f11d399cb.zip
awk macro: support paragraph mode.
* share/txr/stdlib/awk.tl (sys:awk-state loop): If the rs variable is nil, provide a record reader which reads paragraphs, like under Awk's paragraph mode when RS is blank. This does not support the requirement that newline is always a field separator, regardless of the value of FS. * txr.1: Documented paragraph mode.
-rw-r--r--share/txr/stdlib/awk.tl31
-rw-r--r--txr.135
2 files changed, 57 insertions, 9 deletions
diff --git a/share/txr/stdlib/awk.tl b/share/txr/stdlib/awk.tl
index 9ae7c569..3efd8cc6 100644
--- a/share/txr/stdlib/awk.tl
+++ b/share/txr/stdlib/awk.tl
@@ -93,15 +93,28 @@
(t
(set noted-rs aws.rs noted-krs aws.krs)
(set cached-rr
- (if (and (equal aws.rs "\n")
- (not aws.krs))
- (lambda () (get-line stin))
- (let ((rin (record-adapter (if (regexp aws.rs)
- aws.rs
- (regex-compile aws.rs))
- stin
- aws.krs)))
- (lambda () (get-line rin)))))))))
+ (cond
+ ((and (equal aws.rs "\n") (not aws.krs))
+ (lambda () (get-line stin)))
+ ((null aws.rs)
+ (let ((rin (record-adapter #/\n[ \n\t]*\n/))
+ (flag t))
+ (lambda ()
+ (let ((r (get-line rin)))
+ (cond
+ (flag
+ (set flag nil)
+ (if (equal r "")
+ (get-line rin)
+ r))
+ (t r))))))
+ (t
+ (let ((rin (record-adapter (if (regexp aws.rs)
+ aws.rs
+ (regex-compile aws.rs))
+ stin
+ aws.krs)))
+ (lambda () (get-line rin))))))))))
(set aws.file-rec-num 0)
(unwind-protect
(whilet ((rr (get-rec-reader stin))
diff --git a/txr.1 b/txr.1
index 00822132..3293ef81 100644
--- a/txr.1
+++ b/txr.1
@@ -38559,6 +38559,14 @@ is
.strn "\en" :
the newline character. This means that, by default, records are lines.
+If
+.code rs
+is changed to the value
+.codn nil ,
+then record separation operates in
+.IR "paragraph mode" ,
+which is described below.
+
If a match for the record separator occurs at the end of the stream,
it is not considered to delimit an empty record, but acts as the
terminator for the previous record.
@@ -38569,6 +38577,33 @@ it has no effect on the most recently scanned and delimited record which is
still current, or previous records. The new value applies to the next, not yet
read record.
+In paragraph mode, records are separated by a newline character followed by one
+or more blank lines (empty lines or lines containing only a mixture of
+tabs and spaces). This means that, effectively, the record-separating
+sequences match the regular expression
+.codn "/\en[ \en\et]*\en/" .
+There is a difference between paragraph mode and simply using the above
+regular expression as
+.codn rs .
+The difference is that if the first record which is read upon entering
+paragraph mode is empty (because the input begins with a match for the
+separator regex), then that record is thrown away, and the next record
+is read.
+
+Note that in the POSIX Awk paragraph mode (which occurs when
+.code RS
+is blank), there is an additional difference: regardless of the value
+of the field separator
+.codn FS ,
+newline characters separate fields. This behavior is not implemented
+in the
+.code awk
+macro. Since newlines are included as separators under the default field
+separation, the behaviors match in that case. Code using a custom
+.code fs
+must explicitly include a match for newline to obtain that as a field
+separator.
+
.coNP Variable @ krs
.desc
The awk variable