summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-01-01 12:23:00 -0800
committerKaz Kylheku <kaz@kylheku.com>2016-01-01 12:23:00 -0800
commit27e0a161c083222ef78bcf6192b931aa815583b3 (patch)
treeebb045f0417307f3c7763ce900cbb4b50781e581
parentbccdfcb523d7b0315c20a41dec43a6b0cf302a73 (diff)
downloadtxr-27e0a161c083222ef78bcf6192b931aa815583b3.tar.gz
txr-27e0a161c083222ef78bcf6192b931aa815583b3.tar.bz2
txr-27e0a161c083222ef78bcf6192b931aa815583b3.zip
Record-delimiting stream adapter.
* regex.c (read_until_match): New function. (regex_init): Registered read-until-match intrinsic. * regex.h (read_until_match): Declared. * stream.c (struct delegate_base): New struct type. (delegate_base_mark, delegate_put_string, delegate_put_char, delegate_put_byte, delegate_get_char, delegate_get_byte, delegate_unget_char, delegate_unget_byte, delegate_close, delegate_flush, delegate_seek, delegate_truncate, delegate_get_prop, delegate_set_prop, delegate_get_error, delegate_get_error_str, delegate_clear_error, make_delegate_stream): New static functions. (struct record_adapter_base): New struct type. (record_adapter_base_mark, record_adapter_mark_op, record_adapter_get_line): New static functions. (record_adapter_ops): New static structure. (record_adapter): New function. (stream_init): Registered record-adapter intrinsic. * stream.h (record_adapter): Declared. * txr.1: Documented read-until-match and record-adapter.
-rw-r--r--regex.c41
-rw-r--r--regex.h1
-rw-r--r--stream.c181
-rw-r--r--stream.h1
-rw-r--r--txr.189
5 files changed, 313 insertions, 0 deletions
diff --git a/regex.c b/regex.c
index 87239806..a1294ca9 100644
--- a/regex.c
+++ b/regex.c
@@ -2502,6 +2502,46 @@ val match_regst_right(val str, val regex, val end)
sub_str(str, minus(end, len), end)));
}
+val read_until_match(val regex, val stream)
+{
+ regex_machine_t regm;
+ val out = nil;
+ val ch;
+
+ stream = default_arg(stream, std_input);
+
+ regex_machine_init(&regm, regex);
+
+ if ((ch = get_char(stream))) {
+ if (regex_machine_feed(&regm, c_chr(ch)) == REGM_FAIL) {
+ out = mkstring(one, ch);
+
+ while (ch && (ch = get_char(stream))) {
+ regex_machine_reset(&regm);
+
+ if (regex_machine_feed(&regm, c_chr(ch)) == REGM_FAIL) {
+ string_extend(out, ch);
+ continue;
+ }
+
+ break;
+ }
+ } else {
+ out = mkstring(zero, ch);
+ }
+ }
+
+ while (ch && (ch = get_char(stream))) {
+ if (regex_machine_feed(&regm, c_chr(ch)) == REGM_FAIL) {
+ unget_char(ch, stream);
+ break;
+ }
+ }
+
+ regex_machine_cleanup(&regm);
+ return out;
+}
+
static char_set_t *create_wide_cs(void)
{
#ifdef FULL_UNICODE
@@ -2583,5 +2623,6 @@ void regex_init(void)
reg_fun(intern(lit("reg-expand-nongreedy"), system_package),
func_n1(reg_expand_nongreedy));
reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize));
+ reg_fun(intern(lit("read-until-match"), user_package), func_n2o(read_until_match, 1));
init_special_char_sets();
}
diff --git a/regex.h b/regex.h
index 51d24eea..ac84af26 100644
--- a/regex.h
+++ b/regex.h
@@ -39,5 +39,6 @@ val search_regst(val haystack, val needle_regex, val start_num, val from_end);
val match_regst(val str, val regex, val pos);
val match_regst_right(val str, val regex, val end);
val regsub(val regex, val repl, val str);
+val read_until_match(val regex, val stream);
int wide_display_char_p(wchar_t ch);
void regex_init(void);
diff --git a/stream.c b/stream.c
index 1798d901..2d6e1b0b 100644
--- a/stream.c
+++ b/stream.c
@@ -1913,6 +1913,186 @@ val catenated_stream_push(val new_stream, val cat_stream)
}
}
+struct delegate_base {
+ struct strm_base a;
+ val target_stream;
+ struct strm_ops *target_ops;
+};
+
+static void delegate_base_mark(struct delegate_base *db)
+{
+ strm_base_mark(&db->a);
+ gc_mark(db->target_stream);
+}
+
+static val delegate_put_string(val stream, val str)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->put_string(s->target_stream, str);
+}
+
+static val delegate_put_char(val stream, val ch)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->put_char(s->target_stream, ch);
+}
+
+static val delegate_put_byte(val stream, int byte)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->put_byte(s->target_stream, byte);
+}
+
+static val delegate_get_char(val stream)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->get_char(s->target_stream);
+}
+
+static val delegate_get_byte(val stream)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->get_byte(s->target_stream);
+}
+
+static val delegate_unget_char(val stream, val ch)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->unget_char(s->target_stream, ch);
+}
+
+static val delegate_unget_byte(val stream, int byte)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->unget_byte(s->target_stream, byte);
+}
+
+static val delegate_close(val stream, val throw_on_error)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->close(s->target_stream, throw_on_error);
+}
+
+static val delegate_flush(val stream)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->flush(s->target_stream);
+}
+
+static val delegate_seek(val stream, val off, enum strm_whence whence)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->seek(s->target_stream, off, whence);
+}
+
+static val delegate_truncate(val stream, val len)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->truncate(s->target_stream, len);
+}
+
+static val delegate_get_prop(val stream, val ind)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->get_prop(s->target_stream, ind);
+}
+
+static val delegate_set_prop(val stream, val ind, val value)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->set_prop(s->target_stream, ind, value);
+}
+
+static val delegate_get_error(val stream)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->get_error(s->target_stream);
+}
+
+static val delegate_get_error_str(val stream)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->get_error_str(s->target_stream);
+}
+
+static val delegate_clear_error(val stream)
+{
+ struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle);
+ return s->target_ops->clear_error(s->target_stream);
+}
+
+static val make_delegate_stream(val orig_stream, size_t handle_size,
+ struct cobj_ops *ops)
+{
+ struct strm_ops *orig_ops = coerce(struct strm_ops *,
+ cobj_ops(orig_stream, stream_s));
+ struct delegate_base *db = coerce(struct delegate_base *,
+ chk_calloc(1, handle_size));
+ val delegate_stream;
+
+ strm_base_init(&db->a);
+ db->target_stream = nil;
+ db->target_ops = orig_ops;
+
+ delegate_stream = cobj(coerce(mem_t *, db), stream_s, ops);
+
+ db->target_stream = orig_stream;
+
+ return delegate_stream;
+}
+
+struct record_adapter_base {
+ struct delegate_base db;
+ val regex;
+};
+
+static void record_adapter_base_mark(struct record_adapter_base *rb)
+{
+ delegate_base_mark(&rb->db);
+ gc_mark(rb->regex);
+}
+
+static void record_adapter_mark_op(val stream)
+{
+ struct record_adapter_base *rb = coerce(struct record_adapter_base *,
+ stream->co.handle);
+ record_adapter_base_mark(rb);
+}
+
+static val record_adapter_get_line(val stream)
+{
+ struct record_adapter_base *rb = coerce(struct record_adapter_base *,
+ stream->co.handle);
+ return read_until_match(rb->regex, rb->db.target_stream);
+}
+
+static struct strm_ops record_adapter_ops =
+ strm_ops_init(cobj_ops_init(eq,
+ stream_print_op,
+ stream_destroy_op,
+ record_adapter_mark_op,
+ cobj_hash_op),
+ wli("record-adapter"),
+ delegate_put_string, delegate_put_char, delegate_put_byte,
+ record_adapter_get_line, delegate_get_char, delegate_get_byte,
+ delegate_unget_char, delegate_unget_byte,
+ delegate_close, delegate_flush, delegate_seek,
+ delegate_truncate, delegate_get_prop, delegate_set_prop,
+ delegate_get_error, delegate_get_error_str,
+ delegate_clear_error);
+
+val record_adapter(val regex, val stream)
+{
+ val rec_adapter = make_delegate_stream(default_arg(stream, std_input),
+ sizeof (struct record_adapter_base),
+ &record_adapter_ops.cobj_ops);
+ struct record_adapter_base *rb = coerce(struct record_adapter_base *,
+ rec_adapter->co.handle);
+
+ rb->regex = regex;
+ return rec_adapter;
+}
+
val streamp(val obj)
{
return typeof(obj) == stream_s ? t : nil;
@@ -3365,6 +3545,7 @@ void stream_init(void)
reg_fun(intern(lit("cat-streams"), user_package), func_n1(make_catenated_stream));
reg_fun(intern(lit("catenated-stream-p"), user_package), func_n1(catenated_stream_p));
reg_fun(intern(lit("catenated-stream-push"), user_package), func_n2(catenated_stream_push));
+ reg_fun(intern(lit("record-adapter"), user_package), func_n2o(record_adapter, 1));
reg_fun(intern(lit("open-directory"), user_package), func_n1(open_directory));
reg_fun(intern(lit("open-file"), user_package), func_n2o(open_file, 1));
reg_fun(intern(lit("open-fileno"), user_package), func_n2o(open_fileno, 1));
diff --git a/stream.h b/stream.h
index da45ea05..d6abfc21 100644
--- a/stream.h
+++ b/stream.h
@@ -111,6 +111,7 @@ val get_string_from_stream(val);
val make_strlist_output_stream(void);
val get_list_from_stream(val);
val make_dir_stream(DIR *);
+val record_adapter(val regex, val stream);
val streamp(val obj);
val real_time_stream_p(val obj);
val stream_set_prop(val stream, val ind, val prop);
diff --git a/txr.1 b/txr.1
index 6b870a89..7db1b98b 100644
--- a/txr.1
+++ b/txr.1
@@ -29825,6 +29825,49 @@ The double backslash in the string literal produces a single backslash
in the resulting string object that is processed by
.codn regex-parse .
+.coNP Function @ read-until-match
+.synb
+.mets (read-until-match < regex <> [ stream ])
+.syne
+.desc
+The
+.code read-until-match
+function reads characters from
+.metn stream ,
+accumulating them into a string, which is returned.
+
+If an argument is not specified for
+.metn stream ,
+then the
+.code *std-input*
+stream is used.
+
+The accumulation of characters is terminated by a match on
+.metn regex ,
+the end of the stream, or an error.
+
+This means that characters are read from the stream and accumulated while the
+stream has more characters available, and while its prefix does not match
+.metn regex .
+
+If
+.meta regex
+matches the stream before any characters are accumulated,
+then an empty string is returned.
+
+If the stream ends or an error occurs before any characters
+are accumulated, the function returns
+.codn nil .
+
+When the accumulation of characters terminates by a match on
+.metn regex ,
+reading characters from the stream continues. The longest
+possible prefix of the stream which matches
+.meta regex
+is read, and discarded. If an error is encountered, the
+matching procedure terminates and returns the previously
+accumulated string.
+
.SS* Hashing Library
.coNP Functions @, make-hash and @ hash
.synb
@@ -33746,6 +33789,52 @@ possibility that the expression will be further extended by means of the dot or
dotdot operators. An explicit end-of-input signal must be given from the
terminal to terminate the expression.
+.coNP Function @ record-adapter
+.synb
+.mets (record-adapter < regex <> [ stream ])
+.syne
+.desc
+The
+.code record-adapter
+function returns a new stream object which acts as an
+.I adapter
+to the existing
+.metn stream .
+
+If an argument is not specified for
+.metn stream ,
+then the
+.code *std-input*
+stream is used.
+
+With the exception of
+.metn get-line ,
+all operations on the returned adapter transparently delegate to the original
+.meta stream
+object.
+
+When the
+.code get-line
+function is used on the adapter, it behaves differently. A string is
+extracted from
+.metn stream ,
+and returned. However, the string isn't a line delimited by a newline
+character, but a record delimited by
+.metn regex
+as if using the
+.code read-until-match
+function.
+
+All behavior which is built on the
+.code get-lines
+functions is affected by the record-delimiting semantics of a record adapter's
+.code get-line
+implementation. Notably, the
+.code get-lines
+and
+.code lazy-stream-cons
+functions return a lazy list of delimited records rather than of lines.
+
.SS* Stream Output Indentation
\*(TL streams provide support for establishing hanging indentations
in text output. Each stream which supports output has a built-in state variable