diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2016-01-01 12:23:00 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2016-01-01 12:23:00 -0800 |
commit | 27e0a161c083222ef78bcf6192b931aa815583b3 (patch) | |
tree | ebb045f0417307f3c7763ce900cbb4b50781e581 | |
parent | bccdfcb523d7b0315c20a41dec43a6b0cf302a73 (diff) | |
download | txr-27e0a161c083222ef78bcf6192b931aa815583b3.tar.gz txr-27e0a161c083222ef78bcf6192b931aa815583b3.tar.bz2 txr-27e0a161c083222ef78bcf6192b931aa815583b3.zip |
Record-delimiting stream adapter.
* regex.c (read_until_match): New function.
(regex_init): Registered read-until-match intrinsic.
* regex.h (read_until_match): Declared.
* stream.c (struct delegate_base): New struct type.
(delegate_base_mark, delegate_put_string, delegate_put_char,
delegate_put_byte, delegate_get_char, delegate_get_byte,
delegate_unget_char, delegate_unget_byte, delegate_close,
delegate_flush, delegate_seek, delegate_truncate,
delegate_get_prop, delegate_set_prop, delegate_get_error,
delegate_get_error_str, delegate_clear_error,
make_delegate_stream): New static functions.
(struct record_adapter_base): New struct type.
(record_adapter_base_mark, record_adapter_mark_op,
record_adapter_get_line): New static functions.
(record_adapter_ops): New static structure.
(record_adapter): New function.
(stream_init): Registered record-adapter intrinsic.
* stream.h (record_adapter): Declared.
* txr.1: Documented read-until-match and record-adapter.
-rw-r--r-- | regex.c | 41 | ||||
-rw-r--r-- | regex.h | 1 | ||||
-rw-r--r-- | stream.c | 181 | ||||
-rw-r--r-- | stream.h | 1 | ||||
-rw-r--r-- | txr.1 | 89 |
5 files changed, 313 insertions, 0 deletions
@@ -2502,6 +2502,46 @@ val match_regst_right(val str, val regex, val end) sub_str(str, minus(end, len), end))); } +val read_until_match(val regex, val stream) +{ + regex_machine_t regm; + val out = nil; + val ch; + + stream = default_arg(stream, std_input); + + regex_machine_init(®m, regex); + + if ((ch = get_char(stream))) { + if (regex_machine_feed(®m, c_chr(ch)) == REGM_FAIL) { + out = mkstring(one, ch); + + while (ch && (ch = get_char(stream))) { + regex_machine_reset(®m); + + if (regex_machine_feed(®m, c_chr(ch)) == REGM_FAIL) { + string_extend(out, ch); + continue; + } + + break; + } + } else { + out = mkstring(zero, ch); + } + } + + while (ch && (ch = get_char(stream))) { + if (regex_machine_feed(®m, c_chr(ch)) == REGM_FAIL) { + unget_char(ch, stream); + break; + } + } + + regex_machine_cleanup(®m); + return out; +} + static char_set_t *create_wide_cs(void) { #ifdef FULL_UNICODE @@ -2583,5 +2623,6 @@ void regex_init(void) reg_fun(intern(lit("reg-expand-nongreedy"), system_package), func_n1(reg_expand_nongreedy)); reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize)); + reg_fun(intern(lit("read-until-match"), user_package), func_n2o(read_until_match, 1)); init_special_char_sets(); } @@ -39,5 +39,6 @@ val search_regst(val haystack, val needle_regex, val start_num, val from_end); val match_regst(val str, val regex, val pos); val match_regst_right(val str, val regex, val end); val regsub(val regex, val repl, val str); +val read_until_match(val regex, val stream); int wide_display_char_p(wchar_t ch); void regex_init(void); @@ -1913,6 +1913,186 @@ val catenated_stream_push(val new_stream, val cat_stream) } } +struct delegate_base { + struct strm_base a; + val target_stream; + struct strm_ops *target_ops; +}; + +static void delegate_base_mark(struct delegate_base *db) +{ + strm_base_mark(&db->a); + gc_mark(db->target_stream); +} + +static val delegate_put_string(val stream, val str) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->put_string(s->target_stream, str); +} + +static val delegate_put_char(val stream, val ch) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->put_char(s->target_stream, ch); +} + +static val delegate_put_byte(val stream, int byte) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->put_byte(s->target_stream, byte); +} + +static val delegate_get_char(val stream) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->get_char(s->target_stream); +} + +static val delegate_get_byte(val stream) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->get_byte(s->target_stream); +} + +static val delegate_unget_char(val stream, val ch) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->unget_char(s->target_stream, ch); +} + +static val delegate_unget_byte(val stream, int byte) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->unget_byte(s->target_stream, byte); +} + +static val delegate_close(val stream, val throw_on_error) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->close(s->target_stream, throw_on_error); +} + +static val delegate_flush(val stream) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->flush(s->target_stream); +} + +static val delegate_seek(val stream, val off, enum strm_whence whence) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->seek(s->target_stream, off, whence); +} + +static val delegate_truncate(val stream, val len) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->truncate(s->target_stream, len); +} + +static val delegate_get_prop(val stream, val ind) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->get_prop(s->target_stream, ind); +} + +static val delegate_set_prop(val stream, val ind, val value) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->set_prop(s->target_stream, ind, value); +} + +static val delegate_get_error(val stream) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->get_error(s->target_stream); +} + +static val delegate_get_error_str(val stream) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->get_error_str(s->target_stream); +} + +static val delegate_clear_error(val stream) +{ + struct delegate_base *s = coerce(struct delegate_base *, stream->co.handle); + return s->target_ops->clear_error(s->target_stream); +} + +static val make_delegate_stream(val orig_stream, size_t handle_size, + struct cobj_ops *ops) +{ + struct strm_ops *orig_ops = coerce(struct strm_ops *, + cobj_ops(orig_stream, stream_s)); + struct delegate_base *db = coerce(struct delegate_base *, + chk_calloc(1, handle_size)); + val delegate_stream; + + strm_base_init(&db->a); + db->target_stream = nil; + db->target_ops = orig_ops; + + delegate_stream = cobj(coerce(mem_t *, db), stream_s, ops); + + db->target_stream = orig_stream; + + return delegate_stream; +} + +struct record_adapter_base { + struct delegate_base db; + val regex; +}; + +static void record_adapter_base_mark(struct record_adapter_base *rb) +{ + delegate_base_mark(&rb->db); + gc_mark(rb->regex); +} + +static void record_adapter_mark_op(val stream) +{ + struct record_adapter_base *rb = coerce(struct record_adapter_base *, + stream->co.handle); + record_adapter_base_mark(rb); +} + +static val record_adapter_get_line(val stream) +{ + struct record_adapter_base *rb = coerce(struct record_adapter_base *, + stream->co.handle); + return read_until_match(rb->regex, rb->db.target_stream); +} + +static struct strm_ops record_adapter_ops = + strm_ops_init(cobj_ops_init(eq, + stream_print_op, + stream_destroy_op, + record_adapter_mark_op, + cobj_hash_op), + wli("record-adapter"), + delegate_put_string, delegate_put_char, delegate_put_byte, + record_adapter_get_line, delegate_get_char, delegate_get_byte, + delegate_unget_char, delegate_unget_byte, + delegate_close, delegate_flush, delegate_seek, + delegate_truncate, delegate_get_prop, delegate_set_prop, + delegate_get_error, delegate_get_error_str, + delegate_clear_error); + +val record_adapter(val regex, val stream) +{ + val rec_adapter = make_delegate_stream(default_arg(stream, std_input), + sizeof (struct record_adapter_base), + &record_adapter_ops.cobj_ops); + struct record_adapter_base *rb = coerce(struct record_adapter_base *, + rec_adapter->co.handle); + + rb->regex = regex; + return rec_adapter; +} + val streamp(val obj) { return typeof(obj) == stream_s ? t : nil; @@ -3365,6 +3545,7 @@ void stream_init(void) reg_fun(intern(lit("cat-streams"), user_package), func_n1(make_catenated_stream)); reg_fun(intern(lit("catenated-stream-p"), user_package), func_n1(catenated_stream_p)); reg_fun(intern(lit("catenated-stream-push"), user_package), func_n2(catenated_stream_push)); + reg_fun(intern(lit("record-adapter"), user_package), func_n2o(record_adapter, 1)); reg_fun(intern(lit("open-directory"), user_package), func_n1(open_directory)); reg_fun(intern(lit("open-file"), user_package), func_n2o(open_file, 1)); reg_fun(intern(lit("open-fileno"), user_package), func_n2o(open_fileno, 1)); @@ -111,6 +111,7 @@ val get_string_from_stream(val); val make_strlist_output_stream(void); val get_list_from_stream(val); val make_dir_stream(DIR *); +val record_adapter(val regex, val stream); val streamp(val obj); val real_time_stream_p(val obj); val stream_set_prop(val stream, val ind, val prop); @@ -29825,6 +29825,49 @@ The double backslash in the string literal produces a single backslash in the resulting string object that is processed by .codn regex-parse . +.coNP Function @ read-until-match +.synb +.mets (read-until-match < regex <> [ stream ]) +.syne +.desc +The +.code read-until-match +function reads characters from +.metn stream , +accumulating them into a string, which is returned. + +If an argument is not specified for +.metn stream , +then the +.code *std-input* +stream is used. + +The accumulation of characters is terminated by a match on +.metn regex , +the end of the stream, or an error. + +This means that characters are read from the stream and accumulated while the +stream has more characters available, and while its prefix does not match +.metn regex . + +If +.meta regex +matches the stream before any characters are accumulated, +then an empty string is returned. + +If the stream ends or an error occurs before any characters +are accumulated, the function returns +.codn nil . + +When the accumulation of characters terminates by a match on +.metn regex , +reading characters from the stream continues. The longest +possible prefix of the stream which matches +.meta regex +is read, and discarded. If an error is encountered, the +matching procedure terminates and returns the previously +accumulated string. + .SS* Hashing Library .coNP Functions @, make-hash and @ hash .synb @@ -33746,6 +33789,52 @@ possibility that the expression will be further extended by means of the dot or dotdot operators. An explicit end-of-input signal must be given from the terminal to terminate the expression. +.coNP Function @ record-adapter +.synb +.mets (record-adapter < regex <> [ stream ]) +.syne +.desc +The +.code record-adapter +function returns a new stream object which acts as an +.I adapter +to the existing +.metn stream . + +If an argument is not specified for +.metn stream , +then the +.code *std-input* +stream is used. + +With the exception of +.metn get-line , +all operations on the returned adapter transparently delegate to the original +.meta stream +object. + +When the +.code get-line +function is used on the adapter, it behaves differently. A string is +extracted from +.metn stream , +and returned. However, the string isn't a line delimited by a newline +character, but a record delimited by +.metn regex +as if using the +.code read-until-match +function. + +All behavior which is built on the +.code get-lines +functions is affected by the record-delimiting semantics of a record adapter's +.code get-line +implementation. Notably, the +.code get-lines +and +.code lazy-stream-cons +functions return a lazy list of delimited records rather than of lines. + .SS* Stream Output Indentation \*(TL streams provide support for establishing hanging indentations in text output. Each stream which supports output has a built-in state variable |