From 48c8cec1c83d70e23dc1359b85cfd9e36fdaa60f Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 13 Jan 2014 22:19:03 -0800 Subject: Support for pushing back bytes and characters into streams. * stream.c (null_ops, stdio_ops, tail_ops, pipe_ops, string_in_ops, byte_in_ops, string_out_ops, strlist_out_ops, dir_ops, cat_stream_ops): Structure definition updated with new initializers for two new virtuals. (stdio_handle): New member, unget_c. (snarf_line, stdio_get_char): Handle pushed-back character in h->unget_c. (stdio_unget_char, stdio_unget_byte, string_in_unget_char, byte_in_unget_byte): New static functions. (make_stdio_stream_common): Initialize unget_c member. (unget_char, unget_byte): New functions. * stream.h (struct strm_ops): New virtuals: unget_char and unget_byte. (unget_char, unget_byte): New functions declared. * syslog.c (syslog_strm_ops): Two new initializers. * eval.c (eval_init): Registered unget_char and unget_byte as intrinsics. * txr.1: Documented. --- ChangeLog | 26 +++++++++++++ eval.c | 2 + stream.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- stream.h | 4 ++ syslog.c | 2 + txr.1 | 23 +++++++++++ 6 files changed, 185 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index eea30d4f..f7a0f4ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2014-01-13 Kaz Kylheku + + Support for pushing back bytes and characters into streams. + + * stream.c (null_ops, stdio_ops, tail_ops, pipe_ops, + string_in_ops, byte_in_ops, string_out_ops, strlist_out_ops, + dir_ops, cat_stream_ops): Structure definition updated + with new initializers for two new virtuals. + (stdio_handle): New member, unget_c. + (snarf_line, stdio_get_char): Handle pushed-back character in + h->unget_c. + (stdio_unget_char, stdio_unget_byte, string_in_unget_char, + byte_in_unget_byte): New static functions. + (make_stdio_stream_common): Initialize unget_c member. + (unget_char, unget_byte): New functions. + + * stream.h (struct strm_ops): New virtuals: unget_char and unget_byte. + (unget_char, unget_byte): New functions declared. + + * syslog.c (syslog_strm_ops): Two new initializers. + + * eval.c (eval_init): Registered unget_char and unget_byte as + intrinsics. + + * txr.1: Documented. + 2014-01-13 Kaz Kylheku * stream.c (put_byte): Bugfix: was checking whether the put_char diff --git a/eval.c b/eval.c index 03e8d34d..f9a68f95 100644 --- a/eval.c +++ b/eval.c @@ -2451,6 +2451,8 @@ void eval_init(void) reg_fun(intern(lit("put-line"), user_package), func_n2o(put_line, 1)); reg_fun(intern(lit("put-char"), user_package), func_n2o(put_char, 1)); reg_fun(intern(lit("put-byte"), user_package), func_n2o(put_byte, 1)); + reg_fun(intern(lit("unget-char"), user_package), func_n2o(unget_char, 1)); + reg_fun(intern(lit("unget-byte"), user_package), func_n2o(unget_byte, 1)); reg_fun(intern(lit("flush-stream"), user_package), func_n1(flush_stream)); reg_fun(intern(lit("seek-stream"), user_package), func_n3(seek_stream)); reg_fun(intern(lit("stat"), user_package), func_n1(statf)); diff --git a/stream.c b/stream.c index e7cf8b04..f51483a2 100644 --- a/stream.c +++ b/stream.c @@ -98,6 +98,8 @@ static struct strm_ops null_ops = { 0, /* get_line, */ 0, /* get_char, */ 0, /* get_byte, */ + 0, /* unget_char, */ + 0, /* unget_byte, */ 0, /* close, */ 0, /* flush, */ 0, /* seek, */ @@ -114,6 +116,7 @@ struct stdio_handle { FILE *f; val descr; val mode; /* used by tail */ + val unget_c; utf8_decoder_t ud; #if HAVE_FORK_STUFF pid_t pid; @@ -305,7 +308,14 @@ static wchar_t *snarf_line(struct stdio_handle *h) wchar_t *buf = 0; for (;;) { - wint_t ch = utf8_decode(&h->ud, stdio_get_char_callback, (mem_t *) h->f); + wint_t ch; + + if (h->unget_c) { + ch = c_chr(h->unget_c); + h->unget_c = nil; + } else { + ch = utf8_decode(&h->ud, stdio_get_char_callback, (mem_t *) h->f); + } if (ch == WEOF && buf == 0) break; @@ -345,6 +355,11 @@ static val stdio_get_line(val stream) static val stdio_get_char(val stream) { struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; + val uc = h->unget_c; + if (uc) { + h->unget_c = nil; + return uc; + } if (h->f) { wint_t ch = utf8_decode(&h->ud, stdio_get_char_callback, (mem_t *) h->f); return (ch != WEOF) ? chr(ch) : stdio_maybe_read_error(stream); @@ -362,6 +377,29 @@ static val stdio_get_byte(val stream) return stdio_maybe_read_error(stream); } +static val stdio_unget_char(val stream, val ch) +{ + struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; + + if (!is_chr(ch)) + type_mismatch(lit("unget-char: ~s is not a character"), ch, nao); + + if (h->unget_c) + uw_throwf(file_error_s, lit("unget-char overflow on ~a: "), stream, nao); + + h->unget_c = ch; + return ch; +} + +static val stdio_unget_byte(val stream, int byte) +{ + struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; + + return h->f != 0 && ungetc(byte, (FILE *) h->f) != EOF + ? num_fast(byte) + : stdio_maybe_error(stream, lit("pushing back byte into")); +} + static val stdio_close(val stream, val throw_on_error) { struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; @@ -390,6 +428,8 @@ static struct strm_ops stdio_ops = { stdio_get_line, stdio_get_char, stdio_get_byte, + stdio_unget_char, + stdio_unget_byte, stdio_close, stdio_flush, stdio_seek, @@ -509,6 +549,8 @@ static struct strm_ops tail_ops = { tail_get_line, tail_get_char, tail_get_byte, + stdio_unget_char, + stdio_unget_byte, stdio_close, stdio_flush, stdio_seek, @@ -597,6 +639,8 @@ static struct strm_ops pipe_ops = { stdio_get_line, stdio_get_char, stdio_get_byte, + stdio_unget_char, + stdio_unget_byte, pipe_close, stdio_flush, 0, /* seek: not on pipes */ @@ -655,6 +699,27 @@ static val string_in_get_char(val stream) return nil; } +static val string_in_unget_char(val stream, val ch) +{ + val pair = (val) stream->co.handle; + val string = car(pair); + val pos = cdr(pair); + + if (pos == zero) + uw_throwf(file_error_s, + lit("unget-char: cannot push past beginning of string"), nao); + + pos = minus(pos, one); + + if (chr_str(string, pos) != ch) + uw_throwf(file_error_s, + lit("unget-char: ~s doesn't match the character that was read"), + nao); + + set(*cdr_l(pair), plus(pos, one)); + return ch; +} + static val string_in_get_prop(val stream, val ind) { if (ind == name_k) { @@ -676,6 +741,8 @@ static struct strm_ops string_in_ops = { string_in_get_line, string_in_get_char, 0, /* get_byte */ + string_in_unget_char, + 0, /* unget_byte, */ 0, /* close */ 0, /* flush */ 0, /* TODO: seek */ @@ -710,6 +777,19 @@ static val byte_in_get_byte(val stream) return nil; } +static val byte_in_unget_byte(val stream, int byte) +{ + struct byte_input *bi = (struct byte_input *) stream->co.handle; + + if (bi->index == 0) + uw_throwf(file_error_s, + lit("unget-char: cannot push past beginning of byte stream"), + nao); + + bi->buf[--bi->index] = byte; + return num_fast(byte); +} + static struct strm_ops byte_in_ops = { { cobj_equal_op, cobj_print_op, @@ -722,6 +802,8 @@ static struct strm_ops byte_in_ops = { 0, /* get_line */ 0, /* get_char */ byte_in_get_byte, + 0, /* unget_char, */ + byte_in_unget_byte, 0, /* close */ 0, /* flush */ 0, /* TODO: support seek */ @@ -848,6 +930,8 @@ static struct strm_ops string_out_ops = { 0, /* get_line */ 0, /* get_char */ 0, /* get_byte */ + 0, /* unget_char, */ + 0, /* unget_byte, */ 0, /* close */ 0, /* flush */ 0, /* TODO: seek, with fill-with-spaces semantics if past end. */ @@ -919,6 +1003,8 @@ static struct strm_ops strlist_out_ops = { 0, /* get_line */ 0, /* get_char */ 0, /* get_byte */ + 0, /* unget_char, */ + 0, /* unget_byte, */ 0, /* close */ 0, /* flush */ 0, /* seek */ @@ -991,6 +1077,8 @@ static struct strm_ops dir_ops = { dir_get_line, 0, /* get_char */ 0, /* get_byte */ + 0, /* unget_char, */ + 0, /* unget_byte, */ dir_close, 0, /* flush */ 0, /* seek */ @@ -1005,6 +1093,7 @@ static val make_stdio_stream_common(FILE *f, val descr, struct cobj_ops *ops) h->f = f; h->descr = descr; h->mode = nil; + h->unget_c = nil; utf8_decoder_init(&h->ud); h->pid = 0; #if HAVE_ISATTY @@ -1205,6 +1294,42 @@ val get_byte(val stream) } } +val unget_char(val ch, val stream) +{ + if (!stream) + stream = std_input; + + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_s, (lit("~a is not a stream"), + stream, nao)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->unget_char ? ops->unget_char(stream, ch) : nil; + } +} + +val unget_byte(val byte, val stream) +{ + cnum b = c_num(byte); + + if (!stream) + stream = std_input; + + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_s, (lit("~a is not a stream"), + stream, nao)); + + if (b < 0 || b > 255) + uw_throwf(file_error_s, lit("unget-byte on ~a: byte value ~a out of range"), + stream, byte, nao); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->unget_byte ? ops->unget_byte(stream, b) : nil; + } +} + struct fmt { size_t minsize; const char *dec; @@ -2120,6 +2245,8 @@ static struct strm_ops cat_stream_ops = { cat_get_line, cat_get_char, cat_get_byte, + 0, /* unget_char, */ + 0, /* unget_byte, */ 0, /* close, */ 0, /* flush, */ 0, /* seek, */ diff --git a/stream.h b/stream.h index e2ea714d..6a76d6b8 100644 --- a/stream.h +++ b/stream.h @@ -38,6 +38,8 @@ struct strm_ops { val (*get_line)(val); val (*get_char)(val); val (*get_byte)(val); + val (*unget_char)(val, val); + val (*unget_byte)(val, int); val (*close)(val, val); val (*flush)(val); val (*seek)(val, cnum, enum strm_whence); @@ -78,6 +80,8 @@ val close_stream(val stream, val throw_on_error); val get_line(val); val get_char(val); val get_byte(val); +val unget_char(val ch, val stream); +val unget_byte(val byte, val stream); val vformat(val stream, val string, va_list); val vformat_to_string(val string, va_list); val format(val stream, val string, ...); diff --git a/syslog.c b/syslog.c index 39581d34..f544c110 100644 --- a/syslog.c +++ b/syslog.c @@ -219,6 +219,8 @@ static struct strm_ops syslog_strm_ops = { 0, /* get_line */ 0, /* get_char */ 0, /* get_byte */ + 0, /* unget_char */ + 0, /* unget_byte */ 0, /* close */ 0, /* flush */ 0, /* seek */ diff --git a/txr.1 b/txr.1 index 8f87e428..a6760076 100644 --- a/txr.1 +++ b/txr.1 @@ -10465,6 +10465,29 @@ the two operations will interfere with the UTF-8 decoding. These functions return nil when the end of data is reached. Errors are represented as exceptions. +.SS Functions unget-char and unget-byte + +.TP +Syntax: + + (unget-char []) + (unget-byte []) + +.TP +Description: + +These character put back, into a stream, a character or byte which was +previously read. The character or byte must match the one which was most +recently read. If the parameter is omitted, then the *stdin* +stream is used. + +If the operation succeeds, the byte or character value is returned. +A nil return indicates that the operation is unsupported. + +Some streams do not support these operations; some support +only one of them. In general, if a stream supports get-char, +it supports unget-char, and likewise for get-byte and unget-byte. + .SS Functions put-string, put-line, put-char and put-byte .TP -- cgit v1.2.3