diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2009-11-13 09:33:27 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2009-11-13 09:33:27 -0800 |
commit | 95e59dd555a038fd6eb70bc38e4e921d811b1f49 (patch) | |
tree | f3dc2ec7704ff24903618ed4ca5b09e5a1c2e5e8 /stream.c | |
parent | 673d5f3b84d276fb29233d6a3f485ccfe330be13 (diff) | |
download | txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.gz txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.bz2 txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.zip |
Previous commit broke UTF-8 lexing, by changing the get_char
semantics on the input stream to wide character input.
Also, reading a query from the command line (-c) must
read bytes from a UTF-8 encoding of the string.
We introduce a new get_byte function which can extract bytes
from streams which provide it.
Diffstat (limited to 'stream.c')
-rw-r--r-- | stream.c | 72 |
1 files changed, 72 insertions, 0 deletions
@@ -54,6 +54,7 @@ struct strm_ops { obj_t *(*put_char)(obj_t *, wchar_t); obj_t *(*get_line)(obj_t *); obj_t *(*get_char)(obj_t *); + obj_t *(*get_byte)(obj_t *); obj_t *(*vcformat)(obj_t *, const wchar_t *fmt, va_list vl); obj_t *(*vformat)(obj_t *, const wchar_t *fmt, va_list vl); obj_t *(*close)(obj_t *, obj_t *); @@ -227,6 +228,16 @@ obj_t *stdio_get_char(obj_t *stream) return nil; } +obj_t *stdio_get_byte(obj_t *stream) +{ + struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; + if (h->f) { + int ch = getc(h->f); + return (ch != EOF) ? num(ch) : stdio_maybe_read_error(stream); + } + return nil; +} + obj_t *stdio_vcformat(obj_t *stream, const wchar_t *fmt, va_list vl) { struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; @@ -263,6 +274,7 @@ static struct strm_ops stdio_ops = { stdio_put_char, stdio_get_line, stdio_get_char, + stdio_get_byte, stdio_vcformat, common_vformat, stdio_close @@ -318,6 +330,7 @@ static struct strm_ops pipe_ops = { stdio_put_char, stdio_get_line, stdio_get_char, + stdio_get_byte, stdio_vcformat, common_vformat, pipe_close @@ -370,9 +383,41 @@ static struct strm_ops string_in_ops = { string_in_get_char, 0, 0, + 0, + 0 +}; + +struct byte_input { + unsigned char *buf; + size_t size; + size_t index; +}; + +static obj_t *byte_in_get_byte(obj_t *stream) +{ + struct byte_input *bi = (struct byte_input *) stream->co.handle; + + if (bi->index < bi->size) + return num(bi->buf[bi->index++]); + return nil; +} + +static struct strm_ops byte_in_ops = { + { common_equal, + cobj_print_op, + 0, + 0 }, + 0, + 0, + 0, + 0, + byte_in_get_byte, + 0, + 0, 0 }; + struct string_output { wchar_t *buf; size_t size; @@ -483,6 +528,7 @@ static struct strm_ops string_out_ops = { string_out_put_char, 0, 0, + 0, string_out_vcformat, common_vformat, 0, @@ -528,6 +574,7 @@ static struct strm_ops dir_ops = { 0, 0, 0, + 0, dir_close }; @@ -572,6 +619,20 @@ obj_t *make_string_input_stream(obj_t *string) return cobj((void *) cons(string, 
zero), stream_t, &string_in_ops.cobj_ops); } +obj_t *make_string_byte_input_stream(obj_t *string) +{ + type_assert (stringp(string), (L"~a is not a string", string)); + + { + struct byte_input *bi = (struct byte_input *) chk_malloc(sizeof *bi); + unsigned char *utf8 = utf8_dup_to(c_str(string)); + bi->buf = utf8; + bi->size = strlen((char *) utf8); + bi->index = 0; + return cobj(bi, stream_t, &byte_in_ops.cobj_ops); + } +} + obj_t *make_string_output_stream(void) { struct string_output *so = (struct string_output *) chk_malloc(sizeof *so); @@ -646,6 +707,17 @@ obj_t *get_char(obj_t *stream) } } +obj_t *get_byte(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->get_byte ? ops->get_byte(stream) : nil; + } +} + obj_t *vformat(obj_t *stream, const wchar_t *str, va_list vl) { type_check (stream, COBJ); |