diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2009-11-13 09:33:27 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2009-11-13 09:33:27 -0800 |
commit | 95e59dd555a038fd6eb70bc38e4e921d811b1f49 (patch) | |
tree | f3dc2ec7704ff24903618ed4ca5b09e5a1c2e5e8 | |
parent | 673d5f3b84d276fb29233d6a3f485ccfe330be13 (diff) | |
download | txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.gz txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.bz2 txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.zip |
Previous commit broke UTF-8 lexing, by changing the get_char
semantics on the input stream to wide character input.
Also, reading a query from the command line (-c) must
read bytes from a UTF-8 encoding of the string.
We introduce a new get_byte function which can extract bytes
from streams which provide it.
-rw-r--r-- | ChangeLog | 25 | ||||
-rw-r--r-- | parser.l | 4 | ||||
-rw-r--r-- | stream.c | 72 | ||||
-rw-r--r-- | stream.h | 2 | ||||
-rw-r--r-- | txr.c | 2 |
5 files changed, 102 insertions, 3 deletions
@@ -1,3 +1,28 @@ +2009-11-13 Kaz Kylheku <kkylheku@gmail.com> + + Previous commit broke UTF-8 lexing, by changing the get_char + semantics on the input stream to wide character input. + Also, reading a query the command line (-c) must + read bytes from a UTF-8 encoding of the string. + We introduce a new get_byte function which can extract bytes + from streams which provide it. + + * parser.l (YYINPUT): Call get_byte instead of get_char. + + * stream.c (struct strm_ops): New function pointer, get_byte. + (stdio_get_byte): New function. + (stdio_ops, pipe_ops): Add new function. + (string_in_ops, string_out_ops, dir_ops): Null pointer added. + (struct byte_input): New struct type. + (byte_in_get_byte): New function. + (byte_in_ops): New structure. + (make_string_byte_input_stream, get_byte): New functions. + + * stream.h (make_string_byte_input_stream, get_byte): New functions. + + * txr.c (txr_main): Make a byte input stream from the command + line spec, rather than a string input stream. + 2009-11-12 Kaz Kylheku <kkylheku@gmail.com> Continuing wchar_t conversion. Making sure all stdio calls @@ -48,8 +48,8 @@ obj_t *c = nil; \ int n, ch = '*'; \ for (n = 0; n < max_size && \ - (c = get_char(yyin_stream)) && \ - (ch = c_chr(c)) != '\n'; ++n) \ + (c = get_byte(yyin_stream)) && \ + (ch = c_num(c)) != '\n'; ++n) \ buf[n] = (char) ch; \ if (ch == '\n') \ buf[n++] = (char) ch; \ @@ -54,6 +54,7 @@ struct strm_ops { obj_t *(*put_char)(obj_t *, wchar_t); obj_t *(*get_line)(obj_t *); obj_t *(*get_char)(obj_t *); + obj_t *(*get_byte)(obj_t *); obj_t *(*vcformat)(obj_t *, const wchar_t *fmt, va_list vl); obj_t *(*vformat)(obj_t *, const wchar_t *fmt, va_list vl); obj_t *(*close)(obj_t *, obj_t *); @@ -227,6 +228,16 @@ obj_t *stdio_get_char(obj_t *stream) return nil; } +obj_t *stdio_get_byte(obj_t *stream) +{ + struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; + if (h->f) { + int ch = getc(h->f); + return (ch != EOF) ? 
num(ch) : stdio_maybe_read_error(stream); + } + return nil; +} + obj_t *stdio_vcformat(obj_t *stream, const wchar_t *fmt, va_list vl) { struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; @@ -263,6 +274,7 @@ static struct strm_ops stdio_ops = { stdio_put_char, stdio_get_line, stdio_get_char, + stdio_get_byte, stdio_vcformat, common_vformat, stdio_close @@ -318,6 +330,7 @@ static struct strm_ops pipe_ops = { stdio_put_char, stdio_get_line, stdio_get_char, + stdio_get_byte, stdio_vcformat, common_vformat, pipe_close @@ -370,9 +383,41 @@ static struct strm_ops string_in_ops = { string_in_get_char, 0, 0, + 0, + 0 +}; + +struct byte_input { + unsigned char *buf; + size_t size; + size_t index; +}; + +static obj_t *byte_in_get_byte(obj_t *stream) +{ + struct byte_input *bi = (struct byte_input *) stream->co.handle; + + if (bi->index < bi->size) + return num(bi->buf[bi->index++]); + return nil; +} + +static struct strm_ops byte_in_ops = { + { common_equal, + cobj_print_op, + 0, + 0 }, + 0, + 0, + 0, + 0, + byte_in_get_byte, + 0, + 0, 0 }; + struct string_output { wchar_t *buf; size_t size; @@ -483,6 +528,7 @@ static struct strm_ops string_out_ops = { string_out_put_char, 0, 0, + 0, string_out_vcformat, common_vformat, 0, @@ -528,6 +574,7 @@ static struct strm_ops dir_ops = { 0, 0, 0, + 0, dir_close }; @@ -572,6 +619,20 @@ obj_t *make_string_input_stream(obj_t *string) return cobj((void *) cons(string, zero), stream_t, &string_in_ops.cobj_ops); } +obj_t *make_string_byte_input_stream(obj_t *string) +{ + type_assert (stringp(string), (L"~a is not a string", string)); + + { + struct byte_input *bi = (struct byte_input *) chk_malloc(sizeof *bi); + unsigned char *utf8 = utf8_dup_to(c_str(string)); + bi->buf = utf8; + bi->size = strlen((char *) utf8); + bi->index = 0; + return cobj(bi, stream_t, &byte_in_ops.cobj_ops); + } +} + obj_t *make_string_output_stream(void) { struct string_output *so = (struct string_output *) chk_malloc(sizeof *so); @@ -646,6 +707,17 @@ 
obj_t *get_char(obj_t *stream) } } +obj_t *get_byte(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->get_byte ? ops->get_byte(stream) : nil; + } +} + obj_t *vformat(obj_t *stream, const wchar_t *str, va_list vl) { type_check (stream, COBJ); @@ -29,12 +29,14 @@ extern obj_t *std_input, *std_output, *std_error; obj_t *make_stdio_stream(FILE *, obj_t *descr, obj_t *input, obj_t *output); obj_t *make_pipe_stream(FILE *, obj_t *descr, obj_t *input, obj_t *output); obj_t *make_string_input_stream(obj_t *); +obj_t *make_string_byte_input_stream(obj_t *); obj_t *make_string_output_stream(void); obj_t *get_string_from_stream(obj_t *); obj_t *make_dir_stream(DIR *); obj_t *close_stream(obj_t *stream, obj_t *throw_on_error); obj_t *get_line(obj_t *); obj_t *get_char(obj_t *); +obj_t *get_byte(obj_t *); obj_t *vformat(obj_t *stream, const wchar_t *string, va_list); obj_t *vcformat(obj_t *stream, const wchar_t *string, va_list); obj_t *format(obj_t *stream, const wchar_t *string, ...); @@ -307,7 +307,7 @@ static int txr_main(int argc, char **argv) if (specstring) { spec_file = L"cmdline"; spec_file_str = string(spec_file); - yyin_stream = make_string_input_stream(specstring); + yyin_stream = make_string_byte_input_stream(specstring); } else if (spec_file_str) { if (wcscmp(c_str(spec_file_str), L"-") != 0) { FILE *in = w_fopen(c_str(spec_file_str), L"r"); |