Previous commit broke UTF-8 lexing by changing the get_char semantics on the input stream to wide character input. Also, reading a query from the command line (-c) must read bytes from a UTF-8 encoding of the string. We introduce a new get_byte function which can extract bytes from streams that provide it.
author: Kaz Kylheku <kaz@kylheku.com> 2009-11-13 09:33:27 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2009-11-13 09:33:27 -0800
commit: 95e59dd555a038fd6eb70bc38e4e921d811b1f49 (patch)
tree: f3dc2ec7704ff24903618ed4ca5b09e5a1c2e5e8 /stream.c
parent: 673d5f3b84d276fb29233d6a3f485ccfe330be13 (diff)
download: txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.gz
txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.bz2
txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.zip
1 files changed, 72 insertions, 0 deletions
diff --git a/stream.c b/stream.c
index 832bbf80..e2618a03 100644
--- a/stream.c
+++ b/stream.c
@@ -54,6 +54,7 @@ struct strm_ops {
   obj_t *(*put_char)(obj_t *, wchar_t);
   obj_t *(*get_line)(obj_t *);
   obj_t *(*get_char)(obj_t *);
+  obj_t *(*get_byte)(obj_t *);
   obj_t *(*vcformat)(obj_t *, const wchar_t *fmt, va_list vl);
   obj_t *(*vformat)(obj_t *, const wchar_t *fmt, va_list vl);
   obj_t *(*close)(obj_t *, obj_t *);
@@ -227,6 +228,16 @@ obj_t *stdio_get_char(obj_t *stream)
   return nil;
 }
 
+obj_t *stdio_get_byte(obj_t *stream)
+{
+  struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
+  if (h->f) {
+    int ch = getc(h->f);
+    return (ch != EOF) ? num(ch) : stdio_maybe_read_error(stream);
+  }
+  return nil;
+}
+
 obj_t *stdio_vcformat(obj_t *stream, const wchar_t *fmt, va_list vl)
 {
   struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
@@ -263,6 +274,7 @@ static struct strm_ops stdio_ops = {
   stdio_put_char,
   stdio_get_line,
   stdio_get_char,
+  stdio_get_byte,
   stdio_vcformat,
   common_vformat,
   stdio_close
@@ -318,6 +330,7 @@ static struct strm_ops pipe_ops = {
   stdio_put_char,
   stdio_get_line,
   stdio_get_char,
+  stdio_get_byte,
   stdio_vcformat,
   common_vformat,
   pipe_close
@@ -370,9 +383,41 @@ static struct strm_ops string_in_ops = {
   string_in_get_char,
   0,
   0,
+  0,
+  0
+};
+
+struct byte_input {
+  unsigned char *buf;
+  size_t size;
+  size_t index;
+};
+
+static obj_t *byte_in_get_byte(obj_t *stream)
+{
+  struct byte_input *bi = (struct byte_input *) stream->co.handle;
+  
+  if (bi->index < bi->size)
+    return num(bi->buf[bi->index++]);
+  return nil;
+}
+
+static struct strm_ops byte_in_ops = {
+  { common_equal,
+    cobj_print_op,
+    0,
+    0 },
+  0,
+  0,
+  0,
+  0,
+  byte_in_get_byte,
+  0,
+  0,
   0
 };
 
+
 struct string_output {
   wchar_t *buf;
   size_t size;
@@ -483,6 +528,7 @@ static struct strm_ops string_out_ops = {
   string_out_put_char,
   0,
   0,
+  0,
   string_out_vcformat,
   common_vformat,
   0,
@@ -528,6 +574,7 @@ static struct strm_ops dir_ops = {
   0,
   0,
   0,
+  0,
   dir_close
 };
 
@@ -572,6 +619,20 @@ obj_t *make_string_input_stream(obj_t *string)
   return cobj((void *) cons(string, zero), stream_t, &string_in_ops.cobj_ops);
 }
 
+obj_t *make_string_byte_input_stream(obj_t *string)
+{
+  type_assert (stringp(string), (L"~a is not a string", string));
+
+  {
+    struct byte_input *bi = (struct byte_input *) chk_malloc(sizeof *bi);
+    unsigned char *utf8 = utf8_dup_to(c_str(string));
+    bi->buf = utf8;
+    bi->size = strlen((char *) utf8);
+    bi->index = 0;
+    return cobj(bi, stream_t, &byte_in_ops.cobj_ops);
+  }
+}
+
 obj_t *make_string_output_stream(void)
 {
   struct string_output *so = (struct string_output *) chk_malloc(sizeof *so);
@@ -646,6 +707,17 @@ obj_t *get_char(obj_t *stream)
   }
 }
 
+obj_t *get_byte(obj_t *stream)
+{
+  type_check (stream, COBJ);
+  type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
+
+  {
+    struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
+    return ops->get_byte ? ops->get_byte(stream) : nil;
+  }
+}
+
 obj_t *vformat(obj_t *stream, const wchar_t *str, va_list vl)
 {
   type_check (stream, COBJ);
author	Kaz Kylheku <kaz@kylheku.com>	2009-11-13 09:33:27 -0800
committer	Kaz Kylheku <kaz@kylheku.com>	2009-11-13 09:33:27 -0800
commit	95e59dd555a038fd6eb70bc38e4e921d811b1f49 (patch)
tree	f3dc2ec7704ff24903618ed4ca5b09e5a1c2e5e8 /stream.c
parent	673d5f3b84d276fb29233d6a3f485ccfe330be13 (diff)
download	txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.gz txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.bz2 txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.zip