summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2009-11-13 09:33:27 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2009-11-13 09:33:27 -0800
commit: 95e59dd555a038fd6eb70bc38e4e921d811b1f49 (patch)
tree: f3dc2ec7704ff24903618ed4ca5b09e5a1c2e5e8
parent: 673d5f3b84d276fb29233d6a3f485ccfe330be13 (diff)
download: txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.gz
txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.tar.bz2
txr-95e59dd555a038fd6eb70bc38e4e921d811b1f49.zip
Previous commit broke UTF-8 lexing, by changing the get_char
semantics on the input stream to wide character input. Also, reading a query from the command line (-c) must read bytes from a UTF-8 encoding of the string. We introduce a new get_byte function which can extract bytes from streams which provide it.
-rw-r--r-- ChangeLog 25
-rw-r--r-- parser.l 4
-rw-r--r-- stream.c 72
-rw-r--r-- stream.h 2
-rw-r--r-- txr.c 2
5 files changed, 102 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index ec042774..2d31eba4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2009-11-13 Kaz Kylheku <kkylheku@gmail.com>
+
+ Previous commit broke UTF-8 lexing, by changing the get_char
+ semantics on the input stream to wide character input.
+ Also, reading a query from the command line (-c) must
+ read bytes from a UTF-8 encoding of the string.
+ We introduce a new get_byte function which can extract bytes
+ from streams which provide it.
+
+ * parser.l (YYINPUT): Call get_byte instead of get_char.
+
+ * stream.c (struct strm_ops): New function pointer, get_byte.
+ (stdio_get_byte): New function.
+ (stdio_ops, pipe_ops): Add new function.
+ (string_in_ops, string_out_ops, dir_ops): Null pointer added.
+ (struct byte_input): New struct type.
+ (byte_in_get_byte): New function.
+ (byte_in_ops): New structure.
+ (make_string_byte_input_stream, get_byte): New functions.
+
+ * stream.h (make_string_byte_input_stream, get_byte): New functions.
+
+ * txr.c (txr_main): Make a byte input stream from the command
+ line spec, rather than a string input stream.
+
2009-11-12 Kaz Kylheku <kkylheku@gmail.com>
Continuing wchar_t conversion. Making sure all stdio calls
diff --git a/parser.l b/parser.l
index 801185f0..39392d7f 100644
--- a/parser.l
+++ b/parser.l
@@ -48,8 +48,8 @@
obj_t *c = nil; \
int n, ch = '*'; \
for (n = 0; n < max_size && \
- (c = get_char(yyin_stream)) && \
- (ch = c_chr(c)) != '\n'; ++n) \
+ (c = get_byte(yyin_stream)) && \
+ (ch = c_num(c)) != '\n'; ++n) \
buf[n] = (char) ch; \
if (ch == '\n') \
buf[n++] = (char) ch; \
diff --git a/stream.c b/stream.c
index 832bbf80..e2618a03 100644
--- a/stream.c
+++ b/stream.c
@@ -54,6 +54,7 @@ struct strm_ops {
obj_t *(*put_char)(obj_t *, wchar_t);
obj_t *(*get_line)(obj_t *);
obj_t *(*get_char)(obj_t *);
+ obj_t *(*get_byte)(obj_t *);
obj_t *(*vcformat)(obj_t *, const wchar_t *fmt, va_list vl);
obj_t *(*vformat)(obj_t *, const wchar_t *fmt, va_list vl);
obj_t *(*close)(obj_t *, obj_t *);
@@ -227,6 +228,16 @@ obj_t *stdio_get_char(obj_t *stream)
return nil;
}
+obj_t *stdio_get_byte(obj_t *stream)
+{
+ struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
+ if (h->f) {
+ int ch = getc(h->f);
+ return (ch != EOF) ? num(ch) : stdio_maybe_read_error(stream);
+ }
+ return nil;
+}
+
obj_t *stdio_vcformat(obj_t *stream, const wchar_t *fmt, va_list vl)
{
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
@@ -263,6 +274,7 @@ static struct strm_ops stdio_ops = {
stdio_put_char,
stdio_get_line,
stdio_get_char,
+ stdio_get_byte,
stdio_vcformat,
common_vformat,
stdio_close
@@ -318,6 +330,7 @@ static struct strm_ops pipe_ops = {
stdio_put_char,
stdio_get_line,
stdio_get_char,
+ stdio_get_byte,
stdio_vcformat,
common_vformat,
pipe_close
@@ -370,9 +383,41 @@ static struct strm_ops string_in_ops = {
string_in_get_char,
0,
0,
+ 0,
+ 0
+};
+
+struct byte_input {
+ unsigned char *buf;
+ size_t size;
+ size_t index;
+};
+
+static obj_t *byte_in_get_byte(obj_t *stream)
+{
+ struct byte_input *bi = (struct byte_input *) stream->co.handle;
+
+ if (bi->index < bi->size)
+ return num(bi->buf[bi->index++]);
+ return nil;
+}
+
+static struct strm_ops byte_in_ops = {
+ { common_equal,
+ cobj_print_op,
+ 0,
+ 0 },
+ 0,
+ 0,
+ 0,
+ 0,
+ byte_in_get_byte,
+ 0,
+ 0,
0
};
+
struct string_output {
wchar_t *buf;
size_t size;
@@ -483,6 +528,7 @@ static struct strm_ops string_out_ops = {
string_out_put_char,
0,
0,
+ 0,
string_out_vcformat,
common_vformat,
0,
@@ -528,6 +574,7 @@ static struct strm_ops dir_ops = {
0,
0,
0,
+ 0,
dir_close
};
@@ -572,6 +619,20 @@ obj_t *make_string_input_stream(obj_t *string)
return cobj((void *) cons(string, zero), stream_t, &string_in_ops.cobj_ops);
}
+obj_t *make_string_byte_input_stream(obj_t *string)
+{
+ type_assert (stringp(string), (L"~a is not a string", string));
+
+ {
+ struct byte_input *bi = (struct byte_input *) chk_malloc(sizeof *bi);
+ unsigned char *utf8 = utf8_dup_to(c_str(string));
+ bi->buf = utf8;
+ bi->size = strlen((char *) utf8);
+ bi->index = 0;
+ return cobj(bi, stream_t, &byte_in_ops.cobj_ops);
+ }
+}
+
obj_t *make_string_output_stream(void)
{
struct string_output *so = (struct string_output *) chk_malloc(sizeof *so);
@@ -646,6 +707,17 @@ obj_t *get_char(obj_t *stream)
}
}
+obj_t *get_byte(obj_t *stream)
+{
+ type_check (stream, COBJ);
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
+
+ {
+ struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
+ return ops->get_byte ? ops->get_byte(stream) : nil;
+ }
+}
+
obj_t *vformat(obj_t *stream, const wchar_t *str, va_list vl)
{
type_check (stream, COBJ);
diff --git a/stream.h b/stream.h
index 893aabfe..b4354168 100644
--- a/stream.h
+++ b/stream.h
@@ -29,12 +29,14 @@ extern obj_t *std_input, *std_output, *std_error;
obj_t *make_stdio_stream(FILE *, obj_t *descr, obj_t *input, obj_t *output);
obj_t *make_pipe_stream(FILE *, obj_t *descr, obj_t *input, obj_t *output);
obj_t *make_string_input_stream(obj_t *);
+obj_t *make_string_byte_input_stream(obj_t *);
obj_t *make_string_output_stream(void);
obj_t *get_string_from_stream(obj_t *);
obj_t *make_dir_stream(DIR *);
obj_t *close_stream(obj_t *stream, obj_t *throw_on_error);
obj_t *get_line(obj_t *);
obj_t *get_char(obj_t *);
+obj_t *get_byte(obj_t *);
obj_t *vformat(obj_t *stream, const wchar_t *string, va_list);
obj_t *vcformat(obj_t *stream, const wchar_t *string, va_list);
obj_t *format(obj_t *stream, const wchar_t *string, ...);
diff --git a/txr.c b/txr.c
index 9cdb03d5..58bfffb3 100644
--- a/txr.c
+++ b/txr.c
@@ -307,7 +307,7 @@ static int txr_main(int argc, char **argv)
if (specstring) {
spec_file = L"cmdline";
spec_file_str = string(spec_file);
- yyin_stream = make_string_input_stream(specstring);
+ yyin_stream = make_string_byte_input_stream(specstring);
} else if (spec_file_str) {
if (wcscmp(c_str(spec_file_str), L"-") != 0) {
FILE *in = w_fopen(c_str(spec_file_str), L"r");