summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2009-11-11 08:54:21 -0800
committerKaz Kylheku <kaz@kylheku.com>2009-11-11 08:54:21 -0800
commitd59d8950ec58702821ec618b92dfb2490ae0bf31 (patch)
treee27e2914d563171ad56c2f7ae30c7c49343df06c
parent2f62f352f603b837a5cf032c257531052530c410 (diff)
downloadtxr-d59d8950ec58702821ec618b92dfb2490ae0bf31.tar.gz
txr-d59d8950ec58702821ec618b92dfb2490ae0bf31.tar.bz2
txr-d59d8950ec58702821ec618b92dfb2490ae0bf31.zip
Big conversion to wide characters and UTF-8 support.
This is incomplete. There are too many dependencies on wide character support from the C stream I/O library, and implicit use of some encoding which may not be UTF-8. The regex code does not handle wide characters properly. Character type is still int in some places, rather than wchar_t. Test suite passes though.
-rw-r--r--ChangeLog68
-rw-r--r--Makefile2
-rw-r--r--gc.c2
-rw-r--r--hash.c2
-rw-r--r--lib.c328
-rw-r--r--lib.h17
-rw-r--r--match.c233
-rw-r--r--parser.h2
-rw-r--r--parser.l84
-rw-r--r--parser.y5
-rw-r--r--regex.c6
-rw-r--r--regex.h2
-rw-r--r--stream.c128
-rw-r--r--stream.h6
-rw-r--r--txr.c67
-rw-r--r--txr.h4
-rw-r--r--unwind.c20
-rw-r--r--unwind.h6
-rw-r--r--utf8.c168
-rw-r--r--utf8.h32
20 files changed, 750 insertions, 432 deletions
diff --git a/ChangeLog b/ChangeLog
index 2799b9b2..dcbf23e0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,71 @@
+2009-11-11 Kaz Kylheku <kkylheku@gmail.com>
+
+ Big conversion to wide characters and UTF-8 support.
+ This is incomplete. There are too many dependencies on
+ wide character support from the C stream I/O library.
+ The regex code does not handle wide characters properly.
+ Character type is still int in some places, rather than wchar_t.
+ Test suite passes though.
+
+ * hash.c (hash_str): Converted to wchar_t.
+
+ * lib.c (progname, type_check, type_check2, type_check3,
+ car, cdr, car_l, cdr_l, equal, chk_strdup, string_own,
+ string, mkstring, mkustring, init_str, length_str,
+ c_str, search_str, sub_str, cat_str, split_str, trim_str,
+ chrp, apply, lazy_str, lazy_str_get_trailing_list,
+ cobj, obj_init, obj_print, obj_pprint, init): Converted to wchar_t.
+ (vector): Cast of chk_malloc return value added.
+ (string_utf8): New function.
+
+ * lib.h (struct string): Member str changed to wchar_t *.
+ (progname, chk_strdup, string_own, string, init_str,
+ c_str, init): Declarations updated.
+ (string_utf8): Declared.
+
+ * match.c (debugf, debuglf, sem_error, file_err, dump_shell_string,
+ dump_var, dump_bindings, dest_bind, match_line, do_output_line,
+ do_output, match_files): Converted to wchar_t.
+
+ * parser.h (spec_file): Declaration updated.
+
+ * parser.l (yy_errorf, char_esc, num_esc): Converted to wchar_t.
+ (ASC, ASCN, U, U2, U3, U4, UANY, UNANN, UONLY): New named
+ regexes, used for lexing utf-8.
+ (grammar): Converted to wchar_t and utf-8 handling.
+
+ * parser.y (%union/yystype): lexeme member changed to wchar_t *,
+ chr member changed to wchar_t.
+
+ * regex.c (nfa_run): Input string is wchar_t *.
+ (search_regex): String from haystack is wchar_t *.
+
+ * regex.h (nfa_run): Declaration updated.
+
+ * stream.c (struct strm_ops, common_vformat, stdio_stream_print,
+ stdio_maybe_read_error, stdio_maybe_write_error, stdio_put_string,
+ stdio_put_char, snarf_line, stdio_get_line, stdio_close, pipe_close,
+ struct string_output, string_out_put_string, string_out_put_char,
+ string_out_vcformat, dir_get_line, make_string_output_stream,
+ get_string_from-stream, make_dir_stream, get_line, get_char,
+ vformat, vcformat, format, cformat, put_string, put_cstring,
+ put_char, put_cchar, stream_init): Converted to wchar_t.
+
+ * stream.h (vformat, format, put_cstring): Declarations updated.
+
+ * txr.c (version, progname, spec_file, oom_realloc_handler,
+ help, hint, remove_hash_bang_line, main, txr_main): Converted
+ to wchar_t.
+
+ * txr.h (version, progname): Declarations updated.
+
+ * unwind.c (uw_throw, uw_throwf, uw_errorf, type_mismatch,
+ uw_register_subtype): Converted to wchar_t.
+
+ * unwind.h (uw_throwf, uw_errorf, type_mismatch): Declarations updated.
+
+ * utf8.c, utf8.h: New files.
+
2009-11-10 Kaz Kylheku <kkylheku@gmail.com>
hash.c (hash_grow): Rewritten to avoid resizing the vector
diff --git a/Makefile b/Makefile
index 766cdd0f..a3389518 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@
CFLAGS := -I$(top_srcdir) $(LANG_FLAGS) $(DIAG_FLAGS) $(OPT_FLAGS) $(DBG_FLAGS)
OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o
-OBJS += hash.o
+OBJS += hash.o utf8.o
PROG := ./txr
diff --git a/gc.c b/gc.c
index 4d19fedf..40291021 100644
--- a/gc.c
+++ b/gc.c
@@ -321,7 +321,7 @@ static void sweep(void)
continue;
if (0 && dbg) {
- fprintf(stderr, "%s: finalizing: ", progname);
+ fprintf(stderr, "%ls: finalizing: ", progname);
obj_print(block, std_error);
putc('\n', stderr);
}
diff --git a/hash.c b/hash.c
index 8acf7a5e..86ceb340 100644
--- a/hash.c
+++ b/hash.c
@@ -64,7 +64,7 @@ static struct hash *reachable_weak_hashes;
* We don't reduce the final result modulo a small prime, but leave it
* as it is; let the hashing routines do their own reduction.
*/
-static long hash_c_str(const char *str)
+static long hash_c_str(const wchar_t *str)
{
unsigned long h = 0;
while (*str) {
diff --git a/lib.c b/lib.c
index 51a1cdd2..110a94d1 100644
--- a/lib.c
+++ b/lib.c
@@ -27,16 +27,18 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
+#include <wctype.h>
#include <assert.h>
#include <limits.h>
#include <stdarg.h>
#include <dirent.h>
#include <setjmp.h>
+#include <wchar.h>
#include "lib.h"
#include "gc.h"
#include "unwind.h"
#include "stream.h"
+#include "utf8.h"
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -63,7 +65,7 @@ obj_t *null_list;
obj_t *identity_f;
obj_t *equal_f;
-const char *progname;
+const wchar_t *progname;
obj_t *prog_string;
void *(*oom_realloc)(void *, size_t);
@@ -125,14 +127,14 @@ obj_t *typeof(obj_t *obj)
obj_t *type_check(obj_t *obj, int type)
{
if (!is_ptr(obj) || obj->t.type != type)
- type_mismatch("~s is not of type ~s", obj, code2type(type), nao);
+ type_mismatch(L"~s is not of type ~s", obj, code2type(type), nao);
return t;
}
obj_t *type_check2(obj_t *obj, int t1, int t2)
{
if (!is_ptr(obj) || (obj->t.type != t1 && obj->t.type != t2))
- type_mismatch("~s is not of type ~s or ~s", obj,
+ type_mismatch(L"~s is not of type ~s or ~s", obj,
code2type(t1), code2type(t2), nao);
return t;
}
@@ -141,7 +143,7 @@ obj_t *type_check3(obj_t *obj, int t1, int t2, int t3)
{
if (!is_ptr(obj) || (obj->t.type != t1 && obj->t.type != t2
&& obj->t.type != t3))
- type_mismatch("~s is not of type ~s, ~s nor ~s", obj,
+ type_mismatch(L"~s is not of type ~s, ~s nor ~s", obj,
code2type(t1), code2type(t2), code2type(t3), nao);
return t;
}
@@ -162,7 +164,7 @@ obj_t *car(obj_t *cons)
return cons->lc.car;
}
default:
- type_mismatch("~s is not a cons", cons, nao);
+ type_mismatch(L"~s is not a cons", cons, nao);
}
}
@@ -182,7 +184,7 @@ obj_t *cdr(obj_t *cons)
return cons->lc.cdr;
}
default:
- type_mismatch("~s is not a cons", cons, nao);
+ type_mismatch(L"~s is not a cons", cons, nao);
}
}
@@ -195,7 +197,7 @@ obj_t **car_l(obj_t *cons)
funcall1(cons->lc.func, cons);
return &cons->lc.car;
default:
- type_mismatch("~s is not a cons", cons, nao);
+ type_mismatch(L"~s is not a cons", cons, nao);
}
}
@@ -208,7 +210,7 @@ obj_t **cdr_l(obj_t *cons)
funcall1(cons->lc.func, cons);
return &cons->lc.cdr;
default:
- type_mismatch("~s is not a cons", cons, nao);
+ type_mismatch(L"~s is not a cons", cons, nao);
}
}
@@ -429,7 +431,7 @@ obj_t *equal(obj_t *left, obj_t *right)
return nil;
case STR:
if (type(right) == STR)
- return strcmp(left->st.str, right->st.str) == 0 ? t : nil;
+ return wcscmp(left->st.str, right->st.str) == 0 ? t : nil;
if (type(right) == LSTR) {
lazy_str_force(right);
return equal(left, right->ls.prefix);
@@ -508,11 +510,11 @@ void *chk_realloc(void *old, size_t size)
return newptr;
}
-void *chk_strdup(const char *str)
+void *chk_strdup(const wchar_t *str)
{
- size_t size = strlen(str) + 1;
- char *copy = chk_malloc(size);
- memcpy(copy, str, size);
+ size_t nchar = wcslen(str) + 1;
+ wchar_t *copy = (wchar_t *) chk_malloc(nchar * sizeof *copy);
+ wmemcpy(copy, str, nchar);
return copy;
}
@@ -603,7 +605,7 @@ obj_t *num(long val)
long c_num(obj_t *num)
{
if (!is_num(num))
- type_mismatch("~s is not a number", num, nao);
+ type_mismatch(L"~s is not a number", num, nao);
return ((long) num) >> TAG_SHIFT;
}
@@ -681,7 +683,7 @@ obj_t *min2(obj_t *anum, obj_t *bnum)
return c_num(anum) < c_num(bnum) ? anum : bnum;
}
-obj_t *string_own(char *str)
+obj_t *string_own(wchar_t *str)
{
obj_t *obj = make_obj();
obj->st.type = STR;
@@ -690,7 +692,7 @@ obj_t *string_own(char *str)
return obj;
}
-obj_t *string(const char *str)
+obj_t *string(const wchar_t *str)
{
obj_t *obj = make_obj();
obj->st.type = STR;
@@ -699,27 +701,36 @@ obj_t *string(const char *str)
return obj;
}
+obj_t *string_utf8(const unsigned char *str)
+{
+ obj_t *obj = make_obj();
+ obj->st.type = STR;
+ obj->st.str = utf8_dup_from(str);
+ obj->st.len = nil;
+ return obj;
+}
+
obj_t *mkstring(obj_t *len, obj_t *ch)
{
- char *str = chk_malloc(c_num(len) + 1);
+ size_t nchar = c_num(len) + 1;
+ wchar_t *str = (wchar_t *) chk_malloc(nchar * sizeof *str);
obj_t *s = string_own(str);
- memset(str, c_chr(ch), c_num(len));
- str[c_num(len)] = 0;
+ wmemset(str, c_chr(ch), nchar);
s->st.len = len;
return s;
}
obj_t *mkustring(obj_t *len)
{
- char *str = chk_malloc(c_num(len) + 1);
+ wchar_t *str = (wchar_t *) chk_malloc((c_num(len) + 1) * sizeof *str);
obj_t *s = string_own(str);
s->st.len = len;
return s;
}
-obj_t *init_str(obj_t *str, const char *data)
+obj_t *init_str(obj_t *str, const wchar_t *data)
{
- memcpy(str->st.str, data, c_num(str->st.len));
+ wmemcpy(str->st.str, data, c_num(str->st.len) + 1);
return str;
}
@@ -753,11 +764,11 @@ obj_t *length_str(obj_t *str)
}
if (!str->st.len)
- str->st.len = num(strlen(str->st.str));
+ str->st.len = num(wcslen(str->st.str));
return str->st.len;
}
-const char *c_str(obj_t *obj)
+const wchar_t *c_str(obj_t *obj)
{
type_check3(obj, STR, SYM, LSTR);
@@ -783,14 +794,14 @@ obj_t *search_str(obj_t *haystack, obj_t *needle, obj_t *start_num,
obj_t *h_is_lazy = lazy_stringp(haystack);
long start = c_num(start_num);
long good = -1, pos = -1;
- const char *n = c_str(needle), *h;
+ const wchar_t *n = c_str(needle), *h;
if (!h_is_lazy) {
do {
- const char *f;
+ const wchar_t *f;
h = c_str(haystack);
- f = strstr(h + start, n);
+ f = wcsstr(h + start, n);
if (f)
pos = f - h;
@@ -804,7 +815,7 @@ obj_t *search_str(obj_t *haystack, obj_t *needle, obj_t *start_num,
lazy_str_force_upto(haystack, plus(num(start + 1), length_str(needle)));
h = c_str(haystack->ls.prefix);
- if (!strncmp(h + start, n, ln))
+ if (!wcsncmp(h + start, n, ln))
good = start;
} while (h[++start] && (from_end || good == -1));
}
@@ -847,11 +858,12 @@ obj_t *sub_str(obj_t *str_in, obj_t *from, obj_t *to)
if (ge(from, to)) {
return null_string;
} else {
- size_t size = c_num(to) - c_num(from) + 1;
- char *sub = chk_malloc(size);
- const char *str = c_str(lazy_stringp(str_in) ? str_in->ls.prefix : str_in);
- strncpy(sub, str + c_num(from), size);
- sub[size-1] = 0;
+ size_t nchar = c_num(to) - c_num(from) + 1;
+ wchar_t *sub = (wchar_t *) chk_malloc(nchar * sizeof (wchar_t));
+ const wchar_t *str = c_str(lazy_stringp(str_in)
+ ? str_in->ls.prefix : str_in);
+ wcsncpy(sub, str + c_num(from), nchar);
+ sub[nchar-1] = 0;
return string_own(sub);
}
}
@@ -860,7 +872,7 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
{
long total = 0;
obj_t *iter;
- char *str, *ptr;
+ wchar_t *str, *ptr;
long len_sep = sep ? c_num(length_str(sep)) : 0;
for (iter = list; iter != nil; iter = cdr(iter)) {
@@ -882,7 +894,7 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
return nil;
}
- str = chk_malloc(total + 1);
+ str = (wchar_t *) chk_malloc((total + 1) * sizeof *str);
for (ptr = str, iter = list; iter != nil; iter = cdr(iter)) {
obj_t *item = car(iter);
@@ -891,14 +903,14 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
continue;
if (stringp(item)) {
len = c_num(length_str(item));
- memcpy(ptr, c_str(item), len);
+ wmemcpy(ptr, c_str(item), len);
ptr += len;
} else {
*ptr++ = c_chr(item);
}
if (len_sep && cdr(iter)) {
- memcpy(ptr, c_str(sep), len_sep);
+ wmemcpy(ptr, c_str(sep), len_sep);
ptr += len_sep;
}
}
@@ -909,12 +921,12 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
obj_t *split_str(obj_t *str, obj_t *sep)
{
- const char *cstr = c_str(str);
- const char *csep = c_str(sep);
+ const wchar_t *cstr = c_str(str);
+ const wchar_t *csep = c_str(sep);
list_collect_decl (out, iter);
for (;;) {
- size_t span = strcspn(cstr, csep);
+ size_t span = wcscspn(cstr, csep);
obj_t *piece = mkustring(num(span));
init_str(piece, cstr);
list_collect (iter, piece);
@@ -929,21 +941,21 @@ obj_t *split_str(obj_t *str, obj_t *sep)
obj_t *trim_str(obj_t *str)
{
- const char *start = c_str(str);
- const char *end = start + c_num(length_str(str));
+ const wchar_t *start = c_str(str);
+ const wchar_t *end = start + c_num(length_str(str));
- while (start[0] && isspace(start[0]))
+ while (start[0] && iswspace(start[0]))
start++;
- while (end > start && isspace(end[-1]))
+ while (end > start && iswspace(end[-1]))
end--;
if (end == start) {
return null_string;
} else {
size_t len = end - start;
- char *new = chk_malloc(len + 1);
- memcpy(new, start, len);
+ wchar_t *new = (wchar_t *) chk_malloc((len + 1) * sizeof *new);
+ wmemcpy(new, start, len);
new[len] = 0;
return string_own(new);
}
@@ -951,7 +963,7 @@ obj_t *trim_str(obj_t *str)
obj_t *string_lt(obj_t *astr, obj_t *bstr)
{
- int cmp = strcmp(c_str(astr), c_str(bstr));
+ int cmp = wcscmp(c_str(astr), c_str(bstr));
return cmp == -1 ? t : nil;
}
@@ -969,7 +981,7 @@ obj_t *chrp(obj_t *chr)
int c_chr(obj_t *chr)
{
if (!is_chr(chr))
- type_mismatch("~s is not a character", chr, nao);
+ type_mismatch(L"~s is not a character", chr, nao);
return ((int) chr) >> TAG_SHIFT;
}
@@ -1141,7 +1153,7 @@ obj_t *apply(obj_t *fun, obj_t *arglist)
type_check (fun, FUN);
type_assert (listp(arglist),
- ("apply arglist ~s is not a list", arglist, nao));
+ (L"apply arglist ~s is not a list", arglist, nao));
*p++ = car(arglist); arglist = cdr(arglist);
*p++ = car(arglist); arglist = cdr(arglist);
@@ -1267,7 +1279,7 @@ obj_t *vector(obj_t *alloc)
{
long alloc_plus = c_num(alloc) + 2;
obj_t *vec = make_obj();
- obj_t **v = chk_malloc(alloc_plus * sizeof *v);
+ obj_t **v = (obj_t **) chk_malloc(alloc_plus * sizeof *v);
vec->v.type = VEC;
vec->v.vec = v + 2;
v[0] = alloc;
@@ -1374,7 +1386,7 @@ obj_t *lazy_str(obj_t *lst, obj_t *term, obj_t *limit)
obj_t *obj = make_obj();
obj->ls.type = LSTR;
- term = or2(term, string("\n"));
+ term = or2(term, string(L"\n"));
if (nullp(lst)) {
obj->ls.prefix = null_string;
@@ -1501,7 +1513,7 @@ obj_t *lazy_str_get_trailing_list(obj_t *lstr, obj_t *index)
{
obj_t *split_suffix = split_str(sub_str(lstr->ls.prefix, index, nil),
- or2(car(lstr->ls.opts), string("\n")));
+ or2(car(lstr->ls.opts), string(L"\n")));
return nappend2(split_suffix, lstr->ls.list);
}
@@ -1519,7 +1531,7 @@ obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops)
void cobj_print_op(obj_t *obj, obj_t *out)
{
- put_cstring(out, "#<");
+ put_cstring(out, L"#<");
obj_print(obj->co.cls, out);
cformat(out, ": %p>", obj->co.handle);
}
@@ -1707,77 +1719,77 @@ static void obj_init(void)
&identity_f, &prog_string,
(obj_t **) 0);
- nil_string = string("nil");
-
- null = intern(string("null"));
- t = intern(string("t"));
- cons_t = intern(string("cons"));
- str_t = intern(string("str"));
- chr_t = intern(string("chr"));
- num_t = intern(string("num"));
- sym_t = intern(string("sym"));
- fun_t = intern(string("fun"));
- vec_t = intern(string("vec"));
- stream_t = intern(string("stream"));
- hash_t = intern(string("hash"));
- lcons_t = intern(string("lcons"));
- lstr_t = intern(string("lstr"));
- cobj_t = intern(string("cobj"));
- var = intern(string("$var"));
- regex = intern(string("$regex"));
- set = intern(string("set"));
- cset = intern(string("cset"));
- wild = intern(string("wild"));
- oneplus = intern(string("1+"));
- zeroplus = intern(string("0+"));
- optional = intern(string("?"));
- compound = intern(string("compound"));
- or = intern(string("or"));
- quasi = intern(string("$quasi"));
- skip = intern(string("skip"));
- trailer = intern(string("trailer"));
- block = intern(string("block"));
- next = intern(string("next"));
- freeform = intern(string("freeform"));
- fail = intern(string("fail"));
- accept = intern(string("accept"));
- all = intern(string("all"));
- some = intern(string("some"));
- none = intern(string("none"));
- maybe = intern(string("maybe"));
- cases = intern(string("cases"));
- collect = intern(string("collect"));
- until = intern(string("until"));
- coll = intern(string("coll"));
- define = intern(string("define"));
- output = intern(string("output"));
- single = intern(string("single"));
- frst = intern(string("first"));
- lst = intern(string("last"));
- empty = intern(string("empty"));
- repeat = intern(string("repeat"));
- rep = intern(string("rep"));
- flattn = intern(string("flatten"));
- forget = intern(string("forget"));
- local = intern(string("local"));
- mrge = intern(string("merge"));
- bind = intern(string("bind"));
- cat = intern(string("cat"));
- args = intern(string("args"));
- try = intern(string("try"));
- catch = intern(string("catch"));
- finally = intern(string("finally"));
- nothrow = intern(string("nothrow"));
- throw = intern(string("throw"));
- defex = intern(string("defex"));
- error = intern(string("error"));
- type_error = intern(string("type_error"));
- internal_err = intern(string("internal_error"));
- numeric_err = intern(string("numeric_error"));
- range_err = intern(string("range_error"));
- query_error = intern(string("query_error"));
- file_error = intern(string("file_error"));
- process_error = intern(string("process_error"));
+ nil_string = string(L"nil");
+
+ null = intern(string(L"null"));
+ t = intern(string(L"t"));
+ cons_t = intern(string(L"cons"));
+ str_t = intern(string(L"str"));
+ chr_t = intern(string(L"chr"));
+ num_t = intern(string(L"num"));
+ sym_t = intern(string(L"sym"));
+ fun_t = intern(string(L"fun"));
+ vec_t = intern(string(L"vec"));
+ stream_t = intern(string(L"stream"));
+ hash_t = intern(string(L"hash"));
+ lcons_t = intern(string(L"lcons"));
+ lstr_t = intern(string(L"lstr"));
+ cobj_t = intern(string(L"cobj"));
+ var = intern(string(L"$var"));
+ regex = intern(string(L"$regex"));
+ set = intern(string(L"set"));
+ cset = intern(string(L"cset"));
+ wild = intern(string(L"wild"));
+ oneplus = intern(string(L"1+"));
+ zeroplus = intern(string(L"0+"));
+ optional = intern(string(L"?"));
+ compound = intern(string(L"compound"));
+ or = intern(string(L"or"));
+ quasi = intern(string(L"$quasi"));
+ skip = intern(string(L"skip"));
+ trailer = intern(string(L"trailer"));
+ block = intern(string(L"block"));
+ next = intern(string(L"next"));
+ freeform = intern(string(L"freeform"));
+ fail = intern(string(L"fail"));
+ accept = intern(string(L"accept"));
+ all = intern(string(L"all"));
+ some = intern(string(L"some"));
+ none = intern(string(L"none"));
+ maybe = intern(string(L"maybe"));
+ cases = intern(string(L"cases"));
+ collect = intern(string(L"collect"));
+ until = intern(string(L"until"));
+ coll = intern(string(L"coll"));
+ define = intern(string(L"define"));
+ output = intern(string(L"output"));
+ single = intern(string(L"single"));
+ frst = intern(string(L"first"));
+ lst = intern(string(L"last"));
+ empty = intern(string(L"empty"));
+ repeat = intern(string(L"repeat"));
+ rep = intern(string(L"rep"));
+ flattn = intern(string(L"flatten"));
+ forget = intern(string(L"forget"));
+ local = intern(string(L"local"));
+ mrge = intern(string(L"merge"));
+ bind = intern(string(L"bind"));
+ cat = intern(string(L"cat"));
+ args = intern(string(L"args"));
+ try = intern(string(L"try"));
+ catch = intern(string(L"catch"));
+ finally = intern(string(L"finally"));
+ nothrow = intern(string(L"nothrow"));
+ throw = intern(string(L"throw"));
+ defex = intern(string(L"defex"));
+ error = intern(string(L"error"));
+ type_error = intern(string(L"type_error"));
+ internal_err = intern(string(L"internal_error"));
+ numeric_err = intern(string(L"numeric_error"));
+ range_err = intern(string(L"range_error"));
+ query_error = intern(string(L"query_error"));
+ file_error = intern(string(L"file_error"));
+ process_error = intern(string(L"process_error"));
interned_syms = cons(nil, interned_syms);
@@ -1788,7 +1800,7 @@ static void obj_init(void)
maxint = num(NUM_MAX);
minint = num(NUM_MIN);
- null_string = string("");
+ null_string = string(L"");
null_list = cons(nil, nil);
@@ -1800,7 +1812,7 @@ static void obj_init(void)
void obj_print(obj_t *obj, obj_t *out)
{
if (obj == nil) {
- put_cstring(out, "nil");
+ put_cstring(out, L"nil");
return;
}
@@ -1817,7 +1829,7 @@ void obj_print(obj_t *obj, obj_t *out)
} else if (consp(cdr(iter))) {
put_cchar(out, ' ');
} else {
- put_cstring(out, " . ");
+ put_cstring(out, L" . ");
obj_print(cdr(iter), out);
put_cchar(out, ')');
}
@@ -1826,22 +1838,22 @@ void obj_print(obj_t *obj, obj_t *out)
return;
case STR:
{
- const char *ptr;
+ const wchar_t *ptr;
put_cchar(out, '"');
for (ptr = obj->st.str; *ptr; ptr++) {
switch (*ptr) {
- case '\a': put_cstring(out, "\\a"); break;
- case '\b': put_cstring(out, "\\b"); break;
- case '\t': put_cstring(out, "\\t"); break;
- case '\n': put_cstring(out, "\\n"); break;
- case '\v': put_cstring(out, "\\v"); break;
- case '\f': put_cstring(out, "\\f"); break;
- case '\r': put_cstring(out, "\\r"); break;
- case '"': put_cstring(out, "\\\""); break;
- case '\\': put_cstring(out, "\\\\"); break;
- case 27: put_cstring(out, "\\e"); break;
+ case L'\a': put_cstring(out, L"\\a"); break;
+ case L'\b': put_cstring(out, L"\\b"); break;
+ case L'\t': put_cstring(out, L"\\t"); break;
+ case L'\n': put_cstring(out, L"\\n"); break;
+ case L'\v': put_cstring(out, L"\\v"); break;
+ case L'\f': put_cstring(out, L"\\f"); break;
+ case L'\r': put_cstring(out, L"\\r"); break;
+ case L'"': put_cstring(out, L"\\\""); break;
+ case L'\\': put_cstring(out, L"\\\\"); break;
+ case 27: put_cstring(out, L"\\e"); break;
default:
- if (isprint(*ptr))
+ if (iswprint(*ptr))
put_cchar(out, *ptr);
else
cformat(out, "\\%03o", (int) *ptr);
@@ -1856,18 +1868,18 @@ void obj_print(obj_t *obj, obj_t *out)
put_cchar(out, '\'');
switch (ch) {
- case '\a': put_cstring(out, "\\a"); break;
- case '\b': put_cstring(out, "\\b"); break;
- case '\t': put_cstring(out, "\\t"); break;
- case '\n': put_cstring(out, "\\n"); break;
- case '\v': put_cstring(out, "\\v"); break;
- case '\f': put_cstring(out, "\\f"); break;
- case '\r': put_cstring(out, "\\r"); break;
- case '"': put_cstring(out, "\\\""); break;
- case '\\': put_cstring(out, "\\\\"); break;
- case 27: put_cstring(out, "\\e"); break;
+ case L'\a': put_cstring(out, L"\\a"); break;
+ case L'\b': put_cstring(out, L"\\b"); break;
+ case L'\t': put_cstring(out, L"\\t"); break;
+ case L'\n': put_cstring(out, L"\\n"); break;
+ case L'\v': put_cstring(out, L"\\v"); break;
+ case L'\f': put_cstring(out, L"\\f"); break;
+ case L'\r': put_cstring(out, L"\\r"); break;
+ case L'"': put_cstring(out, L"\\\""); break;
+ case L'\\': put_cstring(out, L"\\\\"); break;
+ case 27: put_cstring(out, L"\\e"); break;
default:
- if (isprint(ch))
+ if (iswprint(ch))
put_cchar(out, ch);
else
cformat(out, "\\%03o", ch);
@@ -1887,7 +1899,7 @@ void obj_print(obj_t *obj, obj_t *out)
case VEC:
{
long i, fill = c_num(obj->v.vec[vec_fill]);
- put_cstring(out, "#(");
+ put_cstring(out, L"#(");
for (i = 0; i < fill; i++) {
obj_print(obj->v.vec[i], out);
if (i < fill - 1)
@@ -1898,7 +1910,7 @@ void obj_print(obj_t *obj, obj_t *out)
return;
case LSTR:
obj_print(obj->ls.prefix, out);
- put_cstring(out, "#<... lazy string>");
+ put_cstring(out, L"#<... lazy string>");
return;
case COBJ:
obj->co.ops->print(obj, out);
@@ -1911,7 +1923,7 @@ void obj_print(obj_t *obj, obj_t *out)
void obj_pprint(obj_t *obj, obj_t *out)
{
if (obj == nil) {
- put_cstring(out, "nil");
+ put_cstring(out, L"nil");
return;
}
@@ -1928,7 +1940,7 @@ void obj_pprint(obj_t *obj, obj_t *out)
} else if (consp(cdr(iter))) {
put_cchar(out, ' ');
} else {
- put_cstring(out, " . ");
+ put_cstring(out, L" . ");
obj_pprint(cdr(iter), out);
put_cchar(out, ')');
}
@@ -1953,7 +1965,7 @@ void obj_pprint(obj_t *obj, obj_t *out)
case VEC:
{
long i, fill = c_num(obj->v.vec[vec_fill]);
- put_cstring(out, "#(");
+ put_cstring(out, L"#(");
for (i = 0; i < fill; i++) {
obj_pprint(obj->v.vec[i], out);
if (i < fill - 1)
@@ -1964,7 +1976,7 @@ void obj_pprint(obj_t *obj, obj_t *out)
return;
case LSTR:
obj_pprint(obj->ls.prefix, out);
- put_cstring(out, "...");
+ put_cstring(out, L"...");
return;
case COBJ:
obj->co.ops->print(obj, out);
@@ -1974,7 +1986,7 @@ void obj_pprint(obj_t *obj, obj_t *out)
cformat(out, "#<garbage: %p>", (void *) obj);
}
-void init(const char *pn, void *(*oom)(void *, size_t),
+void init(const wchar_t *pn, void *(*oom)(void *, size_t),
obj_t **stack_bottom)
{
progname = pn;
diff --git a/lib.h b/lib.h
index b993d39e..5fb84018 100644
--- a/lib.h
+++ b/lib.h
@@ -64,7 +64,7 @@ struct cons {
struct string {
type_t type;
- char *str;
+ wchar_t *str;
obj_t *len;
};
@@ -176,7 +176,7 @@ extern obj_t *null_list; /* (nil) */
extern obj_t *identity_f;
extern obj_t *equal_f;
-extern const char *progname;
+extern const wchar_t *progname;
extern obj_t *prog_string;
extern void *(*oom_realloc)(void *, size_t);
@@ -216,7 +216,7 @@ obj_t *nump(obj_t *num);
obj_t *equal(obj_t *left, obj_t *right);
void *chk_malloc(size_t size);
void *chk_realloc(void*, size_t size);
-void *chk_strdup(const char *str);
+void *chk_strdup(const wchar_t *str);
obj_t *cons(obj_t *car, obj_t *cdr);
obj_t *list(obj_t *first, ...); /* terminated by nao */
obj_t *consp(obj_t *obj);
@@ -238,16 +238,17 @@ obj_t *le(obj_t *anum, obj_t *bnum);
obj_t *numeq(obj_t *anum, obj_t *bnum);
obj_t *max2(obj_t *anum, obj_t *bnum);
obj_t *min2(obj_t *anum, obj_t *bnum);
-obj_t *string_own(char *str);
-obj_t *string(const char *str);
+obj_t *string_own(wchar_t *str);
+obj_t *string(const wchar_t *str);
+obj_t *string_utf8(const unsigned char *str);
obj_t *mkstring(obj_t *len, obj_t *ch);
obj_t *mkustring(obj_t *len); /* must initialize immediately with init_str! */
-obj_t *init_str(obj_t *str, const char *);
+obj_t *init_str(obj_t *str, const wchar_t *);
obj_t *copy_str(obj_t *str);
obj_t *stringp(obj_t *str);
obj_t *lazy_stringp(obj_t *str);
obj_t *length_str(obj_t *str);
-const char *c_str(obj_t *str);
+const wchar_t *c_str(obj_t *str);
obj_t *search_str(obj_t *haystack, obj_t *needle, obj_t *start_num,
obj_t *from_end);
obj_t *search_str_tree(obj_t *haystack, obj_t *tree, obj_t *start_num,
@@ -315,7 +316,7 @@ obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun);
void obj_print(obj_t *obj, obj_t *stream);
void obj_pprint(obj_t *obj, obj_t *stream);
-void init(const char *progname, void *(*oom_realloc)(void *, size_t),
+void init(const wchar_t *progname, void *(*oom_realloc)(void *, size_t),
obj_t **stack_bottom);
void dump(obj_t *obj, obj_t *stream);
obj_t *snarf(obj_t *in);
diff --git a/match.c b/match.c
index fb82d945..9b62a7ba 100644
--- a/match.c
+++ b/match.c
@@ -32,6 +32,7 @@
#include <dirent.h>
#include <setjmp.h>
#include <stdarg.h>
+#include <wchar.h>
#include "lib.h"
#include "gc.h"
#include "unwind.h"
@@ -39,28 +40,29 @@
#include "stream.h"
#include "parser.h"
#include "txr.h"
+#include "utf8.h"
#include "match.h"
int output_produced;
-static void debugf(const char *fmt, ...)
+static void debugf(const wchar_t *fmt, ...)
{
if (opt_loglevel >= 2) {
va_list vl;
va_start (vl, fmt);
- format(std_error, "~a: ", prog_string, nao);
+ format(std_error, L"~a: ", prog_string, nao);
vformat(std_error, fmt, vl);
put_cchar(std_error, '\n');
va_end (vl);
}
}
-static void debuglf(obj_t *line, const char *fmt, ...)
+static void debuglf(obj_t *line, const wchar_t *fmt, ...)
{
if (opt_loglevel >= 2) {
va_list vl;
va_start (vl, fmt);
- format(std_error, "~a: (~a:~a) ", prog_string, spec_file_str, line, nao);
+ format(std_error, L"~a: (~a:~a) ", prog_string, spec_file_str, line, nao);
vformat(std_error, fmt, vl);
put_cchar(std_error, '\n');
va_end (vl);
@@ -72,21 +74,21 @@ static void debuglcf(obj_t *line, const char *fmt, ...)
if (opt_loglevel >= 2) {
va_list vl;
va_start (vl, fmt);
- format(std_error, "~a: (~a:~a) ", prog_string, spec_file_str, line, nao);
+ format(std_error, L"~a: (~a:~a) ", prog_string, spec_file_str, line, nao);
vcformat(std_error, fmt, vl);
put_cchar(std_error, '\n');
va_end (vl);
}
}
-static void sem_error(obj_t *line, const char *fmt, ...)
+static void sem_error(obj_t *line, const wchar_t *fmt, ...)
{
va_list vl;
obj_t *stream = make_string_output_stream();
va_start (vl, fmt);
if (line)
- format(stream, "(~a:~a) ", spec_file_str, line, nao);
+ format(stream, L"(~a:~a) ", spec_file_str, line, nao);
(void) vformat(stream, fmt, vl);
va_end (vl);
@@ -94,14 +96,14 @@ static void sem_error(obj_t *line, const char *fmt, ...)
abort();
}
-static void file_err(obj_t *line, const char *fmt, ...)
+static void file_err(obj_t *line, const wchar_t *fmt, ...)
{
va_list vl;
obj_t *stream = make_string_output_stream();
va_start (vl, fmt);
if (line)
- format(stream, "(~a:~a) ", spec_file_str, line, nao);
+ format(stream, L"(~a:~a) ", spec_file_str, line, nao);
(void) vformat(stream, fmt, vl);
va_end (vl);
@@ -110,43 +112,43 @@ static void file_err(obj_t *line, const char *fmt, ...)
}
-void dump_shell_string(const char *str)
+void dump_shell_string(const wchar_t *str)
{
int ch;
- putchar('"');
+ putwchar('"');
while ((ch = *str++) != 0) {
switch (ch) {
case '"': case '`': case '$': case '\\': case '\n':
- putchar('\\');
+ putwchar('\\');
/* fallthrough */
default:
- putchar(ch);
+ putwchar(ch);
}
}
- putchar('"');
+ putwchar('"');
}
-void dump_var(const char *name, char *pfx1, size_t len1,
- char *pfx2, size_t len2, obj_t *value, int level)
+void dump_var(const wchar_t *name, wchar_t *pfx1, size_t len1,
+ wchar_t *pfx2, size_t len2, obj_t *value, int level)
{
if (len1 >= 112 || len2 >= 112)
internal_error("too much depth in bindings");
if (stringp(value) || chrp(value)) {
- fputs(name, stdout);
- fputs(pfx1, stdout);
- fputs(pfx2, stdout);
- putchar('=');
+ fputws(name, stdout);
+ fputws(pfx1, stdout);
+ fputws(pfx2, stdout);
+ putwchar('=');
if (stringp(value)) {
dump_shell_string(c_str(value));
} else {
- char mini[2];
+ wchar_t mini[2];
mini[0] = c_chr(value);
mini[1] = 0;
dump_shell_string(mini);
}
- putchar('\n');
+ putwchar('\n');
} else {
obj_t *iter;
int i;
@@ -154,10 +156,10 @@ void dump_var(const char *name, char *pfx1, size_t len1,
for (i = 0, iter = value; iter; iter = cdr(iter), i++) {
if (level < opt_arraydims) {
- add2 = sprintf(pfx2 + len2, "[%d]", i);
+ add2 = swprintf(pfx2 + len2, 12, L"[%d]", i);
add1 = 0;
} else {
- add1 = sprintf(pfx1 + len1, "_%d", i);
+ add1 = swprintf(pfx1 + len1, 12, L"_%d", i);
add2 = 0;
}
@@ -174,10 +176,10 @@ void dump_bindings(obj_t *bindings)
}
while (bindings) {
- char pfx1[128], pfx2[128];
+ wchar_t pfx1[128], pfx2[128];
obj_t *var = car(car(bindings));
obj_t *value = cdr(car(bindings));
- const char *name = c_str(symbol_name(var));
+ const wchar_t *name = c_str(symbol_name(var));
*pfx1 = 0; *pfx2 = 0;
dump_var(name, pfx1, 0, pfx2, 0, value, 0);
bindings = cdr(bindings);
@@ -241,7 +243,7 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value)
return bindings;
if (tree_find(cdr(existing), value))
return bindings;
- debugf("bind variable mismatch: ~a", pattern, nao);
+ debugf(L"bind variable mismatch: ~a", pattern, nao);
return t;
}
return cons(cons(pattern, value), bindings);
@@ -274,16 +276,16 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
obj_t *file)
{
#define LOG_MISMATCH(KIND) \
- debuglf(spec_lineno, KIND " mismatch, position ~a (~a:~a)", pos, \
+ debuglf(spec_lineno, KIND L" mismatch, position ~a (~a:~a)", pos, \
file, data_lineno, nao); \
- debuglf(spec_lineno, " ~a", dataline, nao); \
+ debuglf(spec_lineno, L" ~a", dataline, nao); \
if (c_num(pos) < 77) \
debuglcf(spec_lineno, " %*s^", (int) c_num(pos), "")
#define LOG_MATCH(KIND, EXTENT) \
- debuglf(spec_lineno, KIND " matched, position ~a-~a (~a:~a)", \
+ debuglf(spec_lineno, KIND L" matched, position ~a-~a (~a:~a)", \
pos, EXTENT, file, data_lineno, nao); \
- debuglf(spec_lineno, " ~a", dataline, nao); \
+ debuglf(spec_lineno, L" ~a", dataline, nao); \
if (c_num(EXTENT) < 77) \
debuglcf(spec_lineno, " %*s%-*s^", (int) c_num(pos), \
"", (int) (c_num(EXTENT) - c_num(pos)), "^")
@@ -320,18 +322,18 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
if (length_str_lt(dataline, past) || lt(past, pos))
{
- LOG_MISMATCH("fixed field size");
+ LOG_MISMATCH(L"fixed field size");
return nil;
}
if (!tree_find(trim_str(sub_str(dataline, pos, past)),
cdr(pair)))
{
- LOG_MISMATCH("fixed field contents");
+ LOG_MISMATCH(L"fixed field contents");
return nil;
}
- LOG_MATCH("fixed field", past);
+ LOG_MATCH(L"fixed field", past);
pos = past;
specline = cdr(specline);
} else {
@@ -342,20 +344,20 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
if (consp(modifier)) {
obj_t *past = match_regex(dataline, car(modifier), pos);
if (nullp(past)) {
- LOG_MISMATCH("var positive regex");
+ LOG_MISMATCH(L"var positive regex");
return nil;
}
- LOG_MATCH("var positive regex", past);
+ LOG_MATCH(L"var positive regex", past);
bindings = acons_new(bindings, sym, sub_str(dataline, pos, past));
pos = past;
} else if (nump(modifier)) {
obj_t *past = plus(pos, modifier);
if (length_str_lt(dataline, past) || lt(past, pos))
{
- LOG_MISMATCH("count based var");
+ LOG_MISMATCH(L"count based var");
return nil;
}
- LOG_MATCH("count based var", past);
+ LOG_MATCH(L"count based var", past);
bindings = acons_new(bindings, sym, trim_str(sub_str(dataline, pos, past)));
pos = past;
} else {
@@ -365,10 +367,10 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
} else if (type(pat) == STR) {
obj_t *find = search_str(dataline, pat, pos, modifier);
if (!find) {
- LOG_MISMATCH("var delimiting string");
+ LOG_MISMATCH(L"var delimiting string");
return nil;
}
- LOG_MATCH("var delimiting string", find);
+ LOG_MATCH(L"var delimiting string", find);
bindings = acons_new(bindings, sym, sub_str(dataline, pos, find));
pos = plus(find, length_str(pat));
} else if (consp(pat) && typeof(first(pat)) == regex) {
@@ -376,10 +378,10 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
obj_t *fpos = car(find);
obj_t *flen = cdr(find);
if (!find) {
- LOG_MISMATCH("var delimiting regex");
+ LOG_MISMATCH(L"var delimiting regex");
return nil;
}
- LOG_MATCH("var delimiting regex", fpos);
+ LOG_MATCH(L"var delimiting regex", fpos);
bindings = acons_new(bindings, sym, sub_str(dataline, pos, fpos));
pos = plus(fpos, flen);
} else if (consp(pat) && first(pat) == var) {
@@ -389,7 +391,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
obj_t *pair = assoc(bindings, second_sym); /* var exists already? */
if (!pair)
- sem_error(spec_lineno, "consecutive unbound variables", nao);
+ sem_error(spec_lineno, L"consecutive unbound variables", nao);
/* Re-generate a new spec with an edited version of
the element we just processed, and repeat. */
@@ -406,14 +408,14 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
} else if (consp(pat) && (consp(first(pat)) || stringp(first(pat)))) {
cons_bind (find, len, search_str(dataline, pat, pos, modifier));
if (!find) {
- LOG_MISMATCH("string");
+ LOG_MISMATCH(L"string");
return nil;
}
bindings = acons_new(bindings, sym, sub_str(dataline, pos, find));
pos = plus(find, len);
} else {
sem_error(spec_lineno,
- "variable followed by invalid element", nao);
+ L"variable followed by invalid element", nao);
}
} else if (typeof(directive) == regex) {
obj_t *past = match_regex(dataline, directive, pos);
@@ -475,7 +477,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
if (!bindings_coll) {
- debuglf(spec_lineno, "nothing was collected", nao);
+ debuglf(spec_lineno, L"nothing was collected", nao);
return nil;
}
@@ -489,15 +491,15 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
obj_t *newpos;
if (find == nil || !equal(find, pos)) {
- LOG_MISMATCH("string tree");
+ LOG_MISMATCH(L"string tree");
return nil;
}
newpos = plus(find, len);
- LOG_MATCH("string tree", newpos);
+ LOG_MATCH(L"string tree", newpos);
pos = newpos;
} else {
- sem_error(spec_lineno, "unknown directive: ~a", directive, nao);
+ sem_error(spec_lineno, L"unknown directive: ~a", directive, nao);
}
}
break;
@@ -515,7 +517,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
break;
}
default:
- sem_error(spec_lineno, "unsupported object in spec: ~s", elem, nao);
+ sem_error(spec_lineno, L"unsupported object in spec: ~s", elem, nao);
}
specline = cdr(specline);
@@ -628,27 +630,30 @@ fpip_t complex_open(obj_t *name, obj_t *output)
{
fpip_t ret = { 0 };
- const char *namestr = c_str(name);
+ const wchar_t *namestr = c_str(name);
long len = c_num(length_str(name));
if (len == 0)
return ret;
- if (!strcmp(namestr, "-")) {
+ if (!wcscmp(namestr, L"-")) {
ret.close = fpip_fclose;
ret.f = output ? stdout : stdin;
output_produced = output ? 1 : 0;
} else if (namestr[0] == '!') {
ret.close = fpip_pclose;
- ret.f = popen(namestr+1, output ? "w" : "r");
+ ret.f = w_popen(namestr+1, output ? L"w" : L"r");
} else if (namestr[0] == '$') {
+ char *name;
if (output)
return ret;
+ name = (char *) utf8_dup_to(namestr+1);
ret.close = fpip_closedir;
- ret.d = opendir(namestr+1);
+ ret.d = opendir(name);
+ free(name);
} else {
ret.close = fpip_fclose;
- ret.f = fopen(namestr, output ? "w" : "r");
+ ret.f = w_fopen(namestr, output ? L"w" : L"r");
}
return ret;
@@ -758,8 +763,8 @@ void do_output_line(obj_t *bindings, obj_t *specline,
if (directive == var) {
obj_t *str = cat_str(subst_vars(cons(elem, nil), bindings), nil);
if (str == nil)
- sem_error(spec_lineno, "bad substitution: ~a", second(elem), nao);
- fputs(c_str(str), out);
+ sem_error(spec_lineno, L"bad substitution: ~a", second(elem), nao);
+ fputws(c_str(str), out);
} else if (directive == rep) {
obj_t *main_clauses = second(elem);
obj_t *single_clauses = third(elem);
@@ -798,17 +803,17 @@ void do_output_line(obj_t *bindings, obj_t *specline,
}
} else {
- sem_error(spec_lineno, "unknown directive: ~a", directive, nao);
+ sem_error(spec_lineno, L"unknown directive: ~a", directive, nao);
}
}
break;
case STR:
- fputs(c_str(elem), out);
+ fputws(c_str(elem), out);
break;
case 0:
break;
default:
- sem_error(spec_lineno, "unsupported object in output spec: ~s", elem);
+ sem_error(spec_lineno, L"unsupported object in output spec: ~s", elem);
}
}
}
@@ -866,7 +871,7 @@ void do_output(obj_t *bindings, obj_t *specs, FILE *out)
}
do_output_line(bindings, specline, spec_lineno, out);
- putc('\n', out);
+ putwc('\n', out);
}
}
@@ -888,21 +893,21 @@ obj_t *match_files(obj_t *spec, obj_t *files,
obj_t *first_spec_item = second(first(spec));
if (consp(first_spec_item) && eq(first(first_spec_item), next)) {
- debugf("not opening source ~a since query starts with next directive",
+ debugf(L"not opening source ~a since query starts with next directive",
name, nao);
} else {
- debugf("opening data source ~a", name, nao);
+ debugf(L"opening data source ~a", name, nao);
if (complex_open_failed(fp)) {
if (consp(source_spec) && car(source_spec) == nothrow) {
- debugf("could not open ~a: treating as failed match due to nothrow",
+ debugf(L"could not open ~a: treating as failed match due to nothrow",
name, nao);
return nil;
} else if (errno != 0)
- file_err(nil, "could not open ~a (error ~a/~a)", name,
- num(errno), string(strerror(errno)), nao);
+ file_err(nil, L"could not open ~a (error ~a/~a)", name,
+ num(errno), string_utf8(strerror(errno)), nao);
else
- file_err(nil, "could not open ~a", name, nao);
+ file_err(nil, L"could not open ~a", name, nao);
return nil;
}
@@ -931,7 +936,7 @@ repeat_spec_same_data:
if (rest(specline))
sem_error(spec_linenum,
- "unexpected material after skip directive", nao);
+ L"unexpected material after skip directive", nao);
if ((spec = rest(spec)) == nil)
break;
@@ -944,12 +949,12 @@ repeat_spec_same_data:
data, num(data_lineno));
if (result) {
- debuglf(spec_linenum, "skip matched ~a:~a", first(files),
+ debuglf(spec_linenum, L"skip matched ~a:~a", first(files),
num(data_lineno), nao);
break;
}
- debuglf(spec_linenum, "skip didn't match ~a:~a", first(files),
+ debuglf(spec_linenum, L"skip didn't match ~a:~a", first(files),
num(data_lineno), nao);
data = rest(data);
data_lineno++;
@@ -962,12 +967,12 @@ repeat_spec_same_data:
return result;
}
- debuglf(spec_linenum, "skip failed", nao);
+ debuglf(spec_linenum, L"skip failed", nao);
return nil;
} else if (sym == trailer) {
if (rest(specline))
sem_error(spec_linenum,
- "unexpected material after trailer directive", nao);
+ L"unexpected material after trailer directive", nao);
if ((spec = rest(spec)) == nil)
break;
@@ -989,7 +994,7 @@ repeat_spec_same_data:
if ((spec = rest(spec)) == nil) {
sem_error(spec_linenum,
- "freeform must be followed by a query line", nao);
+ L"freeform must be followed by a query line", nao);
} else {
obj_t *limit = or2(if2(nump(first(vals)), first(vals)),
if2(nump(second(vals)), second(vals)));
@@ -1003,7 +1008,7 @@ repeat_spec_same_data:
spec_linenum, num(data_lineno), first(files)));
if (!success) {
- debuglf(spec_linenum, "freeform match failure", nao);
+ debuglf(spec_linenum, L"freeform match failure", nao);
return nil;
}
@@ -1021,7 +1026,7 @@ repeat_spec_same_data:
obj_t *name = first(rest(first_spec));
if (rest(specline))
sem_error(spec_linenum,
- "unexpected material after block directive", nao);
+ L"unexpected material after block directive", nao);
if ((spec = rest(spec)) == nil)
break;
{
@@ -1034,7 +1039,7 @@ repeat_spec_same_data:
obj_t *target = first(rest(first_spec));
if (rest(specline))
- sem_error(spec_linenum, "unexpected material after ~a", sym, nao);
+ sem_error(spec_linenum, L"unexpected material after ~a", sym, nao);
uw_block_return(target,
if2(sym == accept,
@@ -1042,15 +1047,15 @@ repeat_spec_same_data:
if3(data, cons(data, num(data_lineno)), t))));
/* TODO: uw_block_return could just throw this */
if (target)
- sem_error(spec_linenum, "~a: no block named ~a in scope",
+ sem_error(spec_linenum, L"~a: no block named ~a in scope",
sym, target, nao);
else
- sem_error(spec_linenum, "%~a: no anonymous block in scope", sym, nao);
+ sem_error(spec_linenum, L"%~a: no anonymous block in scope", sym, nao);
return nil;
} else if (sym == next) {
if (rest(first_spec) && rest(specline))
sem_error(spec_linenum,
- "invalid combination of old and new next syntax", nao);
+ L"invalid combination of old and new next syntax", nao);
if ((spec = rest(spec)) == nil)
break;
@@ -1061,7 +1066,7 @@ repeat_spec_same_data:
if (eq(first(source), nothrow))
push(nil, &source);
else if (eq(first(source), args)) {
- obj_t *input_name = string("args");
+ obj_t *input_name = string(L"args");
cons_bind (new_bindings, success,
match_files(spec, cons(input_name, files),
bindings, files, one));
@@ -1076,7 +1081,7 @@ repeat_spec_same_data:
obj_t *name = cdr(val);
if (!val)
- sem_error(spec_linenum, "next: unbound variable in form ~a",
+ sem_error(spec_linenum, L"next: unbound variable in form ~a",
first(source), nao);
if (eq(second(source), nothrow)) {
@@ -1085,7 +1090,7 @@ repeat_spec_same_data:
} else {
files = rest(files);
if (!files) {
- debuglf(spec_linenum, "next: out of arguments", nao);
+ debuglf(spec_linenum, L"next: out of arguments", nao);
return nil;
}
files = cons(cons(nothrow, first(files)), rest(files));
@@ -1096,7 +1101,7 @@ repeat_spec_same_data:
} else {
files = rest(files);
if (!files)
- sem_error(spec_linenum, "next: out of arguments", nao);
+ sem_error(spec_linenum, L"next: out of arguments", nao);
files = cons(cons(nothrow, first(files)), rest(files));
}
}
@@ -1105,14 +1110,14 @@ repeat_spec_same_data:
obj_t *sub = subst_vars(rest(specline), bindings);
obj_t *str = cat_str(sub, nil);
if (str == nil) {
- sem_error(spec_linenum, "bad substitution in next file spec", nao);
+ sem_error(spec_linenum, L"bad substitution in next file spec", nao);
continue;
}
files = cons(cons(nothrow, str), files);
} else {
files = rest(files);
if (!files)
- sem_error(spec_linenum, "next: out of arguments", nao);
+ sem_error(spec_linenum, L"next: out of arguments", nao);
}
/* We recursively process the file list, but the new
@@ -1166,17 +1171,17 @@ repeat_spec_same_data:
}
if (sym == all && !all_match) {
- debuglf(spec_linenum, "all: some clauses didn't match", nao);
+ debuglf(spec_linenum, L"all: some clauses didn't match", nao);
return nil;
}
if ((sym == some || sym == cases) && !some_match) {
- debuglf(spec_linenum, "some/cases: no clauses matched", nao);
+ debuglf(spec_linenum, L"some/cases: no clauses matched", nao);
return nil;
}
if (sym == none && some_match) {
- debuglf(spec_linenum, "none: some clauses matched", nao);
+ debuglf(spec_linenum, L"none: some clauses matched", nao);
return nil;
}
@@ -1255,7 +1260,7 @@ repeat_spec_same_data:
data = new_data;
data_lineno = new_lineno;
} else {
- debuglf(spec_linenum, "collect consumed entire file", nao);
+ debuglf(spec_linenum, L"collect consumed entire file", nao);
data = nil;
}
} else {
@@ -1267,12 +1272,12 @@ repeat_spec_same_data:
uw_block_end;
if (!result) {
- debuglf(spec_linenum, "collect explicitly failed", nao);
+ debuglf(spec_linenum, L"collect explicitly failed", nao);
return nil;
}
if (!bindings_coll) {
- debuglf(spec_linenum, "nothing was collected", nao);
+ debuglf(spec_linenum, L"nothing was collected", nao);
return nil;
}
@@ -1293,7 +1298,7 @@ repeat_spec_same_data:
obj_t *sym = first(iter);
if (!symbolp(sym)) {
- sem_error(spec_linenum, "non-symbol in flatten directive", nao);
+ sem_error(spec_linenum, L"non-symbol in flatten directive", nao);
} else {
obj_t *existing = assoc(bindings, sym);
@@ -1319,7 +1324,7 @@ repeat_spec_same_data:
obj_t *merged = nil;
if (!target || !symbolp(target))
- sem_error(spec_linenum, "bad merge directive", nao);
+ sem_error(spec_linenum, L"bad merge directive", nao);
for (; args; args = rest(args)) {
obj_t *other_sym = first(args);
@@ -1328,9 +1333,9 @@ repeat_spec_same_data:
obj_t *other_lookup = assoc(bindings, other_sym);
if (!symbolp(other_sym))
- sem_error(spec_linenum, "non-symbol in merge directive", nao);
+ sem_error(spec_linenum, L"non-symbol in merge directive", nao);
else if (!other_lookup)
- sem_error(spec_linenum, "merge: nonexistent symbol ~a",
+ sem_error(spec_linenum, L"merge: nonexistent symbol ~a",
other_sym, nao);
if (merged)
@@ -1353,7 +1358,7 @@ repeat_spec_same_data:
obj_t *val = eval_form(form, bindings);
if (!val)
- sem_error(spec_linenum, "bind: unbound variable on right side", nao);
+ sem_error(spec_linenum, L"bind: unbound variable on right side", nao);
bindings = dest_bind(bindings, pattern, cdr(val));
@@ -1371,7 +1376,7 @@ repeat_spec_same_data:
obj_t *sym = first(iter);
if (!symbolp(sym)) {
- sem_error(spec_linenum, "non-symbol in cat directive", nao);
+ sem_error(spec_linenum, L"non-symbol in cat directive", nao);
} else {
obj_t *existing = assoc(bindings, sym);
obj_t *sep = nil;
@@ -1408,28 +1413,28 @@ repeat_spec_same_data:
obj_t *val = eval_form(form, bindings);
if (!val)
- sem_error(spec_linenum, "output: unbound variable in form ~a",
+ sem_error(spec_linenum, L"output: unbound variable in form ~a",
form, nao);
nt = eq(second(new_style_dest), nothrow);
- dest = or2(cdr(val), string("-"));
+ dest = or2(cdr(val), string(L"-"));
}
}
fpip_t fp = (errno = 0, complex_open(dest, t));
- debugf("opening data sink ~a", dest, nao);
+ debugf(L"opening data sink ~a", dest, nao);
if (complex_open_failed(fp)) {
if (nt) {
- debugf("could not open ~a: treating as failed match due to nothrow",
+ debugf(L"could not open ~a: treating as failed match due to nothrow",
dest, nao);
return nil;
} else if (errno != 0) {
- file_err(nil, "could not open ~a (error ~a/~a)", dest,
- num(errno), string(strerror(errno)), nao);
+ file_err(nil, L"could not open ~a (error ~a/~a)", dest,
+ num(errno), string_utf8(strerror(errno)), nao);
} else {
- file_err(nil, "could not open ~a", dest, nao);
+ file_err(nil, L"could not open ~a", dest, nao);
}
} else {
do_output(bindings, specs, fp.f);
@@ -1447,7 +1452,7 @@ repeat_spec_same_data:
obj_t *params = second(args);
if (rest(specline))
- sem_error(spec_linenum, "unexpected material after define", nao);
+ sem_error(spec_linenum, L"unexpected material after define", nao);
uw_set_func(name, cons(params, body));
@@ -1590,7 +1595,7 @@ repeat_spec_same_data:
} else if (sym == defex) {
obj_t *types = rest(first_spec);
if (!all_satisfy(types, func_n1(symbolp), nil))
- sem_error(spec_linenum, "defex arguments must all be symbols", nao);
+ sem_error(spec_linenum, L"defex arguments must all be symbols", nao);
(void) reduce_left(func_n2(uw_register_subtype), types, nil, nil);
if ((spec = rest(spec)) == nil)
break;
@@ -1599,7 +1604,7 @@ repeat_spec_same_data:
obj_t *type = second(first_spec);
obj_t *args = rest(rest(first_spec));
if (!symbolp(type))
- sem_error(spec_linenum, "throw: ~a is not a type symbol",
+ sem_error(spec_linenum, L"throw: ~a is not a type symbol",
first(first_spec), nao);
{
obj_t *values = mapcar(bind2other(func_n2(eval_form), bindings),
@@ -1618,7 +1623,7 @@ repeat_spec_same_data:
obj_t *bindings_cp = copy_alist(bindings);
if (!equal(length(args), length(params)))
- sem_error(spec_linenum, "function ~a takes ~a argument(s)",
+ sem_error(spec_linenum, L"function ~a takes ~a argument(s)",
sym, length(params), nao);
for (piter = params, aiter = args; piter;
@@ -1641,7 +1646,7 @@ repeat_spec_same_data:
obj_t *val = eval_form(arg, bindings);
if (!val)
sem_error(spec_linenum,
- "unbound variable in function argument form", nao);
+ L"unbound variable in function argument form", nao);
bindings_cp = acons_new(bindings_cp, param, cdr(val));
}
}
@@ -1655,7 +1660,7 @@ repeat_spec_same_data:
uw_block_end;
if (!result) {
- debuglf(spec_linenum, "function failed", nao);
+ debuglf(spec_linenum, L"function failed", nao);
return nil;
}
@@ -1671,8 +1676,8 @@ repeat_spec_same_data:
if (newbind) {
bindings = dest_bind(bindings, arg, cdr(newbind));
if (bindings == t) {
- debuglf(spec_linenum, "binding mismatch on ~a "
- "when returning from ~a", arg, sym, nao);
+ debuglf(spec_linenum, L"binding mismatch on ~a "
+ L"when returning from ~a", arg, sym, nao);
return nil;
}
}
@@ -1686,7 +1691,7 @@ repeat_spec_same_data:
data = car(success);
data_lineno = c_num(cdr(success));
} else {
- debuglf(spec_linenum, "function consumed entire file", nao);
+ debuglf(spec_linenum, L"function consumed entire file", nao);
data = nil;
}
}
@@ -1709,7 +1714,7 @@ repeat_spec_same_data:
spec_linenum, num(data_lineno), first(files)));
if (nump(success) && c_num(success) < c_num(length_str(dataline))) {
- debuglf(spec_linenum, "spec only matches line to position ~a: ~a",
+ debuglf(spec_linenum, L"spec only matches line to position ~a: ~a",
success, dataline, nao);
return nil;
}
diff --git a/parser.h b/parser.h
index 5d3e95cf..da2daae3 100644
--- a/parser.h
+++ b/parser.h
@@ -28,7 +28,7 @@
long lineno;
extern int errors;
extern obj_t *yyin_stream;
-extern const char *spec_file;
+extern const wchar_t *spec_file;
extern obj_t *spec_file_str;
int yyparse(void);
obj_t *get_spec(void);
diff --git a/parser.l b/parser.l
index d35c23ad..5919f929 100644
--- a/parser.l
+++ b/parser.l
@@ -33,10 +33,12 @@
#include <limits.h>
#include <errno.h>
#include <dirent.h>
+#include <wchar.h>
#include "y.tab.h"
#include "lib.h"
#include "gc.h"
#include "stream.h"
+#include "utf8.h"
#include "parser.h"
#define YY_NO_UNPUT
@@ -73,7 +75,7 @@ void yyerrorf(const char *s, ...)
if (opt_loglevel >= 1) {
va_list vl;
va_start (vl, s);
- fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno);
+ fprintf(stderr, "%ls: (%ls:%ld): ", progname, spec_file, lineno);
vfprintf(stderr, s, vl);
putc('\n', stderr);
va_end (vl);
@@ -127,33 +129,33 @@ void yybadtoken(int tok, const char *context)
yyerrorf("unexpected end of input");
}
-static int char_esc(int letter)
+static wchar_t char_esc(int letter)
{
switch (letter) {
- case 'a': return '\a';
- case 'b': return '\b';
- case 't': return '\t';
- case 'n': return '\n';
- case 'v': return '\v';
- case 'f': return '\f';
- case 'r': return '\r';
+ case 'a': return L'\a';
+ case 'b': return L'\b';
+ case 't': return L'\t';
+ case 'n': return L'\n';
+ case 'v': return L'\v';
+ case 'f': return L'\f';
+ case 'r': return L'\r';
case 'e': return 27;
- case '"': return '"';
- case '\'': return '\'';
- case '`': return '`';
+ case '"': return L'"';
+ case '\'': return L'\'';
+ case '`': return L'`';
}
abort();
}
-static int num_esc(char *num)
+static wchar_t num_esc(char *num)
{
if (num[0] == 'x') {
- if (strlen(num) > 3)
+ if (strlen(num) > 7)
yyerror("too many digits in hex character escape");
return strtol(num + 1, 0, 16);
} else {
- if (strlen(num) > 3)
+ if (strlen(num) > 8)
yyerror("too many digits in octal character escape");
return strtol(num, 0, 8);
}
@@ -170,6 +172,17 @@ WS [\t ]*
HEX [0-9A-Fa-f]
OCT [0-7]
+ASC [\x00-\x7f]
+ASCN [\x00-\t\v-\x7f]
+U [\x80-\xbf]
+U2 [\xc2-\xdf]
+U3 [\xe0-\xef]
+U4 [\xf0-\xf4]
+
+UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
+UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
+UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
+
%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT
%%
@@ -188,7 +201,7 @@ OCT [0-7]
if (*errp != 0) {
/* not a number */
- yylval.lexeme = strdup(yytext);
+ yylval.lexeme = utf8_dup_from(yytext);
return IDENT;
}
@@ -346,7 +359,7 @@ OCT [0-7]
<SPECIAL>@ {
yy_pop_state();
- yylval.lexeme = strdup("@");
+ yylval.lexeme = wcsdup(L"@");
return TEXT;
}
@@ -365,26 +378,25 @@ OCT [0-7]
}
<SPECIAL>[\\][abtnvfre] {
- char lexeme[2];
+ wchar_t lexeme[2];
lexeme[0] = char_esc(yytext[1]);
lexeme[1] = 0;
- yylval.lexeme = strdup(lexeme);
+ yylval.lexeme = wcsdup(lexeme);
yy_pop_state();
return TEXT;
}
<SPECIAL>[\\](x{HEX}+|{OCT}+) {
- char lexeme[2];
+ wchar_t lexeme[2];
lexeme[0] = num_esc(yytext + 1);
lexeme[1] = 0;
- yylval.lexeme = strdup(lexeme);
+ yylval.lexeme = wcsdup(lexeme);
yy_pop_state();
return TEXT;
}
-<SPECIAL,NESTED>. {
- yyerrorf("bad character in directive: '%c'",
- yytext[0]);
+<SPECIAL,NESTED>{UANYN} {
+ yyerrorf("bad character in directive: '%s'", yytext);
}
<REGEX>[/] {
@@ -433,15 +445,17 @@ OCT [0-7]
return REGCHAR;
}
-<REGEX>. {
- yylval.chr = yytext[0];
+<REGEX>{UANYN} {
+ wchar_t buf[8];
+ utf8_from(buf, yytext);
+ yylval.chr = buf[0];
return REGCHAR;
}
-<INITIAL>[^@\n]+ {
- yylval.lexeme = strdup(yytext);
- return TEXT;
- }
+<INITIAL>({UONLY}|[^@\n])+ {
+ yylval.lexeme = utf8_dup_from(yytext);
+ return TEXT;
+ }
<INITIAL>\n {
lineno++;
@@ -515,9 +529,11 @@ OCT [0-7]
yy_push_state(SPECIAL);
}
-<STRLIT,CHRLIT,QSILIT>. {
- yylval.chr = yytext[0];
- return LITCHAR;
- }
+<STRLIT,CHRLIT,QSILIT>{UANYN} {
+ wchar_t buf[8];
+ utf8_from(buf, yytext);
+ yylval.chr = buf[0];
+ return LITCHAR;
+ }
%%
diff --git a/parser.y b/parser.y
index 7a9b11b0..1a3a020b 100644
--- a/parser.y
+++ b/parser.y
@@ -32,6 +32,7 @@
#include <dirent.h>
#include "lib.h"
#include "regex.h"
+#include "utf8.h"
#include "parser.h"
int yylex(void);
@@ -46,9 +47,9 @@ static obj_t *parsed_spec;
%}
%union {
- char *lexeme;
+ wchar_t *lexeme;
union obj *obj;
- char chr;
+ wchar_t chr;
long num;
}
diff --git a/regex.c b/regex.c
index 630bb012..d67838e6 100644
--- a/regex.c
+++ b/regex.c
@@ -528,9 +528,9 @@ int nfa_move(nfa_state_t **in, int nin, nfa_state_t **out, int ch)
* determines the match length (defaulting to zero
* if no acceptance states were encountered).
*/
-long nfa_run(nfa_t nfa, const char *str)
+long nfa_run(nfa_t nfa, const wchar_t *str)
{
- const char *last_accept_pos = 0, *ptr = str;
+ const wchar_t *last_accept_pos = 0, *ptr = str;
unsigned visited = nfa.start->a.visited + 1;
nfa_state_t **move = chk_malloc(NFA_SET_SIZE * sizeof *move);
nfa_state_t **clos = chk_malloc(NFA_SET_SIZE * sizeof *clos);
@@ -697,7 +697,7 @@ obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start,
if (from_end) {
long i;
long s = c_num(start);
- const char *h = c_str(haystack);
+ const wchar_t *h = c_str(haystack);
for (i = c_num(length_str(haystack)) - 1; i >= s; i--) {
long span = nfa_run(*pnfa, h + i);
diff --git a/regex.h b/regex.h
index 873682a4..8deabb01 100644
--- a/regex.h
+++ b/regex.h
@@ -110,7 +110,7 @@ typedef struct nfa_machine {
nfa_t nfa_compile_regex(obj_t *regex);
void nfa_free(nfa_t);
-long nfa_run(nfa_t nfa, const char *str);
+long nfa_run(nfa_t nfa, const wchar_t *str);
void nfa_machine_reset(nfa_machine_t *);
void nfa_machine_init(nfa_machine_t *, nfa_t);
void nfa_machine_cleanup(nfa_machine_t *);
diff --git a/stream.c b/stream.c
index c76db73a..91ee2e85 100644
--- a/stream.c
+++ b/stream.c
@@ -32,21 +32,23 @@
#include <assert.h>
#include <setjmp.h>
#include <errno.h>
+#include <wchar.h>
#include "lib.h"
#include "gc.h"
#include "unwind.h"
#include "stream.h"
+#include "utf8.h"
obj_t *std_input, *std_output, *std_error;
struct strm_ops {
struct cobj_ops cobj_ops;
- obj_t *(*put_string)(obj_t *, const char *);
+ obj_t *(*put_string)(obj_t *, const wchar_t *);
obj_t *(*put_char)(obj_t *, int);
obj_t *(*get_line)(obj_t *);
obj_t *(*get_char)(obj_t *);
obj_t *(*vcformat)(obj_t *, const char *fmt, va_list vl);
- obj_t *(*vformat)(obj_t *, const char *fmt, va_list vl);
+ obj_t *(*vformat)(obj_t *, const wchar_t *fmt, va_list vl);
obj_t *(*close)(obj_t *, obj_t *);
};
@@ -60,7 +62,7 @@ static void common_destroy(obj_t *obj)
(void) close_stream(obj, nil);
}
-obj_t *common_vformat(obj_t *stream, const char *fmt, va_list vl)
+obj_t *common_vformat(obj_t *stream, const wchar_t *fmt, va_list vl)
{
int ch;
@@ -109,7 +111,7 @@ struct stdio_handle {
void stdio_stream_print(obj_t *stream, obj_t *out)
{
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
- format(out, "#<~s ~s>", stream->co.cls, h->descr, nao);
+ format(out, L"#<~s ~s>", stream->co.cls, h->descr, nao);
}
void stdio_stream_destroy(obj_t *stream)
@@ -130,8 +132,8 @@ static obj_t *stdio_maybe_read_error(obj_t *stream)
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
if (ferror(h->f)) {
clearerr(h->f);
- uw_throwf(file_error, "error reading ~a: ~a/~s",
- stream, num(errno), string(strerror(errno)));
+ uw_throwf(file_error, L"error reading ~a: ~a/~s",
+ stream, num(errno), string_utf8(strerror(errno)));
}
return nil;
}
@@ -141,16 +143,16 @@ static obj_t *stdio_maybe_write_error(obj_t *stream)
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
if (ferror(h->f)) {
clearerr(h->f);
- uw_throwf(file_error, "error writing ~a: ~a/~s",
- stream, num(errno), string(strerror(errno)));
+ uw_throwf(file_error, L"error writing ~a: ~a/~s",
+ stream, num(errno), string_utf8(strerror(errno)));
}
return nil;
}
-static obj_t *stdio_put_string(obj_t *stream, const char *s)
+static obj_t *stdio_put_string(obj_t *stream, const wchar_t *s)
{
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
- return (h->f && fputs(s, h->f) != EOF) ? t : stdio_maybe_write_error(stream);
+ return (h->f && fputws(s, h->f) != EOF) ? t : stdio_maybe_write_error(stream);
}
static obj_t *stdio_put_char(obj_t *stream, int ch)
@@ -159,12 +161,12 @@ static obj_t *stdio_put_char(obj_t *stream, int ch)
return (h->f && putc(ch, h->f) != EOF) ? t : stdio_maybe_write_error(stream);
}
-static char *snarf_line(FILE *in)
+static wchar_t *snarf_line(FILE *in)
{
const size_t min_size = 512;
size_t size = 0;
size_t fill = 0;
- char *buf = 0;
+ wchar_t *buf = 0;
for (;;) {
int ch = getc(in);
@@ -174,7 +176,7 @@ static char *snarf_line(FILE *in)
if (fill >= size) {
size_t newsize = size ? size * 2 : min_size;
- buf = chk_realloc(buf, newsize);
+ buf = chk_realloc(buf, newsize * sizeof *buf);
size = newsize;
}
@@ -186,7 +188,7 @@ static char *snarf_line(FILE *in)
}
if (buf)
- buf = chk_realloc(buf, fill);
+ buf = chk_realloc(buf, fill * sizeof *buf);
return buf;
}
@@ -197,7 +199,7 @@ static obj_t *stdio_get_line(obj_t *stream)
return nil;
} else {
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
- char *line = snarf_line(h->f);
+ wchar_t *line = snarf_line(h->f);
if (!line)
return stdio_maybe_read_error(stream);
return string_own(line);
@@ -233,8 +235,8 @@ static obj_t *stdio_close(obj_t *stream, obj_t *throw_on_error)
int result = fclose(h->f);
h->f = 0;
if (result == EOF && throw_on_error) {
- uw_throwf(file_error, "error closing ~a: ~a/~s",
- stream, num(errno), string(strerror(errno)));
+ uw_throwf(file_error, L"error closing ~a: ~a/~s",
+ stream, num(errno), string_utf8(strerror(errno)));
}
return result != EOF ? t : nil;
}
@@ -266,22 +268,23 @@ static obj_t *pipe_close(obj_t *stream, obj_t *throw_on_error)
if (status != 0 && throw_on_error) {
if (status < 0) {
- uw_throwf(process_error, "unable to obtain status of command ~a: ~a/~s",
- stream, num(errno), string(strerror(errno)), nao);
+ uw_throwf(process_error,
+ L"unable to obtain status of command ~a: ~a/~s",
+ stream, num(errno), string_utf8(strerror(errno)), nao);
} else if (WIFEXITED(status)) {
int exitstatus = WEXITSTATUS(status);
- uw_throwf(process_error, "pipe ~a terminated with status ~a",
+ uw_throwf(process_error, L"pipe ~a terminated with status ~a",
stream, num(exitstatus), nao);
} else if (WIFSIGNALED(status)) {
int termsig = WTERMSIG(status);
- uw_throwf(process_error, "pipe ~a terminated by signal ~a",
+ uw_throwf(process_error, L"pipe ~a terminated by signal ~a",
stream, num(termsig), nao);
} else if (WIFSTOPPED(status) || WIFCONTINUED(status)) {
- uw_throwf(process_error, "processes of closed pipe ~a still running",
+ uw_throwf(process_error, L"processes of closed pipe ~a still running",
stream, nao);
} else {
- uw_throwf(file_error, "strange status in when closing pipe ~a",
+ uw_throwf(file_error, L"strange status in when closing pipe ~a",
stream, nao);
}
}
@@ -356,7 +359,7 @@ static struct strm_ops string_in_ops = {
};
struct string_output {
- char *buf;
+ wchar_t *buf;
size_t size;
size_t fill;
};
@@ -373,14 +376,14 @@ static void string_out_stream_destroy(obj_t *stream)
}
}
-static obj_t *string_out_put_string(obj_t *stream, const char *s)
+static obj_t *string_out_put_string(obj_t *stream, const wchar_t *s)
{
struct string_output *so = (struct string_output *) stream->co.handle;
if (so == 0) {
return nil;
} else {
- size_t len = strlen(s);
+ size_t len = wcslen(s);
size_t old_size = so->size;
size_t required_size = len + so->fill + 1;
@@ -393,8 +396,8 @@ static obj_t *string_out_put_string(obj_t *stream, const char *s)
return nil;
}
- so->buf = chk_realloc(so->buf, so->size);
- memcpy(so->buf + so->fill, s, len + 1);
+ so->buf = chk_realloc(so->buf, so->size * sizeof *so->buf);
+ memcpy(so->buf + so->fill, s, (len + 1) * sizeof *so->buf);
so->fill += len;
return t;
}
@@ -402,7 +405,7 @@ static obj_t *string_out_put_string(obj_t *stream, const char *s)
static obj_t *string_out_put_char(obj_t *stream, int ch)
{
- char mini[2];
+ wchar_t mini[2];
mini[0] = ch;
mini[1] = 0;
return string_out_put_string(stream, mini);
@@ -415,11 +418,12 @@ obj_t *string_out_vcformat(obj_t *stream, const char *fmt, va_list vl)
if (so == 0) {
return nil;
} else {
- int nchars, nchars2;
+ int nchars, nchars2, nchars3;
char dummy_buf[1];
size_t old_size = so->size;
size_t required_size;
va_list vl_copy;
+ char *utf8_buf;
#if defined va_copy
va_copy (vl_copy, vl);
@@ -437,21 +441,31 @@ obj_t *string_out_vcformat(obj_t *stream, const char *fmt, va_list vl)
bug_unless (nchars >= 0);
- required_size = so->fill + nchars + 1;
+ utf8_buf = chk_malloc(nchars + 1);
+ nchars2 = vsnprintf(utf8_buf, nchars + 1, fmt, vl);
+ bug_unless (nchars == nchars2);
+
+ nchars3 = utf8_from(0, utf8_buf);
- if (required_size < so->fill)
+ required_size = so->fill + nchars3 + 1;
+
+ if (required_size < so->fill) {
+ free(utf8_buf);
return nil;
+ }
while (so->size <= required_size) {
so->size *= 2;
- if (so->size < old_size)
+ if (so->size < old_size) {
+ free(utf8_buf);
return nil;
+ }
}
- so->buf = chk_realloc(so->buf, so->size);
- nchars2 = vsnprintf(so->buf + so->fill, so->size-so->fill, fmt, vl);
- bug_unless (nchars == nchars2);
- so->fill += nchars;
+ so->buf = chk_realloc(so->buf, so->size * sizeof *so->buf);
+ utf8_from(so->buf, utf8_buf);
+ free(utf8_buf);
+ so->fill += nchars3;
return t;
}
}
@@ -483,7 +497,7 @@ static obj_t *dir_get_line(obj_t *stream)
return nil;
if (!strcmp(e->d_name, ".") || !strcmp(e->d_name, ".."))
continue;
- return string(e->d_name);
+ return string_utf8(e->d_name);
}
}
}
@@ -539,7 +553,7 @@ obj_t *make_string_output_stream(void)
{
struct string_output *so = (struct string_output *) chk_malloc(sizeof *so);
so->size = 128;
- so->buf = (char *) chk_malloc(so->size);
+ so->buf = (wchar_t *) chk_malloc(so->size * sizeof so->buf);
so->fill = 0;
so->buf[0] = 0;
return cobj((void *) so, stream_t, &string_out_ops.cobj_ops);
@@ -548,7 +562,7 @@ obj_t *make_string_output_stream(void)
obj_t *get_string_from_stream(obj_t *stream)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
if (stream->co.ops == &string_out_ops.cobj_ops) {
struct string_output *so = (struct string_output *) stream->co.handle;
@@ -579,7 +593,7 @@ obj_t *make_dir_stream(DIR *dir)
obj_t *close_stream(obj_t *stream, obj_t *throw_on_error)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -590,7 +604,7 @@ obj_t *close_stream(obj_t *stream, obj_t *throw_on_error)
obj_t *get_line(obj_t *stream)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -601,7 +615,7 @@ obj_t *get_line(obj_t *stream)
obj_t *get_char(obj_t *stream)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -609,10 +623,10 @@ obj_t *get_char(obj_t *stream)
}
}
-obj_t *vformat(obj_t *stream, const char *str, va_list vl)
+obj_t *vformat(obj_t *stream, const wchar_t *str, va_list vl)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -623,7 +637,7 @@ obj_t *vformat(obj_t *stream, const char *str, va_list vl)
obj_t *vcformat(obj_t *stream, const char *string, va_list vl)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -631,10 +645,10 @@ obj_t *vcformat(obj_t *stream, const char *string, va_list vl)
}
}
-obj_t *format(obj_t *stream, const char *str, ...)
+obj_t *format(obj_t *stream, const wchar_t *str, ...)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -651,7 +665,7 @@ obj_t *format(obj_t *stream, const char *str, ...)
obj_t *cformat(obj_t *stream, const char *string, ...)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -668,7 +682,7 @@ obj_t *cformat(obj_t *stream, const char *string, ...)
obj_t *put_string(obj_t *stream, obj_t *string)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -676,10 +690,10 @@ obj_t *put_string(obj_t *stream, obj_t *string)
}
}
-obj_t *put_cstring(obj_t *stream, const char *str)
+obj_t *put_cstring(obj_t *stream, const wchar_t *str)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -690,7 +704,7 @@ obj_t *put_cstring(obj_t *stream, const char *str)
obj_t *put_char(obj_t *stream, obj_t *ch)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -701,7 +715,7 @@ obj_t *put_char(obj_t *stream, obj_t *ch)
obj_t *put_cchar(obj_t *stream, int ch)
{
type_check (stream, COBJ);
- type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream));
+ type_assert (stream->co.cls == stream_t, (L"~a is not a stream", stream));
{
struct strm_ops *ops = (struct strm_ops *) stream->co.ops;
@@ -717,7 +731,7 @@ obj_t *put_line(obj_t *stream, obj_t *string)
void stream_init(void)
{
protect(&std_input, &std_output, &std_error, (obj_t **) 0);
- std_input = make_stdio_stream(stdin, string("stdin"), t, nil);
- std_output = make_stdio_stream(stdout, string("stdout"), nil, t);
- std_error = make_stdio_stream(stderr, string("stderr"), nil, t);
+ std_input = make_stdio_stream(stdin, string(L"stdin"), t, nil);
+ std_output = make_stdio_stream(stdout, string(L"stdout"), nil, t);
+ std_error = make_stdio_stream(stderr, string(L"stderr"), nil, t);
}
diff --git a/stream.h b/stream.h
index ba483b27..78f83f93 100644
--- a/stream.h
+++ b/stream.h
@@ -35,13 +35,13 @@ obj_t *make_dir_stream(DIR *);
obj_t *close_stream(obj_t *stream, obj_t *throw_on_error);
obj_t *get_line(obj_t *);
obj_t *get_char(obj_t *);
-obj_t *vformat(obj_t *stream, const char *string, va_list); /* nao-terminated */
+obj_t *vformat(obj_t *stream, const wchar_t *string, va_list); /* nao-term */
obj_t *vcformat(obj_t *stream, const char *string, va_list); /* printf-style */
-obj_t *format(obj_t *stream, const char *string, ...);
+obj_t *format(obj_t *stream, const wchar_t *string, ...);
obj_t *cformat(obj_t *stream, const char *string, ...);
obj_t *put_string(obj_t *stream, obj_t *string);
obj_t *put_line(obj_t *stream, obj_t *string);
-obj_t *put_cstring(obj_t *stream, const char *);
+obj_t *put_cstring(obj_t *stream, const wchar_t *);
obj_t *put_char(obj_t *stream, obj_t *ch);
obj_t *put_cchar(obj_t *stream, int ch);
diff --git a/txr.c b/txr.c
index c68b4d47..6192014b 100644
--- a/txr.c
+++ b/txr.c
@@ -31,17 +31,19 @@
#include <limits.h>
#include <dirent.h>
#include <setjmp.h>
+#include <wchar.h>
#include "lib.h"
#include "stream.h"
#include "gc.h"
#include "unwind.h"
#include "parser.h"
#include "match.h"
+#include "utf8.h"
#include "txr.h"
-const char *version = "020";
-const char *progname = "txr";
-const char *spec_file = "stdin";
+const wchar_t *version = L"020";
+const wchar_t *progname = L"txr";
+const wchar_t *spec_file = L"stdin";
obj_t *spec_file_str;
/*
@@ -51,7 +53,7 @@ obj_t *spec_file_str;
*/
void *oom_realloc_handler(void *old, size_t size)
{
- fprintf(stderr, "%s: out of memory\n", progname);
+ fprintf(stderr, "%ls: out of memory\n", progname);
puts("false");
abort();
}
@@ -60,13 +62,13 @@ void help(void)
{
const char *text =
"\n"
-"txr version %s\n"
+"txr version %ls\n"
"\n"
"copyright 2009, Kaz Kylheku <kkylheku@gmail.com>\n"
"\n"
"usage:\n"
"\n"
-" %s [ options ] query-file { data-file }*\n"
+" %ls [ options ] query-file { data-file }*\n"
"\n"
"The query-file or data-file arguments may be specified as -, in which case\n"
"standard input is used. All data-file arguments which begin with a !\n"
@@ -110,7 +112,7 @@ void help(void)
void hint(void)
{
- fprintf(stderr, "%s: incorrect arguments: try --help\n", progname);
+ fprintf(stderr, "%ls: incorrect arguments: try --help\n", progname);
}
obj_t *remove_hash_bang_line(obj_t *spec)
@@ -119,7 +121,7 @@ obj_t *remove_hash_bang_line(obj_t *spec)
return spec;
{
- obj_t *shbang = string("#!");
+ obj_t *shbang = string(L"#!");
obj_t *firstline = first(spec);
obj_t *items = rest(firstline);
@@ -138,7 +140,7 @@ static int txr_main(int argc, char **argv);
int main(int argc, char **argv)
{
obj_t *stack_bottom = nil;
- progname = argv[0] ? argv[0] : progname;
+ progname = argv[0] ? utf8_dup_from(argv[0]) : progname;
init(progname, oom_realloc_handler, &stack_bottom);
return txr_main(argc, argv);
}
@@ -149,7 +151,6 @@ static int txr_main(int argc, char **argv)
obj_t *spec = nil;
obj_t *bindings = nil;
int match_loglevel = opt_loglevel;
- progname = argv[0] ? argv[0] : progname;
prot1(&spec_file_str);
@@ -189,7 +190,7 @@ static int txr_main(int argc, char **argv)
val[piece] = 0;
- list = cons(string(val), list);
+ list = cons(string_utf8(val), list);
if (!comma_p)
break;
@@ -198,13 +199,13 @@ static int txr_main(int argc, char **argv)
}
list = nreverse(list);
- bindings = cons(cons(intern(string(var)), list), bindings);
+ bindings = cons(cons(intern(string_utf8(var)), list), bindings);
} else if (equals) {
char *val = equals + 1;
*equals = 0;
- bindings = cons(cons(intern(string(var)), string(val)), bindings);
+ bindings = cons(cons(intern(string_utf8(var)), string_utf8(val)), bindings);
} else {
- bindings = cons(cons(intern(string(var)), null_string), bindings);
+ bindings = cons(cons(intern(string_utf8(var)), null_string), bindings);
}
argc--, argv++;
@@ -212,7 +213,7 @@ static int txr_main(int argc, char **argv)
}
if (!strcmp(*argv, "--version")) {
- printf("%s: version %s\n", progname, version);
+ printf("%ls: version %ls\n", progname, version);
return 0;
}
@@ -227,7 +228,7 @@ static int txr_main(int argc, char **argv)
char opt = (*argv)[1];
if (argc == 1) {
- fprintf(stderr, "%s: option %c needs argument\n", progname, opt);
+ fprintf(stderr, "%ls: option %c needs argument\n", progname, opt);
return EXIT_FAILURE;
}
@@ -238,7 +239,7 @@ static int txr_main(int argc, char **argv)
case 'a':
val = strtol(*argv, &errp, 10);
if (*errp != 0) {
- fprintf(stderr, "%s: option %c needs numeric argument, not %s\n",
+ fprintf(stderr, "%ls: option %c needs numeric argument, not %s\n",
progname, opt, *argv);
return EXIT_FAILURE;
}
@@ -246,10 +247,10 @@ static int txr_main(int argc, char **argv)
opt_arraydims = val;
break;
case 'c':
- specstring = string(*argv);
+ specstring = string_utf8(*argv);
break;
case 'f':
- spec_file_str = string(*argv);
+ spec_file_str = string_utf8(*argv);
break;
}
@@ -279,14 +280,14 @@ static int txr_main(int argc, char **argv)
case 'a':
case 'c':
case 'D':
- fprintf(stderr, "%s: option -%c does not clump\n", progname, *popt);
+ fprintf(stderr, "%ls: option -%c does not clump\n", progname, *popt);
return EXIT_FAILURE;
case '-':
- fprintf(stderr, "%s: unrecognized long option: --%s\n",
+ fprintf(stderr, "%ls: unrecognized long option: --%s\n",
progname, popt + 1);
return EXIT_FAILURE;
default:
- fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt);
+ fprintf(stderr, "%ls: unrecognized option: %c\n", progname, *popt);
return EXIT_FAILURE;
}
}
@@ -296,22 +297,22 @@ static int txr_main(int argc, char **argv)
}
if (specstring && spec_file_str) {
- fprintf(stderr, "%s: cannot specify both -f and -c\n", progname);
+ fprintf(stderr, "%ls: cannot specify both -f and -c\n", progname);
return EXIT_FAILURE;
}
if (specstring) {
- spec_file = "cmdline";
+ spec_file = L"cmdline";
spec_file_str = string(spec_file);
yyin_stream = make_string_input_stream(specstring);
} else if (spec_file_str) {
- if (strcmp(c_str(spec_file_str), "-") != 0) {
- FILE *in = fopen(c_str(spec_file_str), "r");
+ if (wcscmp(c_str(spec_file_str), L"-") != 0) {
+ FILE *in = w_fopen(c_str(spec_file_str), L"r");
if (in == 0)
uw_throwcf(file_error, "unable to open %s", c_str(spec_file_str));
yyin_stream = make_stdio_stream(in, spec_file_str, t, nil);
} else {
- spec_file = "stdin";
+ spec_file = L"stdin";
}
} else {
if (argc < 1) {
@@ -323,10 +324,10 @@ static int txr_main(int argc, char **argv)
FILE *in = fopen(*argv, "r");
if (in == 0)
uw_throwcf(file_error, "unable to open %s", *argv);
- yyin_stream = make_stdio_stream(in, string(*argv), t, nil);
- spec_file = *argv;
+ yyin_stream = make_stdio_stream(in, string_utf8(*argv), t, nil);
+ spec_file = utf8_dup_from(*argv);
} else {
- spec_file = "stdin";
+ spec_file = L"stdin";
}
argc--, argv++;
spec_file_str = string(spec_file);
@@ -345,8 +346,8 @@ static int txr_main(int argc, char **argv)
opt_loglevel = match_loglevel;
if (opt_loglevel >= 2) {
- format(std_error, "spec:\n~s\n", spec, nao);
- format(std_error, "bindings:\n~s\n", bindings, nao);
+ format(std_error, L"spec:\n~s\n", spec, nao);
+ format(std_error, L"bindings:\n~s\n", bindings, nao);
}
{
@@ -354,7 +355,7 @@ static int txr_main(int argc, char **argv)
list_collect_decl(filenames, iter);
while (*argv)
- list_collect(iter, string(*argv++));
+ list_collect(iter, string_utf8(*argv++));
retval = extract(spec, filenames, bindings);
diff --git a/txr.h b/txr.h
index 51b4990f..d230cd28 100644
--- a/txr.h
+++ b/txr.h
@@ -28,6 +28,6 @@ extern int opt_loglevel;
extern int opt_nobindings;
extern int opt_arraydims;
extern int opt_gc_debug;
-extern const char *version;
-extern const char *progname;
+extern const wchar_t *version;
+extern const wchar_t *progname;
extern int output_produced;
diff --git a/unwind.c b/unwind.c
index e3cbe5e5..f209f71b 100644
--- a/unwind.c
+++ b/unwind.c
@@ -217,15 +217,15 @@ obj_t *uw_throw(obj_t *sym, obj_t *exception)
if (ex == 0) {
if (opt_loglevel >= 1) {
obj_t *s = stringp(exception);
- format(std_error, "~a: unhandled exception of type ~a:\n",
+ format(std_error, L"~a: unhandled exception of type ~a:\n",
prog_string, sym, nao);
- format(std_error, s ? "~a: ~a\n" : "~a: ~s\n",
+ format(std_error, s ? L"~a: ~a\n" : L"~a: ~s\n",
prog_string, exception, nao);
}
if (uw_exception_subtype_p(sym, query_error) ||
uw_exception_subtype_p(sym, file_error)) {
if (!output_produced)
- put_cstring(std_output, "false\n");
+ put_cstring(std_output, L"false\n");
exit(EXIT_FAILURE);
}
abort();
@@ -238,7 +238,7 @@ obj_t *uw_throw(obj_t *sym, obj_t *exception)
abort();
}
-obj_t *uw_throwf(obj_t *sym, const char *fmt, ...)
+obj_t *uw_throwf(obj_t *sym, const wchar_t *fmt, ...)
{
va_list vl;
obj_t *stream = make_string_output_stream();
@@ -251,7 +251,7 @@ obj_t *uw_throwf(obj_t *sym, const char *fmt, ...)
abort();
}
-obj_t *uw_errorf(const char *fmt, ...)
+obj_t *uw_errorf(const wchar_t *fmt, ...)
{
va_list vl;
obj_t *stream = make_string_output_stream();
@@ -290,7 +290,7 @@ obj_t *uw_errorcf(const char *fmt, ...)
abort();
}
-obj_t *type_mismatch(const char *fmt, ...)
+obj_t *type_mismatch(const wchar_t *fmt, ...)
{
va_list vl;
obj_t *stream = make_string_output_stream();
@@ -317,21 +317,21 @@ obj_t *uw_register_subtype(obj_t *sub, obj_t *sup)
if (sub == t) {
if (sup == t)
return sup;
- uw_throwf(type_error, "cannot define ~a as an exception subtype of ~a",
+ uw_throwf(type_error, L"cannot define ~a as an exception subtype of ~a",
sub, sup, nao);
}
if (sup == nil) {
- uw_throwf(type_error, "cannot define ~a as an exception subtype of ~a",
+ uw_throwf(type_error, L"cannot define ~a as an exception subtype of ~a",
sub, sup, nao);
}
if (uw_exception_subtype_p(sub, sup))
- uw_throwf(type_error, "~a is already an exception subtype of ~a",
+ uw_throwf(type_error, L"~a is already an exception subtype of ~a",
sub, sup, nao);
if (uw_exception_subtype_p(sup, sub))
- uw_throwf(type_error, "~a is already an exception supertype of ~a",
+ uw_throwf(type_error, L"~a is already an exception supertype of ~a",
sub, sup, nao);
/* If sup symbol not registered, then we make it
diff --git a/unwind.h b/unwind.h
index 74d3e920..8d8cf5ae 100644
--- a/unwind.h
+++ b/unwind.h
@@ -79,8 +79,8 @@ obj_t *uw_set_func(obj_t *sym, obj_t *value);
obj_t *uw_block_return(obj_t *tag, obj_t *result);
void uw_push_catch(uw_frame_t *, obj_t *matches);
noreturn obj_t *uw_throw(obj_t *sym, obj_t *exception);
-noreturn obj_t *uw_throwf(obj_t *sym, const char *fmt, ...);
-noreturn obj_t *uw_errorf(const char *fmt, ...);
+noreturn obj_t *uw_throwf(obj_t *sym, const wchar_t *fmt, ...);
+noreturn obj_t *uw_errorf(const wchar_t *fmt, ...);
noreturn obj_t *uw_throwcf(obj_t *sym, const char *fmt, ...);
noreturn obj_t *uw_errorcf(const char *fmt, ...);
obj_t *uw_register_subtype(obj_t *sub, obj_t *super);
@@ -89,7 +89,7 @@ void uw_continue(uw_frame_t *curr, uw_frame_t *target);
void uw_pop_frame(uw_frame_t *);
void uw_init(void);
-noreturn obj_t *type_mismatch(const char *, ...);
+noreturn obj_t *type_mismatch(const wchar_t *, ...);
#define uw_block_begin(TAG, RESULTVAR) \
obj_t *RESULTVAR = nil; \
diff --git a/utf8.c b/utf8.c
new file mode 100644
index 00000000..a3a23e8e
--- /dev/null
+++ b/utf8.c
@@ -0,0 +1,168 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "lib.h"
+
+size_t utf8_from(wchar_t *wdst, const unsigned char *src)
+{
+ size_t nchar = 1;
+ enum { init, more1, more2, more3 } state;
+ const char *backtrack = 0;
+ int ch;
+ wchar_t wch = 0;
+
+ for (state = init; (ch = *src); src++) {
+ switch (state) {
+ case init:
+ if (ch < 0x80) {
+ if (wdst)
+ *wdst++ = ch;
+ nchar++;
+ } else if (ch >= 0xc2 && ch <= 0xe0) {
+ state = more1;
+ wch = (ch & 0x1f);
+ } else if (ch >= 0xe0 && ch <= 0xef) {
+ state = more2;
+ wch = (ch & 0xf);
+ } else if (ch >= 0xf0 && ch < 0xf5) {
+ state = more3;
+ wch = (ch & 0x7);
+ } else {
+ if (wdst)
+ *wdst++ = 0xdc00 | ch;
+ nchar++;
+ }
+ backtrack = src;
+ break;
+ case more1:
+ case more2:
+ case more3:
+ if (ch >= 0x80 && ch < 0xc0) {
+ wch <<= 6;
+ wch |= (ch & 0x3f);
+ if (wdst)
+ *wdst++ = wch;
+ nchar++;
+ state--;
+ } else {
+ src = backtrack;
+ if (wdst)
+ *wdst++ = 0xdc00 | *src;
+ nchar++;
+ state = init;
+ }
+ break;
+ }
+ }
+
+ if (state != init) {
+ if (wdst)
+ *wdst++ = 0xdc00 | *backtrack;
+ nchar++;
+ }
+
+ if (wdst)
+ *wdst++ = 0;
+ return nchar;
+}
+
+size_t utf8_to(unsigned char *dst, const wchar_t *wsrc)
+{
+ size_t nbyte = 1;
+ wchar_t wch;
+
+ while ((wch = *wsrc++)) {
+ if (wch < 0x80) {
+ nbyte += 1;
+ if (dst)
+ *dst++ = wch;
+ } else if (wch < 0x800) {
+ nbyte += 2;
+ if (dst) {
+ *dst++ = 0xC0 | (wch >> 6);
+ *dst++ = 0x80 | (wch & 0x3F);
+ }
+ } else if (wch < 0x10000) {
+ nbyte += 3;
+ if (dst) {
+ *dst++ = 0xE0 | (wch >> 12);
+ *dst++ = 0x80 | ((wch >> 6) & 0x3F);
+ *dst++ = 0x80 | (wch & 0x3F);
+ }
+ } else if (wch < 0x110000) {
+ nbyte += 4;
+ if (dst) {
+ *dst++ = 0xF0 | (wch >> 18);
+ *dst++ = 0x80 | ((wch >> 12) & 0x3F);
+ *dst++ = 0x80 | ((wch >> 6) & 0x3F);
+ *dst++ = 0x80 | (wch & 0x3F);
+ }
+ }
+ }
+
+ if (dst)
+ *dst++ = 0;
+ return nbyte;
+}
+
+wchar_t *utf8_dup_from(const unsigned char *str)
+{
+ size_t nchar = utf8_from(0, str);
+ wchar_t *wstr = chk_malloc(sizeof *wstr * nchar);
+ utf8_from(wstr, str);
+ return wstr;
+}
+
+unsigned char *utf8_dup_to(const wchar_t *wstr)
+{
+ size_t nbyte = utf8_to(0, wstr);
+ unsigned char *str = chk_malloc(nbyte);
+ utf8_to(str, wstr);
+ return str;
+}
+
+FILE *w_fopen(const wchar_t *wname, const wchar_t *wmode)
+{
+ char *name = (char *) utf8_dup_to(wname);
+ char *mode = (char *) utf8_dup_to(wmode);
+ FILE *f = fopen(name, mode);
+ free(name);
+ free(mode);
+ return f;
+}
+
+FILE *w_popen(const wchar_t *wcmd, const wchar_t *wmode)
+{
+ char *cmd = (char *) utf8_dup_to(wcmd);
+ char *mode = (char *) utf8_dup_to(wmode);
+ FILE *f = popen(cmd, mode);
+ free(cmd);
+ free(mode);
+ return f;
+}
diff --git a/utf8.h b/utf8.h
new file mode 100644
index 00000000..28e67fe2
--- /dev/null
+++ b/utf8.h
@@ -0,0 +1,32 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+size_t utf8_from(wchar_t *, const unsigned char *);
+size_t utf8_to(unsigned char *, const wchar_t *);
+wchar_t *utf8_dup_from(const unsigned char *);
+unsigned char *utf8_dup_to(const wchar_t *);
+FILE *w_fopen(const wchar_t *, const wchar_t *);
+FILE *w_popen(const wchar_t *, const wchar_t *);