diff options
-rw-r--r-- | Makefile | 1 | ||||
-rwxr-xr-x | configure | 28 | ||||
-rw-r--r-- | gzio.c | 511 | ||||
-rw-r--r-- | gzio.h | 33 | ||||
-rw-r--r-- | stream.c | 59 | ||||
-rw-r--r-- | stream.h | 8 |
6 files changed, 631 insertions, 9 deletions
@@ -64,6 +64,7 @@ OBJS-$(have_ftw) += ftw.o OBJS-$(have_posix_sigs) += signal.o OBJS-$(have_sockets) += socket.o OBJS-$(have_termios) += termios.o +OBJS-$(have_zlib) += gzio.o EXTRA_OBJS-$(add_win_res) += win/txr.res STDLIB_SRCS := $(wildcard stdlib/*.tl) @@ -185,6 +185,7 @@ txr_dbg_opts=--gc-debug valgrind= extra_debugging= debug_support=y +have_zlib= big_time= big_time_given= gen_gc=y @@ -949,6 +950,9 @@ termios_define := $termios_define # do we compile in debug support? debug_support := $debug_support +# do we compile in zlib? +have_zlib := $have_zlib + # allow parallel make? parallelmake := $parallelmake @@ -3956,6 +3960,30 @@ else printf "no\n" fi +printf "Checking for zlib ... " +cat > conftest.c <<! +#include <zlib.h> + +int main(void) +{ + gzFile gf = gzopen("foo.gz", "r"); + gzclose(gf); + return 0; +} +! + +if conftest ; then + printf "yes\n" + printf "#define HAVE_ZLIB 1\n" >> config.h + have_zlib=y +elif conftest EXTRA_LDLIBS="-lz" ; then + printf "yes\n" + printf "#define HAVE_ZLIB 1\n" >> config.h + conf_ldlibs="${conf_ldlibs:+"$conf_ldlibs "}-lz" + have_zlib=y +else + printf "no\n" +fi # # Dependent variables @@ -0,0 +1,511 @@ +/* Copyright 2022 + * Kaz Kylheku <kaz@kylheku.com> + * Vancouver, Canada + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <wchar.h>
+#include <signal.h>
+#include <errno.h>
+#include <zlib.h>
+#include "config.h"
+#include "alloca.h"
+#include "lib.h"
+#include "stream.h"
+#include "gc.h"
+#include "args.h"
+#include "utf8.h"
+#include "eval.h"
+#include "signal.h"
+#include "unwind.h"
+#include "sysif.h"
+#include "itypes.h"
+#include "gzio.h"
+
+/*
+ * Per-stream state of a gzip stream; stored as the handle of the
+ * stream's cobj.
+ */
+struct gzio_handle {
+  struct strm_base a;
+  gzFile f;                      /* zlib handle; null once closed */
+  val descr;                     /* description, reported via the name property */
+  val unget_c;                   /* list of pushed-back characters */
+  utf8_decoder_t ud;             /* decoder state used by gzio_get_char */
+  val err, errstr;               /* most recently recorded error and its text */
+  char *buf;                     /* freed on destroy; never allocated in this file */
+  int fd;                        /* descriptor reported by gzio_get_fd; -1 if none */
+  unsigned is_real_time : 8;
+  unsigned is_byte_oriented : 8; /* nonzero: gzio_get_char skips UTF-8 decoding */
+  unsigned is_output : 8;        /* nonzero: created as an output stream */
+};
+
+struct cobj_class *gzio_stream_cls;
+
+/*
+ * cobj destroy operation: closes the stream without throwing, then
+ * releases the handle.
+ */
+static void gzio_stream_destroy(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  close_stream(stream, nil);
+  strm_base_cleanup(&h->a);
+  free(h->buf);
+  free(h);
+}
+
+/*
+ * cobj mark operation: reports every object reference held in the handle
+ * to the garbage collector.
+ */
+static void gzio_stream_mark(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  strm_base_mark(&h->a);
+  gc_mark(h->descr);
+  /* The push-back list consists of heap cells created by mpush in
+     gzio_unget_char; it must be marked or it can be reclaimed while
+     still referenced from the handle. */
+  gc_mark(h->unget_c);
+  gc_mark(h->err);
+  gc_mark(h->errstr);
+}
+
+/*
+ * Called when a read produced no data. Records the condition in
+ * h->err/h->errstr and returns nil. Throws file_error_s if the stream is
+ * closed or zlib reports Z_ERRNO; throws timeout_error_s if that errno is
+ * EAGAIN. End of file and other zlib errors are recorded without throwing.
+ */
+static val gzio_maybe_read_error(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  const char *gztxt;
+  int gzerr;
+
+  if (h->f == 0) {
+    uw_throwf(file_error_s, lit("error reading ~s: file closed"), stream, nao);
+  } else if (gzeof(h->f)) {
+    h->err = t;
+    h->errstr = lit("eof");
+  } else if ((gztxt = gzerror(h->f, &gzerr)) != 0 && gzerr != Z_OK) {
+    if (gzerr == Z_ERRNO) {
+      int eno = errno;
+      h->err = num(eno);
+      h->errstr = nil;
+#ifdef EAGAIN
+      if (eno == EAGAIN)
+        uw_ethrowf(timeout_error_s, lit("timed out reading ~s"), stream, nao);
+#endif
+      uw_ethrowf(file_error_s, lit("error reading ~s: ~d/~s"),
+                 stream, h->err, errno_to_string(h->err), nao);
+    } else {
+      h->err = negone;
+      h->errstr = string_utf8(gztxt);
+    }
+  } else {
+    h->err = nil;
+    h->errstr = lit("no error");
+  }
+
+  return nil;
+}
+
+/*
+ * Reports a failed non-read operation; always throws. The action string
+ * is interpolated into the message. Throws file_error_s for a closed
+ * stream or a general errno, timeout_error_s when errno is EAGAIN.
+ */
+static val gzio_maybe_error(val stream, val action)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  val err = num(errno);
+  if (h->f == 0)
+    uw_ethrowf(file_error_s, lit("error ~a ~s: file closed"), action, stream, nao);
+  h->err = err;
+#ifdef EAGAIN
+  if (errno == EAGAIN)
+    uw_ethrowf(timeout_error_s, lit("timed out on ~s"), stream, nao);
+#endif
+  uw_ethrowf(file_error_s, lit("error ~a ~s: ~d/~s"),
+             action, stream, err, errno_to_string(err), nao);
+}
+
+/*
+ * Returns t if the open stream is at end of file, otherwise the recorded
+ * error value.
+ */
+static val gzio_get_error(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  if (h->f != 0 && gzeof(h->f))
+    return t;
+  return h->err;
+}
+
+/*
+ * Returns "eof" if the open stream is at end of file, otherwise the
+ * recorded error text.
+ */
+static val gzio_get_error_str(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+
+  if (h->f != 0 && gzeof(h->f))
+    return lit("eof");
+  return h->errstr;
+}
+
+/*
+ * Clears the error state in zlib and in the handle; returns the error
+ * value that was recorded before clearing.
+ */
+static val gzio_clear_error(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  val ret = h->err;
+  if (h->f != 0)
+    gzclearerr(h->f);
+  /* Same "no error" state that make_gzio_stream establishes: a nil error
+     value with a descriptive string. */
+  h->err = nil;
+  h->errstr = lit("no error");
+  return ret;
+}
+
+/*
+ * Returns the file descriptor as an integer, or nil if the stream is
+ * closed or has no descriptor (fd == -1).
+ */
+static val gzio_get_fd(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  return (h->f && h->fd != -1) ? num(h->fd) : nil;
+}
+
+/*
+ * gzputc bracketed by sig_save_enable/sig_restore_enable.
+ * NOTE(review): presumably this allows asynchronous signal handling
+ * during the call; confirm against signal.h.
+ */
+static int se_gzputc(int ch, gzFile f)
+{
+  int ret;
+  sig_save_enable;
+  ret = gzputc(f, ch);
+  sig_restore_enable;
+  return ret;
+}
+
+/* gzgetc bracketed by sig_save_enable/sig_restore_enable. */
+static int se_gzgetc(gzFile f)
+{
+  int ret;
+  sig_save_enable;
+  ret = gzgetc(f);
+  sig_restore_enable;
+  return ret;
+}
+
+/* Byte source handed to utf8_decode; the context pointer is the gzFile. */
+static int gzio_get_char_callback(mem_t *f)
+{
+  return se_gzgetc(coerce(gzFile, f));
+}
+
+/*
+ * Returns the next character: a pushed-back character if there is one,
+ * otherwise one read from the file (UTF-8 decoded unless the stream is
+ * byte oriented). When no character is available, returns the result of
+ * gzio_maybe_read_error, which is nil or a throw.
+ */
+static val gzio_get_char(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+
+  if (h->unget_c)
+    return rcyc_pop(&h->unget_c);
+
+  if (h->f) {
+    wint_t ch;
+
+    if (h->is_byte_oriented) {
+      ch = se_gzgetc(h->f);
+      /* A zero byte is delivered as the code point 0xDC00. */
+      if (ch == 0)
+        ch = 0xDC00;
+    } else {
+      ch = utf8_decode(&h->ud, gzio_get_char_callback,
+                       coerce(mem_t *, h->f));
+    }
+
+    return (ch != WEOF) ? chr(ch) : gzio_maybe_read_error(stream);
+  }
+  return gzio_maybe_read_error(stream);
+}
+
+/*
+ * Returns the next byte as an integer; when none is available, returns
+ * the result of gzio_maybe_read_error.
+ */
+static val gzio_get_byte(val stream)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+
+  if (h->f) {
+    int ch = se_gzgetc(h->f);
+    return (ch != EOF) ? num(ch) : gzio_maybe_read_error(stream);
+  }
+  return gzio_maybe_read_error(stream);
+}
+
+/* Pushes ch onto the stream's push-back list and returns it. */
+static val gzio_unget_char(val stream, val ch)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  mpush(ch, mkloc(h->unget_c, stream));
+  return ch;
+}
+
+/*
+ * Pushes a byte back into zlib's input via gzungetc. Returns the byte as
+ * an integer; throws through gzio_maybe_error on failure or if the
+ * stream is closed.
+ */
+static val gzio_unget_byte(val stream, int byte)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+
+  errno = 0;
+  return h->f != 0 && gzungetc(byte, coerce(gzFile, h->f)) != EOF
+         ? num_fast(byte)
+         : gzio_maybe_error(stream, lit("ungetting byte into"));
+}
+
+/*
+ * Reads decompressed data into ptr[pos..len) and returns the new fill
+ * position. A result less than len means a short read; the condition is
+ * recorded (or thrown) by gzio_maybe_read_error only when nothing at all
+ * could be read. Throws error_s if len is not representable.
+ */
+static ucnum gzio_fill_buf(val stream, mem_t *ptr, ucnum len, ucnum pos)
+{
+  val self = lit("fill-buf");
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  if (convert(size_t, len) != len || len > INT_PTR_MAX)
+    uw_throwf(error_s, lit("~a: buffer too large"), self, nao);
+  if (pos >= len)
+    return len;
+  errno = 0;
+  if (h->f != 0) {
+    ucnum start = pos;
+
+    /* gzread takes an unsigned length and returns an int, so larger
+       requests are issued in pieces of at most INT_MAX bytes instead of
+       being silently truncated by the conversion. */
+    while (pos < len) {
+      ucnum want = len - pos;
+      unsigned chunk = want > INT_MAX ? INT_MAX : convert(unsigned, want);
+      int nread = gzread(h->f, ptr + pos, chunk);
+
+      if (nread <= 0)
+        break;
+      pos += nread;
+      /* Short read: end of data or an error; stop and report progress. */
+      if (convert(unsigned, nread) < chunk)
+        break;
+    }
+
+    if (pos > start)
+      return pos;
+  }
+  gzio_maybe_read_error(stream);
+  return pos;
+}
+
+/*
+ * Closes the underlying gzFile. Returns t on success, nil if the stream
+ * was already closed or the close failed. If throw_on_error is true, a
+ * failed close throws file_error_s instead of returning nil.
+ */
+static val gzio_close(val stream, val throw_on_error)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+
+  if (h->f != 0) {
+    int result = gzclose(h->f);
+    int eno = errno;
+    h->f = 0;
+    if (result != Z_OK) {
+      /* The failure is reported here rather than via gzio_maybe_error:
+         with h->f already cleared, that function would report "file
+         closed" and hide the actual cause. */
+      if (default_null_arg(throw_on_error)) {
+        if (result == Z_ERRNO) {
+          h->err = num(eno);
+          uw_ethrowf(file_error_s, lit("error closing ~s: ~d/~s"),
+                     stream, h->err, errno_to_string(h->err), nao);
+        }
+        h->err = negone;
+        uw_throwf(file_error_s, lit("error closing ~s: zlib error ~d"),
+                  stream, num(result), nao);
+      }
+      return nil;
+    }
+    return t;
+  }
+  return nil;
+}
+
+/* Converts a z_off_t to an integer object, whatever the width of z_off_t. */
+static val num_z_off_t(z_off_t off)
+{
+  if (sizeof (off) <= sizeof (cnum)) {
+    return num(off);
+  } else if (NUM_MIN <= off && off <= NUM_MAX) {
+    return num(off);
+  } else if (sizeof (off) <= sizeof (i64_t)) {
+    return num_64(off);
+  } else {
+    internal_error("portme: unsupported z_off_t size");
+  }
+}
+
+/*
+ * Converts an integer object to z_off_t using the 32 or 64 bit
+ * conversion matching the width of z_off_t; self names the calling
+ * function and is passed through to the conversion.
+ */
+static z_off_t z_off_t_num(val num, val self)
+{
+  switch (CHAR_BIT * sizeof(z_off_t)) {
+  case 32:
+    return c_i32(num, self);
+  case 64:
+    return c_i64(num, self);
+  default:
+    internal_error("portme: unsupported z_off_t size");
+  }
+}
+
+/*
+ * Seek operation. A zero offset relative to the current position is a
+ * position query and returns the offset reported by gztell. Any other
+ * request repositions the stream with gzseek, discards pushed-back
+ * characters, resets the UTF-8 decoder on input streams, and returns t.
+ * Failures throw through gzio_maybe_error.
+ */
+static val gzio_seek(val stream, val offset, enum strm_whence whence)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  val self = lit("seek-stream");
+
+  errno = 0;
+
+  if (h->f != 0) {
+    if (offset == zero && whence == strm_cur) {
+      z_off_t where = gztell(h->f);
+      /* gztell yields -1 on error; that must not be returned to the
+         caller as if it were a position. */
+      if (where >= 0)
+        return num_z_off_t(where);
+    } else {
+      if (gzseek(h->f, z_off_t_num(offset, self), whence) >= 0) {
+        if (!h->is_output)
+          utf8_decoder_init(&h->ud);
+        h->unget_c = nil;
+        return t;
+      }
+    }
+  }
+
+  return gzio_maybe_error(stream, lit("seeking"));
+}
+
+/*
+ * Byte sink handed to utf8_encode; the context pointer is the gzFile.
+ * Returns nonzero on success.
+ */
+static int gzio_put_char_callback(int ch, mem_t *f)
+{
+  int ret = se_gzputc(ch, coerce(gzFile, f)) != EOF;
+  return ret;
+}
+
+/*
+ * Writes str UTF-8 encoded. Returns t; throws through gzio_maybe_error
+ * if a byte cannot be written or the stream is closed.
+ */
+static val gzio_put_string(val stream, val str)
+{
+  val self = lit("put-string");
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+
+  errno = 0;
+
+  if (h->f != 0) {
+    const wchar_t *s = c_str(str, self);
+
+    while (*s) {
+      if (!utf8_encode(*s++, gzio_put_char_callback, coerce(mem_t *, h->f)))
+        return gzio_maybe_error(stream, lit("writing"));
+    }
+    return t;
+  }
+  return gzio_maybe_error(stream, lit("writing"));
+}
+
+/* Writes one character UTF-8 encoded; returns t or throws. */
+static val gzio_put_char(val stream, val ch)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  errno = 0;
+  return h->f != 0 && utf8_encode(c_chr(ch), gzio_put_char_callback,
+                                  coerce(mem_t *, h->f))
+         ? t : gzio_maybe_error(stream, lit("writing"));
+}
+
+/* Writes one raw byte; returns t or throws. */
+static val gzio_put_byte(val stream, int b)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  errno = 0;
+  return h->f != 0 && se_gzputc(b, coerce(gzFile, h->f)) != EOF
+         ? t : gzio_maybe_error(stream, lit("writing"));
+}
+
+/*
+ * Writes ptr[pos..len) and returns the position reached. Throws through
+ * gzio_maybe_error if nothing could be written or the stream is closed;
+ * throws error_s if len is not representable.
+ */
+static ucnum gzio_put_buf(val stream, mem_t *ptr, ucnum len, ucnum pos)
+{
+  val self = lit("put-buf");
+  struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+  if (convert(size_t, len) != len || len > INT_PTR_MAX)
+    uw_throwf(error_s, lit("~a: buffer too large"), self, nao);
+  if (pos >= len)
+    return len;
+  errno = 0;
+  if (h->f != 0) {
+    ucnum start = pos;
+
+    /* gzwrite takes an unsigned length and returns an int, so larger
+       requests are issued in pieces of at most INT_MAX bytes. */
+    while (pos < len) {
+      ucnum left = len - pos;
+      unsigned chunk = left > INT_MAX ? INT_MAX : convert(unsigned, left);
+      int nwrit = gzwrite(h->f, ptr + pos, chunk);
+
+      if (nwrit <= 0)
+        break;
+      pos += nwrit;
+    }
+
+    if (pos > start)
+      return pos;
+  }
+  gzio_maybe_error(stream, lit("writing"));
+  return 0;
+}
+
+/*
+ * Property getter. Only the name property is supported: it yields the
+ * stream type name followed by the description. Other properties yield
+ * nil.
+ */
+static val gzio_get_prop(val stream, val ind)
+{
+  if (ind == name_k) {
+    struct strm_ops *ops = coerce(struct strm_ops *, stream->co.ops);
+    val name = static_str(ops->name);
+    struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+    return format(nil, lit("~a ~a"), name, h->descr, nao);
+  }
+  return nil;
+}
+
+/*
+ * Property setter. Only the name property is supported: it replaces the
+ * description and returns t. Other properties are rejected with nil.
+ */
+static val gzio_set_prop(val stream, val ind, val prop)
+{
+  if (ind == name_k) {
+    struct gzio_handle *h = coerce(struct gzio_handle *, stream->co.handle);
+    h->descr = prop;
+    return t;
+  }
+  return nil;
+}
+
+/* Operations of a gzip input stream; slots given as 0 are not provided here. */
+static struct strm_ops gzio_ops_rd =
+  strm_ops_init(cobj_ops_init(eq,
+                              stream_print_op,
+                              gzio_stream_destroy,
+                              gzio_stream_mark,
+                              cobj_eq_hash_op),
+                wli("gzip-input-stream"),
+                0,
+                0,
+                0,
+                generic_get_line,
+                gzio_get_char,
+                gzio_get_byte,
+                gzio_unget_char,
+                gzio_unget_byte,
+                0,
+                gzio_fill_buf,
+                gzio_close,
+                0,
+                gzio_seek,
+                0,
+                gzio_get_prop,
+                gzio_set_prop,
+                gzio_get_error,
+                gzio_get_error_str,
+                gzio_clear_error,
+                gzio_get_fd);
+
+/* Operations of a gzip output stream; slots given as 0 are not provided here. */
+static struct strm_ops gzio_ops_wr =
+  strm_ops_init(cobj_ops_init(eq,
+                              stream_print_op,
+                              gzio_stream_destroy,
+                              gzio_stream_mark,
+                              cobj_eq_hash_op),
+                wli("gzip-output-stream"),
+                gzio_put_string,
+                gzio_put_char,
+                gzio_put_byte,
+                0,
+                0,
+                0,
+                0,
+                0,
+                gzio_put_buf,
+                0,
+                gzio_close,
+                0,
+                gzio_seek,
+                0,
+                gzio_get_prop,
+                gzio_set_prop,
+                gzio_get_error,
+                gzio_get_error_str,
+                gzio_clear_error,
+                gzio_get_fd);
+
+/*
+ * Module initialization: completes both operation tables, interns the
+ * gzip-stream symbol and registers the gzip stream class with stream_cls
+ * as its superclass.
+ */
+void gzio_init(void)
+{
+  fill_stream_ops(&gzio_ops_rd);
+  fill_stream_ops(&gzio_ops_wr);
+  gzio_stream_s = intern(lit("gzip-stream"), user_package);
+  gzio_stream_cls = cobj_register_super(gzio_stream_s, stream_cls);
+}
+
+/*
+ * Opens the file wname with gzopen using the mode string wmode; both are
+ * converted to UTF-8 for the call. Returns the gzFile, or null if gzopen
+ * fails. Throws file_error_s if the parsed mode m requests a feature a
+ * gzip stream does not support (buffer order, nonblock, notrunc,
+ * unbuffered, line buffered, interactive) or requests both reading and
+ * writing. self names the calling function for error messages.
+ */
+gzFile w_gzopen_mode(const wchar_t *wname, const wchar_t *wmode,
+                     const struct stdio_mode m, val self)
+{
+  if (m.buforder >= 0 || m.nonblock || m.notrunc || m.unbuf ||
+      m.linebuf || m.interactive)
+  {
+    uw_throwf(file_error_s,
+              lit("~a: invalid modes for gzip stream"), self, nao);
+  }
+
+  if (m.read && m.write) {
+    uw_throwf(file_error_s,
+              lit("~a: gzip stream cannot both read and write"), self, nao);
+  }
+
+  {
+    char *name = utf8_dup_to(wname);
+    char *mode = utf8_dup_to(wmode);
+    gzFile f = gzopen(name, mode);
+    free(name);
+    free(mode);
+    return f;
+  }
+}
+
+/*
+ * Wraps the open gzFile f in a new stream object. fd is the descriptor
+ * to report from the fd operation (-1 for none), descr the description
+ * used in the stream's name, and is_output selects the output operation
+ * table over the input one.
+ */
+val make_gzio_stream(gzFile f, int fd, val descr, int is_output)
+{
+  struct gzio_handle *h = coerce(struct gzio_handle *, chk_malloc(sizeof *h));
+  val stream = cobj(coerce(mem_t *, h), gzio_stream_cls,
+                    if3(is_output,
+                        &gzio_ops_wr.cobj_ops, &gzio_ops_rd.cobj_ops));
+  strm_base_init(&h->a);
+  h->f = f;
+  h->fd = fd;
+  h->descr = descr;
+  h->unget_c = nil;
+  utf8_decoder_init(&h->ud);
+  h->err = nil;
+  h->errstr = lit("no error");
+  h->buf = 0;
+  h->is_real_time = 0;
+  h->is_byte_oriented = 0;
+  h->is_output = is_output;
+  return stream;
+}
@@ -0,0 +1,33 @@
+/* Copyright 2022
+ * Kaz Kylheku <kaz@kylheku.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +val gzio_stream_s; +void gzio_init(void); +gzFile w_gzopen_mode(const wchar_t *wname, const wchar_t *wmode, + const struct stdio_mode m, val self); +val make_gzio_stream(gzFile f, int fd, val descr, int is_output); @@ -58,6 +58,9 @@ #if HAVE_WSPAWN || HAVE_SPAWN #include <process.h> #endif +#if HAVE_ZLIB +#include <zlib.h> +#endif #include "alloca.h" #include "lib.h" #include "gc.h" @@ -71,6 +74,9 @@ #include "regex.h" #include "txr.h" #include "buf.h" +#if HAVE_ZLIB +#include "gzio.h" +#endif #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? 
(a) : (b)) @@ -1574,6 +1580,13 @@ static struct stdio_mode do_parse_mode(val mode_str, struct stdio_mode m_dfl, nredir++; break; } + case 'z': + m.gzip = 1; + if (isdigit(convert(unsigned char, ms[1]))) { + m.gzlevel = *++ms - '0'; + break; + } + break; default: m.malformed = 1; return m; @@ -1616,11 +1629,14 @@ static val format_mode(const struct stdio_mode m) *ptr++ = '+'; } - if (m.binary) + if (m.binary && !m.gzip) *ptr++ = 'b'; + if (m.gzip && m.gzlevel) + *ptr++ = '0' + m.gzlevel; + #ifdef __CYGWIN__ - if (!m.binary && (opt_compat == 144 || opt_compat == 145)) + if (!m.gzip && !m.binary && (opt_compat == 144 || opt_compat == 145)) *ptr++ = 't'; #endif @@ -4219,15 +4235,42 @@ val open_file(val path, val mode_str) val self = lit("open-file"); struct stdio_mode m, m_r = stdio_mode_init_r; val norm_mode = normalize_mode(&m, mode_str, m_r, self); - FILE *f = w_fopen_mode(c_str(path, self), c_str(norm_mode, self), m); - if (!f) { +again: + if (!m.gzip) { + FILE *f = w_fopen_mode(c_str(path, self), c_str(norm_mode, self), m); + + if (!f) + goto error; + + return set_mode_props(m, make_stdio_stream(f, path)); + } else { +#if HAVE_ZLIB + gzFile f = w_gzopen_mode(c_str(path, self), c_str(norm_mode, self), + m, self); + + if (!f) + goto error; + + if (m.read && gzdirect(f)) { + gzclose(f); + m.gzip = 0; + goto again; + } + + return make_gzio_stream(f, -1, path, m.write); +#else + uw_ethrowf(file_error_s, lit("~s: not built with zlib support"), + self, nao); +#endif + } + +error: + { int eno = errno; uw_ethrowf(errno_to_file_error(eno), lit("error opening ~s: ~d/~s"), path, num(eno), errno_to_str(eno), nao); } - - return set_mode_props(m, make_stdio_stream(f, path)); } val open_fileno(val fd, val mode_str) @@ -5698,6 +5741,10 @@ void stream_init(void) } } #endif + +#if HAVE_ZLIB + gzio_init(); +#endif } void stream_compat_fixup(int compat_ver) @@ -116,13 +116,15 @@ struct stdio_mode { unsigned interactive : 1; unsigned unbuf : 1; unsigned linebuf : 1; + unsigned gzip : 
1; + unsigned gzlevel : 4; int buforder : 5; int redir[STDIO_MODE_NREDIRS][2]; }; -#define stdio_mode_init_blank { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } } } -#define stdio_mode_init_r { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } } } -#define stdio_mode_init_rpb { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, -1, { { 0 } } } +#define stdio_mode_init_blank { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } } } +#define stdio_mode_init_r { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } } } +#define stdio_mode_init_rpb { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } } } #define std_input (deref(lookup_var_l(nil, stdin_s))) #define std_output (deref(lookup_var_l(nil, stdout_s))) |