summaryrefslogtreecommitdiffstats
path: root/hash.h
blob: df2ca0b3ebcf43148319e11a967a6e2c047b80fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/* Copyright 2009-2019
 * Kaz Kylheku <kaz@kylheku.com>
 * Vancouver, Canada
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Objects of type val declared extern here; their definitions are not part
 * of this header.
 * NOTE(review): the _k suffix suggests these are keyword symbols naming
 * hash table options -- confirm against the implementation file.
 */
extern val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k;

/*
 * Function prototypes of the hash API. Only signatures are visible in this
 * header; consult the implementation for the exact semantics of each one.
 *
 * The third parameter of equal_hash is unnamed in its prototype.
 */
ucnum equal_hash(val obj, int *count, ucnum);

/* Construction and copying (grouping inferred from names -- confirm). */
val make_seeded_hash(val weak_keys, val weak_vals, val equal_based, val seed);
val make_hash(val weak_keys, val weak_vals, val equal_based);
val make_similar_hash(val existing);
val copy_hash(val existing);

/*
 * Lookup and modification. gethash_c is the function that the inline
 * gethash_l wrapper defined in this header is built on: gethash_l passes
 * gethash_c's result to cdr_l.
 */
val gethash_c(val self, val hash, val key, loc new_p);
val gethash_e(val self, val hash, val key);
val gethash(val hash, val key);
val inhash(val hash, val key, val init);
val gethash_n(val hash, val key, val notfound_val);
val gethash_f(val self, val hash, val key, loc found);
val sethash(val hash, val key, val value);
val pushhash(val hash, val key, val value);
val remhash(val hash, val key);
val clearhash(val hash);
val hash_count(val hash);
val get_hash_userdata(val hash);
val set_hash_userdata(val hash, val data);
val hashp(val obj);

/* Traversal (grouping inferred from names -- confirm). */
val maphash(val func, val hash);
val hash_begin(val hash);
val hash_next(val iter);

/* Hash code computation on a single object; hash_equal also takes a seed. */
val hash_eql(val obj);
val hash_equal(val obj, val seed);

/*
 * Functions taking their options as a struct args pointer (hashv_args) or
 * as a val (hashl_args).
 */
val hashv(struct args *args);
val hashl(val args);
val hash_construct(val hashl_args, val pairs);
val hash_from_pairs_v(val pairs, struct args *hashv_args);
val hash_list(val keys, struct args *hashv_args);
val group_by(val func, val seq, struct args *hashv_args);
val group_reduce(val hash, val by_fun, val reduce_fun, val seq,
                 val initval, val filter_fun);

/* Whole-table queries and two-table operations. */
val hash_keys(val hash);
val hash_values(val hash);
val hash_pairs(val hash);
val hash_alist(val hash);
val hash_uni(val hash1, val hash2, val join_func);
val hash_diff(val hash1, val hash2);
val hash_isec(val hash1, val hash2, val join_func);
val hash_subset(val hash1, val hash2);
val hash_proper_subset(val hash1, val hash2);
val hash_update(val hash, val fun);
val hash_update_1(val hash, val key, val fun, val init);
val hash_revget(val hash, val value, val test, val keyfun);

/*
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably invoked by the garbage collector to process
 * weak hash tables -- confirm against the caller.
 */
void hash_process_weak(void);

/*
 * Location-returning variant of gethash_c: forwards self, hash, key and
 * new_p to gethash_c unchanged, then returns cdr_l applied to that result.
 */
INLINE loc gethash_l(val self, val hash, val key, loc new_p)
{
  val found = gethash_c(self, hash, key, new_p);

  return cdr_l(found);
}

/*
 * Takes no arguments and returns nothing; defined outside this header.
 * NOTE(review): by its name, the one-time initialization entry point of
 * the hash module -- confirm where and when it is called.
 */
void hash_init(void);
racter that needs full unicode support")); } #endif size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes) { size_t nchar = 1; enum utf8_state state = utf8_init; const unsigned char *backtrack = 0; wchar_t wch = 0, wch_min = 0; while (nbytes-- > 0) { int ch = *src++; switch (state) { case utf8_init: switch (ch >> 4) { case 0x0: case 0x1: case 0x2: case 0x3: case 0x4: case 0x5: case 0x6: case 0x7: if (wdst) { if (ch) *wdst++ = ch; else *wdst++ = 0xDC00; } nchar++; break; case 0xC: case 0xD: state = utf8_more1; wch = (ch & 0x1F); wch_min = 0x80; break; case 0xE: state = utf8_more2; wch = (ch & 0xF); wch_min = 0x800; break; case 0xF: #ifdef FULL_UNICODE if (ch < 0xF5) { state = utf8_more3; wch = (ch & 0x7); wch_min = 0x10000; break; } /* fallthrough */ #else conversion_error(); #endif default: if (wdst) *wdst++ = 0xDC00 | ch; nchar++; break; } backtrack = src; break; case utf8_more1: case utf8_more2: case utf8_more3: if (ch >= 0x80 && ch < 0xC0) { wch <<= 6; wch |= (ch & 0x3F); state = convert(enum utf8_state, state - 1); if (state == utf8_init) { if (wch < wch_min || (wch <= 0xFFFF && (wch & 0xFF00) == 0xDC00) || (wch > 0x10FFFF)) { src = backtrack; if (wdst) *wdst++ = 0xDC00 | *src; } else { if (wdst) *wdst++ = wch; } nchar++; } } else { src = backtrack; if (wdst) *wdst++ = 0xDC00 | *src; nchar++; state = utf8_init; } break; } } if (wdst) *wdst++ = 0; return nchar; } size_t utf8_from(wchar_t *wdst, const char *src) { size_t nbytes = strlen(src); return utf8_from_buf(wdst, coerce(const unsigned char *, src), nbytes); } size_t utf8_to_buf(unsigned char *dst, const wchar_t *wsrc, int null_term) { size_t nbyte = 0; wchar_t wch; while ((wch = *wsrc++)) { if (wch < 0x80) { nbyte += 1; if (dst) *dst++ = wch; } else if (wch < 0x800) { nbyte += 2; if (dst) { *dst++ = 0xC0 | (wch >> 6); *dst++ = 0x80 | (wch & 0x3F); } } else if (wch < 0x10000) { if ((wch & 0xFF00) == 0xDC00) { nbyte += 1; if (dst) *dst++ = (wch & 0xFF); } else { nbyte += 3; if (dst) { 
*dst++ = 0xE0 | (wch >> 12); *dst++ = 0x80 | ((wch >> 6) & 0x3F); *dst++ = 0x80 | (wch & 0x3F); } } } else if (wch < 0x110000) { nbyte += 4; if (dst) { *dst++ = 0xF0 | (wch >> 18); *dst++ = 0x80 | ((wch >> 12) & 0x3F); *dst++ = 0x80 | ((wch >> 6) & 0x3F); *dst++ = 0x80 | (wch & 0x3F); } } } if (null_term) { if (dst) *dst++ = 0; nbyte++; } return nbyte; } size_t utf8_to(char *dst, const wchar_t *wsrc) { return utf8_to_buf(coerce(unsigned char *, dst), wsrc, 1); } wchar_t *utf8_dup_from(const char *str) { size_t len = strlen(str); size_t nchar = utf8_from_buf(0, coerce(const unsigned char *, str), len); wchar_t *wstr = chk_wmalloc(nchar); utf8_from_buf(wstr, coerce(const unsigned char *, str), len); return wstr; } wchar_t *utf8_dup_from_buf(const char *str, size_t size) { size_t nchar = utf8_from_buf(0, coerce(const unsigned char *, str), size); wchar_t *wstr = chk_wmalloc(nchar); utf8_from_buf(wstr, coerce(const unsigned char *, str), size); return wstr; } unsigned char *utf8_dup_to_buf(const wchar_t *wstr, size_t *pnbytes, int null_term) { size_t nbyte = utf8_to_buf(0, wstr, null_term); unsigned char *str = chk_malloc(nbyte); utf8_to_buf(str, wstr, null_term); *pnbytes = nbyte; return str; } char *utf8_dup_to(const wchar_t *wstr) { size_t len = utf8_to(0, wstr) - 1; char *str = coerce(char *, chk_malloc(len + 1)); utf8_to(str, wstr); str[len] = 0; if (strlen(str) != len) { free(str); uw_throw(error_s, lit("Cannot convert string with embedded NUL to UTF-8 string")); } return str; } int utf8_encode(wchar_t wch, int (*put)(int ch, mem_t *ctx), mem_t *ctx) { if (wch < 0x80) { return put(wch, ctx); } else if (wch < 0x800) { return put(0xC0 | (wch >> 6), ctx) && put(0x80 | (wch & 0x3F), ctx); } else if (wch < 0x10000) { if ((wch & 0xFF00) == 0xDC00) { return put(wch & 0xFF, ctx); } else { return put(0xE0 | (wch >> 12), ctx) && put(0x80 | ((wch >> 6) & 0x3F), ctx) && put(0x80 | (wch & 0x3F), ctx); } } else if (wch < 0x110000) { return put(0xF0 | (wch >> 18), ctx) && 
put(0x80 | ((wch >> 12) & 0x3F), ctx) && put(0x80 | ((wch >> 6) & 0x3F), ctx) && put(0x80 | (wch & 0x3F), ctx); } uw_throwf(error_s, lit("cannot convert character value #x~x to UTF-8"), num(wch), nao); } void utf8_decoder_init(utf8_decoder_t *ud) { ud->state = utf8_init; ud->flags = 0; ud->wch = 0; ud->head = ud->tail = ud->back = 0; } wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx) { for (;;) { int ch; if (ud->tail != ud->head) { ch = ud->buf[ud->tail]; ud->tail = (ud->tail + 1) % 8; } else { ch = get(ctx); ud->buf[ud->head] = ch; ud->head = ud->tail = (ud->head + 1) % 8; } if (ch == EOF) { if (ud->state == utf8_init) { return WEOF; } else { wchar_t wch = 0xDC00 | ud->buf[ud->back]; ud->tail = ud->back = (ud->back + 1) % 8; ud->state = utf8_init; return wch; } } switch (ud->state) { case utf8_init: switch (ch >> 4) { case 0x0: case 0x1: case 0x2: case 0x3: case 0x4: case 0x5: case 0x6: case 0x7: ud->back = ud->tail; if (ch == 0 && (ud->flags & UTF8_ADMIT_NUL) == 0) return 0xDC00; return ch; case 0xC: case 0xD: ud->state = utf8_more1; ud->wch = (ch & 0x1F); ud->wch_min = 0x80; break; case 0xE: ud->state = utf8_more2; ud->wch = (ch & 0xF); ud->wch_min = 0x800; break; case 0xF: #ifdef FULL_UNICODE if (ch < 0xF5) { ud->state = utf8_more3; ud->wch = (ch & 0x7); ud->wch_min = 0x100000; break; } /* fallthrough */ #else conversion_error(); #endif default: ud->back = ud->tail; return 0xDC00 | ch; } break; case utf8_more1: case utf8_more2: case utf8_more3: if (ch >= 0x80 && ch < 0xC0) { ud->wch <<= 6; ud->wch |= (ch & 0x3F); ud->state = convert(enum utf8_state, ud->state - 1); if (ud->state == utf8_init) { if (ud->wch < ud->wch_min || (ud->wch <= 0xFFFF && (ud->wch & 0xFF00) == 0xDC00) || (ud->wch > 0x10FFFF)) { wchar_t wch = 0xDC00 | ud->buf[ud->back]; ud->tail = ud->back = (ud->back + 1) % 8; return wch; } else { ud->back = ud->tail; return ud->wch; } } } else { wchar_t wch = 0xDC00 | ud->buf[ud->back]; ud->tail = ud->back = (ud->back + 1) % 8; 
ud->state = utf8_init; return wch; } break; } } } FILE *w_fopen(const wchar_t *wname, const wchar_t *wmode) { char *name = utf8_dup_to(wname); char *mode = utf8_dup_to(wmode); FILE *f = fopen(name, mode); free(name); free(mode); return f; } FILE *w_popen(const wchar_t *wcmd, const wchar_t *wmode) { char *cmd = utf8_dup_to(wcmd); char *mode = utf8_dup_to(wmode); FILE *f = popen(cmd, mode); free(cmd); free(mode); return f; } FILE *w_freopen(const wchar_t *wname, const wchar_t *wmode, FILE *fold) { char *name = utf8_dup_to(wname); char *mode = utf8_dup_to(wmode); FILE *f = fold ? freopen(name, mode, fold) : fopen(name, mode); free(name); free(mode); return f; } FILE *w_fdopen(int fd, const wchar_t *wmode) { char *mode = utf8_dup_to(wmode); FILE *f = fdopen(fd, mode); free(mode); return f; } int w_remove(const wchar_t *wpath) { char *path = utf8_dup_to(wpath); int err = remove(path); free(path); return err; } int w_rename(const wchar_t *wfrom, const wchar_t *wto) { char *from = utf8_dup_to(wfrom); char *to = utf8_dup_to(wto); int err = rename(from, to); free(to); free(from); return err; }