summaryrefslogtreecommitdiffstats
path: root/regex.h
diff options
context:
space:
mode:
Diffstat (limited to 'regex.h')
-rw-r--r--regex.h107
1 files changed, 107 insertions, 0 deletions
diff --git a/regex.h b/regex.h
new file mode 100644
index 00000000..10fcf4b4
--- /dev/null
+++ b/regex.h
@@ -0,0 +1,107 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <limits.h>
+
+typedef unsigned int bitcell_t;
+#define BITCELL_ALL1 UINT_MAX
+#define BITCELL_LIT(NUMTOKEN) NUMTOKEN ## U
+
+#define CHAR_SET_SIZE ((UCHAR_MAX + 1) / (sizeof (bitcell_t) * CHAR_BIT))
+
+typedef struct char_set {
+ bitcell_t bitcell[CHAR_SET_SIZE];
+} char_set_t;
+
+void char_set_clear(char_set_t *);
+void char_set_compl(char_set_t *);
+void char_set_add(char_set_t *, int);
+void char_set_add_range(char_set_t *, int, int); /* inclusive */
+int char_set_contains(char_set_t *, int);
+
+typedef enum {
+ nfa_accept, nfa_empty, nfa_wild, nfa_single, nfa_set
+} nfa_kind_t;
+
+typedef union nfa_state nfa_state_t;
+
+struct nfa_state_accept {
+ nfa_kind_t kind;
+ unsigned visited;
+};
+
+struct nfa_state_empty {
+ nfa_kind_t kind;
+ unsigned visited;
+ nfa_state_t *trans0;
+ nfa_state_t *trans1;
+};
+
+struct nfa_state_single {
+ nfa_kind_t kind;
+ unsigned visited;
+ nfa_state_t *trans;
+ int ch;
+};
+
+struct nfa_state_set {
+ nfa_kind_t kind;
+ unsigned visited;
+ nfa_state_t *trans;
+ char_set_t *set;
+};
+
+union nfa_state {
+ struct nfa_state_accept a;
+ struct nfa_state_empty e;
+ struct nfa_state_single o;
+ struct nfa_state_set s;
+};
+
+nfa_state_t *nfa_state_accept(void);
+nfa_state_t *nfa_state_empty(nfa_state_t *, nfa_state_t *);
+nfa_state_t *nfa_state_single(nfa_state_t *, int ch);
+nfa_state_t *nfa_state_wild(nfa_state_t *);
+nfa_state_t *nfa_state_set(nfa_state_t *);
+void nfa_state_free(nfa_state_t *st);
+void nfa_state_shallow_free(nfa_state_t *st);
+void nfa_state_merge(nfa_state_t *accept, nfa_state_t *);
+
+typedef struct nfa nfa_t;
+
+struct nfa {
+ nfa_state_t *start;
+ nfa_state_t *accept;
+};
+
+nfa_t nfa_compile_regex(obj_t *regex);
+void nfa_free(nfa_t);
+long nfa_run(nfa_t nfa, const char *str);
+obj_t *regex_compile(obj_t *regex_sexp);
+nfa_t *regex_nfa(obj_t *);
+obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
+ obj_t *from_end);
+obj_t *match_regex(obj_t *str, obj_t *regex, obj_t *pos);