summaryrefslogtreecommitdiffstats
path: root/regex.h
blob: 10fcf4b48e72c2bf68c1277f8ee259e9a93d9d4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* Copyright 2009
 * Kaz Kylheku <kkylheku@gmail.com>
 * Vancouver, Canada
 * All rights reserved.
 *
 * BSD License:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *   3. The name of the author may not be used to endorse or promote
 *      products derived from this software without specific prior
 *      written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

#include <limits.h>

typedef unsigned int bitcell_t;
#define BITCELL_ALL1 UINT_MAX
#define BITCELL_LIT(NUMTOKEN) NUMTOKEN ## U

#define CHAR_SET_SIZE ((UCHAR_MAX + 1) / (sizeof (bitcell_t) * CHAR_BIT))

typedef struct char_set {
  bitcell_t bitcell[CHAR_SET_SIZE];
} char_set_t;

void char_set_clear(char_set_t *);
void char_set_compl(char_set_t *);
void char_set_add(char_set_t *, int);
void char_set_add_range(char_set_t *, int, int); /* inclusive */
int char_set_contains(char_set_t *, int);

typedef enum { 
  nfa_accept, nfa_empty, nfa_wild, nfa_single, nfa_set 
} nfa_kind_t;

typedef union nfa_state nfa_state_t;

struct nfa_state_accept {
  nfa_kind_t kind;
  unsigned visited;
};

struct nfa_state_empty {
  nfa_kind_t kind;
  unsigned visited;
  nfa_state_t *trans0;
  nfa_state_t *trans1;
};

struct nfa_state_single {
  nfa_kind_t kind;
  unsigned visited;
  nfa_state_t *trans;
  int ch;
};

struct nfa_state_set {
  nfa_kind_t kind;
  unsigned visited;
  nfa_state_t *trans;
  char_set_t *set;
};

union nfa_state {
  struct nfa_state_accept a;
  struct nfa_state_empty e;
  struct nfa_state_single o;
  struct nfa_state_set s;
};

nfa_state_t *nfa_state_accept(void);
nfa_state_t *nfa_state_empty(nfa_state_t *, nfa_state_t *);
nfa_state_t *nfa_state_single(nfa_state_t *, int ch);
nfa_state_t *nfa_state_wild(nfa_state_t *);
nfa_state_t *nfa_state_set(nfa_state_t *);
void nfa_state_free(nfa_state_t *st);
void nfa_state_shallow_free(nfa_state_t *st);
void nfa_state_merge(nfa_state_t *accept, nfa_state_t *);

typedef struct nfa nfa_t;

struct nfa {
  nfa_state_t *start;
  nfa_state_t *accept;
};

nfa_t nfa_compile_regex(obj_t *regex);
void nfa_free(nfa_t);
long nfa_run(nfa_t nfa, const char *str);
obj_t *regex_compile(obj_t *regex_sexp);
nfa_t *regex_nfa(obj_t *);
obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
                    obj_t *from_end);
obj_t *match_regex(obj_t *str, obj_t *regex, obj_t *pos);