1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
/* Copyright 2009
* Kaz Kylheku <kkylheku@gmail.com>
* Vancouver, Canada
* All rights reserved.
*
* BSD License:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. The name of the author may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <limits.h>
typedef unsigned int bitcell_t;
#define BITCELL_ALL1 UINT_MAX
#define BITCELL_LIT(NUMTOKEN) NUMTOKEN ## U
#define CHAR_SET_SIZE ((UCHAR_MAX + 1) / (sizeof (bitcell_t) * CHAR_BIT))
typedef struct char_set {
bitcell_t bitcell[CHAR_SET_SIZE];
} char_set_t;
void char_set_clear(char_set_t *);
void char_set_compl(char_set_t *);
void char_set_add(char_set_t *, int);
void char_set_add_range(char_set_t *, int, int); /* inclusive */
int char_set_contains(char_set_t *, int);
typedef enum {
nfa_accept, nfa_empty, nfa_wild, nfa_single, nfa_set
} nfa_kind_t;
typedef union nfa_state nfa_state_t;
struct nfa_state_accept {
nfa_kind_t kind;
unsigned visited;
};
struct nfa_state_empty {
nfa_kind_t kind;
unsigned visited;
nfa_state_t *trans0;
nfa_state_t *trans1;
};
struct nfa_state_single {
nfa_kind_t kind;
unsigned visited;
nfa_state_t *trans;
int ch;
};
struct nfa_state_set {
nfa_kind_t kind;
unsigned visited;
nfa_state_t *trans;
char_set_t *set;
};
union nfa_state {
struct nfa_state_accept a;
struct nfa_state_empty e;
struct nfa_state_single o;
struct nfa_state_set s;
};
nfa_state_t *nfa_state_accept(void);
nfa_state_t *nfa_state_empty(nfa_state_t *, nfa_state_t *);
nfa_state_t *nfa_state_single(nfa_state_t *, int ch);
nfa_state_t *nfa_state_wild(nfa_state_t *);
nfa_state_t *nfa_state_set(nfa_state_t *);
void nfa_state_free(nfa_state_t *st);
void nfa_state_shallow_free(nfa_state_t *st);
void nfa_state_merge(nfa_state_t *accept, nfa_state_t *);
typedef struct nfa nfa_t;
struct nfa {
nfa_state_t *start;
nfa_state_t *accept;
};
nfa_t nfa_compile_regex(obj_t *regex);
void nfa_free(nfa_t);
long nfa_run(nfa_t nfa, const char *str);
obj_t *regex_compile(obj_t *regex_sexp);
nfa_t *regex_nfa(obj_t *);
obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
obj_t *from_end);
obj_t *match_regex(obj_t *str, obj_t *regex, obj_t *pos);
|