summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2009-11-02 19:09:45 -0800
committerKaz Kylheku <kaz@kylheku.com>2009-11-02 19:09:45 -0800
commit0f9f85d9edaa26285d596f5fe7ee04ff95a2aa84 (patch)
tree451a2b8684468d3ebe62b37f9b6cad4385a729f0 /regex.c
parentf6c4f253681b576f39d939e602e9de7bc1b8892b (diff)
downloadtxr-0f9f85d9edaa26285d596f5fe7ee04ff95a2aa84.tar.gz
txr-0f9f85d9edaa26285d596f5fe7ee04ff95a2aa84.tar.bz2
txr-0f9f85d9edaa26285d596f5fe7ee04ff95a2aa84.zip
Got regex working over lazy strings from freeform.
Bugfixes.
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c107
1 files changed, 82 insertions, 25 deletions
diff --git a/regex.c b/regex.c
index 81e16be7..3ea69eef 100644
--- a/regex.c
+++ b/regex.c
@@ -578,20 +578,17 @@ long nfa_machine_match_span(nfa_machine_t *nfam)
* NFA machine: represents the logic of the nfa_run function as state machine
* object which can be fed one character at a time.
*/
-void nfa_machine_init(nfa_machine_t *nfam, nfa_t nfa)
+
+void nfa_machine_reset(nfa_machine_t *nfam)
{
int accept = 0;
- nfam->nfa = nfa;
nfam->last_accept_pos = -1;
- nfam->visited = nfa.start->a.visited + 1;
- nfam->move = chk_malloc(NFA_SET_SIZE * sizeof *nfam->move);
- nfam->clos = chk_malloc(NFA_SET_SIZE * sizeof *nfam->clos);
- nfam->stack = chk_malloc(NFA_SET_SIZE * sizeof *nfam->stack);
+ nfam->visited = nfam->nfa.start->a.visited + 1;
nfam->nmove = 1;
nfam->count = 0;
- nfam->move[0] = nfa.start;
+ nfam->move[0] = nfam->nfa.start;
nfam->nclos = nfa_closure(nfam->stack, nfam->move, nfam->nmove,
nfam->clos, nfam->visited++, &accept);
@@ -600,6 +597,15 @@ void nfa_machine_init(nfa_machine_t *nfam, nfa_t nfa)
nfam->last_accept_pos = nfam->count;
}
+void nfa_machine_init(nfa_machine_t *nfam, nfa_t nfa)
+{
+ nfam->nfa = nfa;
+ nfam->move = chk_malloc(NFA_SET_SIZE * sizeof *nfam->move);
+ nfam->clos = chk_malloc(NFA_SET_SIZE * sizeof *nfam->clos);
+ nfam->stack = chk_malloc(NFA_SET_SIZE * sizeof *nfam->stack);
+ nfa_machine_reset(nfam);
+}
+
void nfa_machine_cleanup(nfa_machine_t *nfam)
{
free(nfam->stack);
@@ -612,7 +618,7 @@ void nfa_machine_cleanup(nfa_machine_t *nfam)
nfam->nfa.accept = 0;
}
-int nfa_machine_feed(nfa_machine_t *nfam, int ch)
+nfam_result_t nfa_machine_feed(nfa_machine_t *nfam, int ch)
{
int accept = 0;
@@ -680,26 +686,54 @@ nfa_t *regex_nfa(obj_t *reg)
return (nfa_t *) reg->co.handle;
}
-obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
+obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start,
obj_t *from_end)
{
- const char *h = c_str(haystack);
- long len = c_num(length_str(haystack));
- long start = c_num(start_num);
nfa_t *pnfa = regex_nfa(needle_regex);
- if (start > len) {
+ if (length_str_lt(haystack, start)) {
return nil;
} else {
- long begin = from_end ? len : start;
- long end = from_end ? start - 1 : len + 1;
- int incr = from_end ? -1 : 1;
- long i;
-
- for (i = begin; i != end; i += incr) {
- long span = nfa_run(*pnfa, h + i);
- if (span >= 0)
- return cons(num(i), num(span));
+ if (from_end) {
+ long i;
+ long s = c_num(start);
+ const char *h = c_str(haystack);
+
+ for (i = c_num(length_str(haystack)) - 1; i >= s; i--) {
+ long span = nfa_run(*pnfa, h + i);
+ if (span >= 0)
+ return cons(num(i), num(span));
+ }
+ } else {
+ nfa_machine_t nfam;
+ obj_t *i, *pos = start, *retval;
+ nfam_result_t last_res = NFAM_INCOMPLETE;
+
+ nfa_machine_init(&nfam, *pnfa);
+
+again:
+ for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) {
+ last_res = nfa_machine_feed(&nfam, c_chr(chr_str(haystack, i)));
+
+ if (last_res == NFAM_FAIL) {
+ nfa_machine_reset(&nfam);
+ pos = plus(pos, one);
+ goto again;
+ }
+ }
+
+ last_res = nfa_machine_feed(&nfam, 0);
+
+ switch (last_res) {
+ case NFAM_INCOMPLETE:
+ case NFAM_MATCH:
+ retval = cons(pos, num(nfa_machine_match_span(&nfam)));
+ nfa_machine_cleanup(&nfam);
+ return retval;
+ case NFAM_FAIL:
+ nfa_machine_cleanup(&nfam);
+ return nil;
+ }
}
return nil;
@@ -708,8 +742,31 @@ obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
obj_t *match_regex(obj_t *str, obj_t *reg, obj_t *pos)
{
+ nfa_machine_t nfam;
+ obj_t *i, *retval;
+ nfam_result_t last_res = NFAM_INCOMPLETE;
nfa_t *pnfa = regex_nfa(reg);
- long cpos = c_num(pos);
- long span = nfa_run(*pnfa, c_str(str) + cpos);
- return span >= 0 ? num(span + cpos) : nil;
+
+ nfa_machine_init(&nfam, *pnfa);
+
+ for (i = pos; length_str_gt(str, i); i = plus(i, one)) {
+ last_res = nfa_machine_feed(&nfam, c_chr(chr_str(str, i)));
+ if (last_res == NFAM_FAIL)
+ break;
+ }
+
+ last_res = nfa_machine_feed(&nfam, 0);
+
+ switch (last_res) {
+ case NFAM_INCOMPLETE:
+ case NFAM_MATCH:
+ retval = plus(pos, num(nfa_machine_match_span(&nfam)));
+ nfa_machine_cleanup(&nfam);
+ return retval;
+ case NFAM_FAIL:
+ nfa_machine_cleanup(&nfam);
+ return nil;
+ }
+
+ return nil;
}