summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog22
-rw-r--r--lib.c7
-rw-r--r--match.c12
-rw-r--r--regex.c107
-rw-r--r--regex.h7
5 files changed, 119 insertions, 36 deletions
diff --git a/ChangeLog b/ChangeLog
index dbe446c8..b0cb02ad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,27 @@
2009-10-20 Kaz Kylheku <kkylheku@gmail.com>
+ Working regex over lazy strings from freeform.
+
+ Bugfixes.
+
+ * lib.c (length_str): Fixed recursion to wrong length function.
+ (lazy_str_force): March down list properly. Update lazy
+ string's limit value.
+
+ * match.c (match_line): Convert to lazy-string-aware style; i.e.
+ avoidance of triggering a force of the whole string.
+ (match_files): Bugfix in argument processing of freeform directive.
+
+ * regex.h (nfam_result_t): New typedef.
+ (nfa_machine_reset): New function declaration.
+ (nfa_machine_feed): Updated declaration.
+
+ * regex.c (nfa_machine_init): Refactor to use nfa_machine_reset.
+ (nfa_machine_feed): Return nfam_result_t rather than just int.
+ (search_regex, match_regex): Refactor to work with lazy strings well.
+
+2009-10-20 Kaz Kylheku <kkylheku@gmail.com>
+
Implement custom separator and limit in freeform.
* lib.h (lazy_string): New struct member, opts.
diff --git a/lib.c b/lib.c
index b942703a..2bf937aa 100644
--- a/lib.c
+++ b/lib.c
@@ -736,7 +736,7 @@ obj_t *length_str(obj_t *str)
if (str->ls.type == LSTR) {
lazy_str_force(str);
- return length(str->ls.prefix);
+ return length_str(str->ls.prefix);
}
if (!str->st.len)
@@ -1391,10 +1391,13 @@ obj_t *lazy_str_force(obj_t *lstr)
} else while (gt(lim, zero) && lstr->ls.list) {
lstr->ls.prefix = cat_str(list(lstr->ls.prefix, car(lstr->ls.list), nao),
or2(car(lstr->ls.opts), string("\n")));
- lstr->ls.list = nil;
+ lstr->ls.list = cdr(lstr->ls.list);
lim = minus(lim, one);
}
+ if (lim)
+ *cdr_l(lstr->ls.opts) = lim;
+
return lstr->ls.prefix;
}
diff --git a/match.c b/match.c
index 675e3d0f..08065af1 100644
--- a/match.c
+++ b/match.c
@@ -318,8 +318,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
} else if (nump(modifier)) {
obj_t *past = plus(pos, modifier);
- if (c_num(past) > c_num(length_str(dataline)) ||
- c_num(past) < c_num(pos))
+ if (length_str_lt(dataline, past) || lt(past, pos))
{
LOG_MISMATCH("fixed field size");
return nil;
@@ -351,8 +350,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
pos = past;
} else if (nump(modifier)) {
obj_t *past = plus(pos, modifier);
- if (c_num(past) > c_num(length_str(dataline)) ||
- c_num(past) < c_num(pos))
+ if (length_str_lt(dataline, past) || lt(past, pos))
{
LOG_MISMATCH("count based var");
return nil;
@@ -466,12 +464,12 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
if (new_pos && !equal(new_pos, pos)) {
pos = new_pos;
- assert (c_num(pos) <= c_num(length_str(dataline)));
+ bug_unless (length_str_ge(dataline, pos));
} else {
pos = plus(pos, one);
}
- if (c_num(pos) >= c_num(length_str(dataline)))
+ if (length_str_le(dataline, pos))
break;
}
@@ -984,7 +982,7 @@ repeat_spec_same_data:
return nil;
}
} else if (sym == freeform) {
- obj_t *args = rest(rest(first_spec));
+ obj_t *args = rest(first_spec);
obj_t *vals = mapcar(func_n1(cdr),
mapcar(bind2other(func_n2(eval_form),
bindings), args));
diff --git a/regex.c b/regex.c
index 81e16be7..3ea69eef 100644
--- a/regex.c
+++ b/regex.c
@@ -578,20 +578,17 @@ long nfa_machine_match_span(nfa_machine_t *nfam)
* NFA machine: represents the logic of the nfa_run function as state machine
* object which can be fed one character at a time.
*/
-void nfa_machine_init(nfa_machine_t *nfam, nfa_t nfa)
+
+void nfa_machine_reset(nfa_machine_t *nfam)
{
int accept = 0;
- nfam->nfa = nfa;
nfam->last_accept_pos = -1;
- nfam->visited = nfa.start->a.visited + 1;
- nfam->move = chk_malloc(NFA_SET_SIZE * sizeof *nfam->move);
- nfam->clos = chk_malloc(NFA_SET_SIZE * sizeof *nfam->clos);
- nfam->stack = chk_malloc(NFA_SET_SIZE * sizeof *nfam->stack);
+ nfam->visited = nfam->nfa.start->a.visited + 1;
nfam->nmove = 1;
nfam->count = 0;
- nfam->move[0] = nfa.start;
+ nfam->move[0] = nfam->nfa.start;
nfam->nclos = nfa_closure(nfam->stack, nfam->move, nfam->nmove,
nfam->clos, nfam->visited++, &accept);
@@ -600,6 +597,15 @@ void nfa_machine_init(nfa_machine_t *nfam, nfa_t nfa)
nfam->last_accept_pos = nfam->count;
}
+void nfa_machine_init(nfa_machine_t *nfam, nfa_t nfa)
+{
+ nfam->nfa = nfa;
+ nfam->move = chk_malloc(NFA_SET_SIZE * sizeof *nfam->move);
+ nfam->clos = chk_malloc(NFA_SET_SIZE * sizeof *nfam->clos);
+ nfam->stack = chk_malloc(NFA_SET_SIZE * sizeof *nfam->stack);
+ nfa_machine_reset(nfam);
+}
+
void nfa_machine_cleanup(nfa_machine_t *nfam)
{
free(nfam->stack);
@@ -612,7 +618,7 @@ void nfa_machine_cleanup(nfa_machine_t *nfam)
nfam->nfa.accept = 0;
}
-int nfa_machine_feed(nfa_machine_t *nfam, int ch)
+nfam_result_t nfa_machine_feed(nfa_machine_t *nfam, int ch)
{
int accept = 0;
@@ -680,26 +686,54 @@ nfa_t *regex_nfa(obj_t *reg)
return (nfa_t *) reg->co.handle;
}
-obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
+obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start,
obj_t *from_end)
{
- const char *h = c_str(haystack);
- long len = c_num(length_str(haystack));
- long start = c_num(start_num);
nfa_t *pnfa = regex_nfa(needle_regex);
- if (start > len) {
+ if (length_str_lt(haystack, start)) {
return nil;
} else {
- long begin = from_end ? len : start;
- long end = from_end ? start - 1 : len + 1;
- int incr = from_end ? -1 : 1;
- long i;
-
- for (i = begin; i != end; i += incr) {
- long span = nfa_run(*pnfa, h + i);
- if (span >= 0)
- return cons(num(i), num(span));
+ if (from_end) {
+ long i;
+ long s = c_num(start);
+ const char *h = c_str(haystack);
+
+ for (i = c_num(length_str(haystack)) - 1; i >= s; i--) {
+ long span = nfa_run(*pnfa, h + i);
+ if (span >= 0)
+ return cons(num(i), num(span));
+ }
+ } else {
+ nfa_machine_t nfam;
+ obj_t *i, *pos = start, *retval;
+ nfam_result_t last_res = NFAM_INCOMPLETE;
+
+ nfa_machine_init(&nfam, *pnfa);
+
+again:
+ for (i = pos; length_str_gt(haystack, i); i = plus(i, one)) {
+ last_res = nfa_machine_feed(&nfam, c_chr(chr_str(haystack, i)));
+
+ if (last_res == NFAM_FAIL) {
+ nfa_machine_reset(&nfam);
+ pos = plus(pos, one);
+ goto again;
+ }
+ }
+
+ last_res = nfa_machine_feed(&nfam, 0);
+
+ switch (last_res) {
+ case NFAM_INCOMPLETE:
+ case NFAM_MATCH:
+ retval = cons(pos, num(nfa_machine_match_span(&nfam)));
+ nfa_machine_cleanup(&nfam);
+ return retval;
+ case NFAM_FAIL:
+ nfa_machine_cleanup(&nfam);
+ return nil;
+ }
}
return nil;
@@ -708,8 +742,31 @@ obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start_num,
obj_t *match_regex(obj_t *str, obj_t *reg, obj_t *pos)
{
+ nfa_machine_t nfam;
+ obj_t *i, *retval;
+ nfam_result_t last_res = NFAM_INCOMPLETE;
nfa_t *pnfa = regex_nfa(reg);
- long cpos = c_num(pos);
- long span = nfa_run(*pnfa, c_str(str) + cpos);
- return span >= 0 ? num(span + cpos) : nil;
+
+ nfa_machine_init(&nfam, *pnfa);
+
+ for (i = pos; length_str_gt(str, i); i = plus(i, one)) {
+ last_res = nfa_machine_feed(&nfam, c_chr(chr_str(str, i)));
+ if (last_res == NFAM_FAIL)
+ break;
+ }
+
+ last_res = nfa_machine_feed(&nfam, 0);
+
+ switch (last_res) {
+ case NFAM_INCOMPLETE:
+ case NFAM_MATCH:
+ retval = plus(pos, num(nfa_machine_match_span(&nfam)));
+ nfa_machine_cleanup(&nfam);
+ return retval;
+ case NFAM_FAIL:
+ nfa_machine_cleanup(&nfam);
+ return nil;
+ }
+
+ return nil;
}
diff --git a/regex.h b/regex.h
index 973ab501..c69180d7 100644
--- a/regex.h
+++ b/regex.h
@@ -95,7 +95,9 @@ typedef struct nfa {
nfa_state_t *accept;
} nfa_t;
-enum nfam_result { NFAM_INCOMPLETE, NFAM_FAIL, NFAM_MATCH };
+typedef enum nfam_result {
+ NFAM_INCOMPLETE, NFAM_FAIL, NFAM_MATCH
+} nfam_result_t;
typedef struct nfa_machine {
long last_accept_pos;
@@ -109,9 +111,10 @@ typedef struct nfa_machine {
nfa_t nfa_compile_regex(obj_t *regex);
void nfa_free(nfa_t);
long nfa_run(nfa_t nfa, const char *str);
+void nfa_machine_reset(nfa_machine_t *);
void nfa_machine_init(nfa_machine_t *, nfa_t);
void nfa_machine_cleanup(nfa_machine_t *);
-int nfa_machine_feed(nfa_machine_t *, int ch);
+nfam_result_t nfa_machine_feed(nfa_machine_t *, int ch);
long nfa_machine_match_span(nfa_machine_t *);
obj_t *regex_compile(obj_t *regex_sexp);
obj_t *regexp(obj_t *);