summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog31
-rw-r--r--parser.l13
-rw-r--r--parser.y7
-rw-r--r--txr.120
4 files changed, 59 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 9108376e..ecfc54cd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,36 @@
2014-01-07 Kaz Kylheku <kaz@kylheku.com>
+ The lisp-parse function can now be called multiple times
+ on the same stream to extract multiple objects; the requirement
+ that the stream must hold exactly one complete Lisp object
+ with no following material is now lifted.
+
+ * parser.l (YY_INPUT): Modified the macro so that it reads no more
+ than one character. Though this probably makes the lexer less
+ efficient, it gives us the important property that the lexer does
+ not scan ahead into the input stream, hogging data into its buffer
+ which is then destroyed. This is essential if the lisp-parse function
+ is to support multiple calls to pull objects one by one out of
+ a stream.
+
+ * parser.y (spec): Use YYACCEPT in the SECRET_ESCAPE_E clause for
+ pulling a single expression out of the token stream. YYACCEPT
+ is a trick for not invoking the $accept : spec . $end production
+ which is implicitly built into the grammar, and which causes
+ a token of lookahead to occur. This allows us to read a full
+ expression without stealing any further token: but only if the
+ grammar is structured right.
+ (exprs): This phrase structure now handles the DOTDOT syntax.
+ There is no such thing as an expr DOTDOT expr expression any more;
+ it is in the list syntax (and not supported in the dot position).
+ (expr): Remove DOTDOT syntax.
+
+ * txr.1: Updated description of .. syntax, and relaxed the description
+ of lisp-parse since it now allows multiple calls to extract
+ multiple objects.
+
+2014-01-07 Kaz Kylheku <kaz@kylheku.com>
+
* match.c (v_load): Call yylex_destroy after yyparse, so we don't
have stray scanner state possibly interfering with a subsequent
parse job.
diff --git a/parser.l b/parser.l
index 2ab713ab..bdac7a6a 100644
--- a/parser.l
+++ b/parser.l
@@ -52,15 +52,10 @@
#define YY_INPUT(buf, result, max_size) \
do { \
- val c = nil; \
- size_t n; \
- int ch = '*'; \
- for (n = 0; n < max_size && \
- (c = get_byte(yyin_stream)) && \
- (ch = c_num(c)) != '\n'; ++n) \
- buf[n] = (char) ch; \
- if (ch == '\n') \
- buf[n++] = (char) ch; \
+ val c = get_byte(yyin_stream); \
+ int n = 0; \
+ if (c) \
+ buf[n++] = (char) c_num(c); \
result = n; \
} while (0)
diff --git a/parser.y b/parser.y
index 8e7577ee..40c231c1 100644
--- a/parser.y
+++ b/parser.y
@@ -118,7 +118,7 @@ static val parsed_spec;
spec : clauses { parsed_spec = $1; }
| /* empty */ { parsed_spec = nil; }
| SECRET_ESCAPE_R regexpr { parsed_spec = $2; end_of_regex(); }
- | SECRET_ESCAPE_E expr { parsed_spec = $2; }
+ | SECRET_ESCAPE_E expr { parsed_spec = $2; YYACCEPT; }
| error '\n' { parsed_spec = nil;
if (errors >= 8)
YYABORT;
@@ -720,9 +720,13 @@ meta_expr : METAPAR exprs ')' { $$ = rlcp(cons(expr_s, expand($2)), $2); }
| METAPAR error { $$ = nil;
yybadtoken(yychar, lit("meta expression")); }
;
+
exprs : expr { $$ = rlcp(cons($1, nil), $1); }
| expr exprs { $$ = rlcp(cons($1, $2), $1); }
| expr '.' expr { $$ = rlcp(cons($1, $3), $1); }
+ | expr DOTDOT exprs { $$ = rlcp(cons(list(cons_s, $1,
+ car($3), nao),
+ cdr($3)), $1); }
;
exprs_opt : exprs { $$ = $1; }
@@ -743,7 +747,6 @@ expr : SYMTOK { $$ = rl(sym_helper($1, t), num(lineno)); }
| chrlit { $$ = rl($1, num(lineno)); }
| strlit { $$ = $1; }
| quasilit { $$ = $1; }
- | expr DOTDOT expr { $$ = list(cons_s, $1, $3, nao); }
;
regex : '/' regexpr '/' { $$ = cons(regex_s, $2); end_of_regex();
diff --git a/txr.1 b/txr.1
index 941c2255..94462c80 100644
--- a/txr.1
+++ b/txr.1
@@ -4963,6 +4963,20 @@ to represent a pair of numbers or other objects. For instance, if L
is a list, then [L 1 .. 3] computes a sublist of L consisting of
elements 1 through 2 (counting from zero).
+.TP
+Restrictions:
+
+The notation must be enclosed in a list. For instance a..b is not an
+expression, but (a..b) is. This is important if Lisp data is being parsed from
+a string or stream using the lisp-parse function. If the data "a..b" is
+parsed, the symbol "a" will be extracted, leaving "..b", which, if parsed,
+produces a syntax error since it consists of a "dotdot" token followed by
+a symbol, which is not valid syntax, akin to something like ")a" or ".a".
+
+The notation cannot occur in the dot position; that is, the syntax (a . b .. c)
+is invalid. The dotdot operator can only be used between the non-dot-position
+elements of a list.
+
.SS The DWIM Brackets
TXR Lisp has a square bracket notation. The syntax [...] is a shorthand
@@ -10525,7 +10539,11 @@ Description:
The lisp-parse function converts text denoting TXR Lisp structure, into the
corresponding data structure. The <source> argument may be either a character
string, or a stream. The source must provide the syntax of one complete Lisp
-object, without any stray tokens after that object.
+object.
+
+Multiple calls to lisp-parse on the same stream will extract successive objects
+from the stream. To parse successive objects from a string, it is necessary
+to convert it to a string stream.
The optional <error-stream> argument can be used to specify a stream to which
parse error diagnostics are sent. If absent, the diagnostics are suppressed.