summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2014-01-07 19:01:33 -0800
committerKaz Kylheku <kaz@kylheku.com>2014-01-07 19:01:33 -0800
commitd1ecfd527d7717921e013d35be3070e7f95265e5 (patch)
treec6de538e9b15b1a210d21223f311c9294f554c19
parent9578ad156a1b076905eb26dd746261a506a0edcf (diff)
downloadtxr-d1ecfd527d7717921e013d35be3070e7f95265e5.tar.gz
txr-d1ecfd527d7717921e013d35be3070e7f95265e5.tar.bz2
txr-d1ecfd527d7717921e013d35be3070e7f95265e5.zip
The lisp-parse function can now be called multiple times
on the same stream to extract multiple objects; the requirement that the stream must hold exactly one complete Lisp object with no following material is now lifted. * parser.l (YY_INPUT): Modified the macro so that it reads no more than one character. Though this probably makes the lexer less efficient, it gives us the important property that the lexer does not scan ahead into the input stream, hogging data into its buffer which is then destroyed. This is essential if the lisp-parse function is to support multiple calls to pull objects one by one out of a stream. * parser.y (spec): Use YYACCEPT in the SECRET_ESCAPE_E clause for pulling a single expression out of the token stream. YYACCEPT is a trick for not invoking the $accept : spec . $end production which is implicitly built into the grammar, and which causes a token of lookahead to occur. This allows us to read a full expression without stealing any further token: but only if the grammar is structured right. (exprs): This phrase structure now handles the DOTDOT syntax. There is no such thing as an expr DOTDOT expr expression any more; it is in the list syntax (and not supported in the dot position). (expr): Remove DOTDOT syntax. * txr.1: Updated description of .. syntax, and relaxed the description of lisp-parse since it now allows multiple calls to extract multiple objects.
-rw-r--r--ChangeLog31
-rw-r--r--parser.l13
-rw-r--r--parser.y7
-rw-r--r--txr.120
4 files changed, 59 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 9108376e..ecfc54cd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,36 @@
2014-01-07 Kaz Kylheku <kaz@kylheku.com>
+ The lisp-parse function can now be called multiple times
+ on the same stream to extract multiple objects; the requirement
+ that the stream must hold exactly one complete Lisp object
+ with no following material is now lifted.
+
+ * parser.l (YY_INPUT): Modified the macro so that it reads no more
+ than one character. Though this probably makes the lexer less
+ efficient, it gives us the important property that the lexer does
+ not scan ahead into the input stream, hogging data into its buffer
+ which is then destroyed. This is essential if the lisp-parse function
+ is to support multiple calls to pull objects one by one out of
+ a stream.
+
+ * parser.y (spec): Use YYACCEPT in the SECRET_ESCAPE_E clause for
+ pulling a single expression out of the token stream. YYACCEPT
+ is a trick for not invoking the $accept : spec . $end production
+ which is implicitly built into the grammar, and which causes
+ a token of lookahead to occur. This allows us to read a full
+ expression without stealing any further token: but only if the
+ grammar is structured right.
+ (exprs): This phrase structure now handles the DOTDOT syntax.
+ There is no such thing as an expr DOTDOT expr expression any more;
+ it is in the list syntax (and not supported in the dot position).
+ (expr): Remove DOTDOT syntax.
+
+ * txr.1: Updated description of .. syntax, and relaxed the description
+ of lisp-parse since it now allows multiple calls to extract
+ multiple objects.
+
+2014-01-07 Kaz Kylheku <kaz@kylheku.com>
+
* match.c (v_load): Call yylex_destroy after yyparse, so we don't
have stray scanner state possibly interfering with a subsequent
parse job.
diff --git a/parser.l b/parser.l
index 2ab713ab..bdac7a6a 100644
--- a/parser.l
+++ b/parser.l
@@ -52,15 +52,10 @@
#define YY_INPUT(buf, result, max_size) \
do { \
- val c = nil; \
- size_t n; \
- int ch = '*'; \
- for (n = 0; n < max_size && \
- (c = get_byte(yyin_stream)) && \
- (ch = c_num(c)) != '\n'; ++n) \
- buf[n] = (char) ch; \
- if (ch == '\n') \
- buf[n++] = (char) ch; \
+ val c = get_byte(yyin_stream); \
+ int n = 0; \
+ if (c) \
+ buf[n++] = (char) c_num(c); \
result = n; \
} while (0)
diff --git a/parser.y b/parser.y
index 8e7577ee..40c231c1 100644
--- a/parser.y
+++ b/parser.y
@@ -118,7 +118,7 @@ static val parsed_spec;
spec : clauses { parsed_spec = $1; }
| /* empty */ { parsed_spec = nil; }
| SECRET_ESCAPE_R regexpr { parsed_spec = $2; end_of_regex(); }
- | SECRET_ESCAPE_E expr { parsed_spec = $2; }
+ | SECRET_ESCAPE_E expr { parsed_spec = $2; YYACCEPT; }
| error '\n' { parsed_spec = nil;
if (errors >= 8)
YYABORT;
@@ -720,9 +720,13 @@ meta_expr : METAPAR exprs ')' { $$ = rlcp(cons(expr_s, expand($2)), $2); }
| METAPAR error { $$ = nil;
yybadtoken(yychar, lit("meta expression")); }
;
+
exprs : expr { $$ = rlcp(cons($1, nil), $1); }
| expr exprs { $$ = rlcp(cons($1, $2), $1); }
| expr '.' expr { $$ = rlcp(cons($1, $3), $1); }
+ | expr DOTDOT exprs { $$ = rlcp(cons(list(cons_s, $1,
+ car($3), nao),
+ cdr($3)), $1); }
;
exprs_opt : exprs { $$ = $1; }
@@ -743,7 +747,6 @@ expr : SYMTOK { $$ = rl(sym_helper($1, t), num(lineno)); }
| chrlit { $$ = rl($1, num(lineno)); }
| strlit { $$ = $1; }
| quasilit { $$ = $1; }
- | expr DOTDOT expr { $$ = list(cons_s, $1, $3, nao); }
;
regex : '/' regexpr '/' { $$ = cons(regex_s, $2); end_of_regex();
diff --git a/txr.1 b/txr.1
index 941c2255..94462c80 100644
--- a/txr.1
+++ b/txr.1
@@ -4963,6 +4963,20 @@ to represent a pair of numbers or other objects. For instance, if L
is a list, then [L 1 .. 3] computes a sublist of L consisting of
elements 1 through 2 (counting from zero).
+.TP
+Restrictions:
+
+The notation must be enclosed in a list. For instance a..b is not an
+expression, but (a..b) is. This is important if Lisp data is being parsed from
+a string or stream using the lisp-parse function. If the data "a..b" is
+parsed, the symbol "a" will be extracted, leaving "..b", which, if parsed,
+produces a syntax error since it consists of a "dotdot" token followed by
+a symbol, which is not valid syntax, akin to something like ")a" or ".a".
+
+The notation cannot occur in the dot position; that is, the syntax (a . b .. c)
+is invalid. The dotdot operator can only be used between the non-dot-position
+elements of a list.
+
.SS The DWIM Brackets
TXR Lisp has a square bracket notation. The syntax [...] is a shorthand
@@ -10525,7 +10539,11 @@ Description:
The lisp-parse function converts text denoting TXR Lisp structure, into the
corresponding data structure. The <source> argument may be either a character
string, or a stream. The source must provide the syntax of one complete Lisp
-object, without any stray tokens after that object.
+object.
+
+Multiple calls to lisp-parse on the same stream will extract successive objects
+from the stream. To parse successive objects from a string, it is necessary
+to convert it to a string stream.
The optional <error-stream> argument can be used to specify a stream to which
parse error diagnostics are sent. If absent, the diagnostics are suppressed.