summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog18
-rw-r--r--parser.l35
-rw-r--r--parser.y20
-rw-r--r--txr.140
4 files changed, 102 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 2ee31328..2d3fc453 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2014-03-25 Kaz Kylheku <kaz@kylheku.com>
+
+ Introducing word list literals.
+
+ * parser.l (WLIT): New exclusive start state.
+ Extend lexical grammar to transition to WLIT state upon
+ the #" or #*" sequence which kicks off a word literal,
+ and in that state, piecewise lexically analyze the literal,
+ mostly by borrowing rules from other literals.
+
+ * parser.y (WORDS, WSPLICE): New tokens.
+ (n_exprs): Integrate splicing form of word list literal syntax.
+ (n_expr): Integrate non-splicing form of word list literal syntax.
+ (litchars): Propagate line number info.
+ (wordslit): New grammar rule.
+
+ * txr.1: Updated.
+
2014-03-23 Kaz Kylheku <kaz@kylheku.com>
* eval.c (eval_init): Register last function as intrinsic.
diff --git a/parser.l b/parser.l
index d678100a..3bd3436a 100644
--- a/parser.l
+++ b/parser.l
@@ -190,7 +190,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
-%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL
+%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT
%%
@@ -574,6 +574,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return '`';
}
+<SPECIAL,QSPECIAL,NESTED,BRACED>#\" {
+ yy_push_state(WLIT);
+ return WORDS;
+}
+
+<SPECIAL,QSPECIAL,NESTED,BRACED>#\*\" {
+ yy_push_state(WLIT);
+ return WSPLICE;
+}
+
<NESTED,BRACED># {
return '#';
}
@@ -761,7 +771,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
/* comment to end of line */
}
-<STRLIT>\" {
+<STRLIT,WLIT>\" {
yy_pop_state();
return yytext[0];
}
@@ -771,21 +781,21 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return yytext[0];
}
-<STRLIT,QSILIT>[\\][abtnvfre "`'\\ ] {
+<STRLIT,QSILIT,WLIT>[\\][abtnvfre "`'\\ ] {
yylval.chr = char_esc(yytext[1]);
return LITCHAR;
}
-<STRLIT,QSILIT>{WS}[\\]\n{WS} {
+<STRLIT,QSILIT,WLIT>{WS}[\\]\n{WS} {
lineno++;
}
-<STRLIT,QSILIT>[\\](x{HEX}+|{OCT}+);? {
+<STRLIT,QSILIT,WLIT>[\\](x{HEX}+|{OCT}+);? {
yylval.chr = num_esc(yytext+1);
return LITCHAR;
}
-<STRLIT,QSILIT>[\\]. {
+<STRLIT,QSILIT,WLIT>[\\]. {
yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao);
}
@@ -825,18 +835,27 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return ERRTOK;
}
+<WLIT>\n {
+ lineno++;
+ return '\n';
+}
+
<QSILIT>@ {
yy_push_state(QSPECIAL);
}
-<STRLIT,CHRLIT,QSILIT>{UANYN} {
+<WLIT>{WS} {
+ return ' ';
+}
+
+<STRLIT,CHRLIT,QSILIT,WLIT>{UANYN} {
wchar_t buf[8];
utf8_from(buf, yytext);
yylval.chr = buf[0];
return LITCHAR;
}
-<STRLIT,CHRLIT,QSILIT>. {
+<STRLIT,CHRLIT,QSILIT,WLIT>. {
yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"),
num((unsigned char) yytext[0]), nao);
return ERRTOK;
diff --git a/parser.y b/parser.y
index 0aa5b2a3..aaa3d837 100644
--- a/parser.y
+++ b/parser.y
@@ -79,6 +79,7 @@ static val parsed_spec;
%token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY
%token <lineno> ERRTOK /* deliberately not used in grammar */
%token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H
+%token <lineno> WORDS WSPLICE
%token <lineno> SECRET_ESCAPE_R SECRET_ESCAPE_E
%token <val> NUMBER METANUM
@@ -100,7 +101,7 @@ static val parsed_spec;
%type <val> o_elems_opt o_elems o_elem o_var rep_elem rep_parts_opt
%type <val> regex lisp_regex regexpr regbranch
%type <val> regterm regtoken regclass regclassterm regrange
-%type <val> strlit chrlit quasilit quasi_items quasi_item litchars
+%type <val> strlit chrlit quasilit quasi_items quasi_item litchars wordslit
%type <val> not_a_clause
%type <chr> regchar
%type <lineno> '(' '[' '@'
@@ -753,6 +754,9 @@ n_exprs : n_expr { $$ = rlcp(cons($1, nil), $1); }
$$ = rlcp(cons(list(cons_s, $1,
car($3), nao),
cdr($3)), or2($1, $3)); }
+ | WSPLICE wordslit { $$ = rl($2, num($1)); }
+ | WSPLICE wordslit
+ n_exprs { $$ = nappend2(rl($2, num($1)), $3); }
;
n_expr : SYMTOK { $$ = sym_helper($1, t); }
@@ -768,6 +772,7 @@ n_expr : SYMTOK { $$ = sym_helper($1, t); }
| chrlit { $$ = $1; }
| strlit { $$ = $1; }
| quasilit { $$ = $1; }
+ | WORDS wordslit { $$ = rl($2, num($1)); }
| '\'' n_expr { $$ = rlcp(list(quote_s, $2, nao), $2); }
| '^' n_expr { $$ = rlcp(list(sys_qquote_s, $2, nao), $2); }
| ',' n_expr { $$ = rlcp(list(sys_unquote_s, $2, nao), $2); }
@@ -923,8 +928,17 @@ quasi_item : litchars { $$ = lit_char_helper($1); }
| list { $$ = rlcp(cons(expr_s, $1), $1); }
;
-litchars : LITCHAR { $$ = cons(chr($1), nil); }
- | LITCHAR litchars { $$ = cons(chr($1), $2); }
+litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(lineno)); }
+ | LITCHAR litchars { $$ = rl(cons(chr($1), $2), num(lineno)); }
+ ;
+
+wordslit : '"' { $$ = nil; }
+ | ' ' wordslit { $$ = $2; }
+ | '\n' wordslit { $$ = $2; }
+ | litchars wordslit { val word = lit_char_helper($1);
+ $$ = rlcp(cons(word, $2), $1); }
+ | error { $$ = nil;
+ yybadtoken(yychar, lit("word literal")); }
;
not_a_clause : ALL { $$ = make_expr(all_s, nil, num(lineno)); }
diff --git a/txr.1 b/txr.1
index defd6a17..5f281f2d 100644
--- a/txr.1
+++ b/txr.1
@@ -1129,6 +1129,46 @@ Example:
The first string literal is the string "foobar". The second two are "foo bar".
+.SS Word List Literals
+
+A word list literal provides a convenient way to write a list of strings
+when such a list can be given as whitespace-delimited words.
+
+There are two flavors of the word list literal: the regular word list
+literal which begins with #" (hash, double-quote) and the splicing
+word list literal which begins with #*" (hash, star, double-quote).
+
+Both literals are terminated by a double quote, which may be escaped
+as \e" in order to include it as a character. All the escaping conventions
+used in string literals can be used in word literals.
+
+Unlike in string literals, whitespace (tabs, spaces and newlines) is not
+significant in word literals: it separates words. Whitespace may be
+escaped with a backslash in order to include it as a literal character.
+
+
+Example:
+
+ #"abc def ghi" --> notates ("abc" "def" "ghi")
+
+ #"abc def
+ ghi" --> notates ("abc" "def" "ghi")
+
+ #"abc\ def ghi" --> notates ("abc def" "ghi")
+
+A splicing word literal differs from a word literal in that it does not
+produce a list of string literals, but rather it produces a sequence of string
+literal tokens that is merged into the surrounding syntax.
+
+Example:
+
+ (1 2 3 #*"abc def" 4 5 #"abc def")
+
+ --> (1 2 3 "abc" "def" 4 5 ("abc" "def"))
+
+The regular word list literal produced a single list object, but the splicing
+word list literal expanded into multiple string literal objects.
+
.SS String Quasiliterals
Quasiliterals are similar to string literals, except that they may