summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--parser.l91
-rw-r--r--txr.1100
2 files changed, 161 insertions, 30 deletions
diff --git a/parser.l b/parser.l
index 7016f332..418ecc48 100644
--- a/parser.l
+++ b/parser.l
@@ -425,123 +425,123 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return SYMTOK;
}
-<SPECIAL>\({WS}all{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?all{WS}\) {
return directive_tok(yyscanner, ALL, 0);
}
-<SPECIAL>\({WS}some/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?some/{ID_END} {
return directive_tok(yyscanner, SOME, NESTED);
}
-<SPECIAL>\({WS}none{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?none{WS}\) {
return directive_tok(yyscanner, NONE, 0);
}
-<SPECIAL>\({WS}maybe{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?maybe{WS}\) {
return directive_tok(yyscanner, MAYBE, 0);
}
-<SPECIAL>\({WS}cases{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?cases{WS}\) {
return directive_tok(yyscanner, CASES, 0);
}
-<SPECIAL>\({WS}block/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?block/{ID_END} {
return directive_tok(yyscanner, BLOCK, NESTED);
}
-<SPECIAL>\({WS}choose/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?choose/{ID_END} {
return directive_tok(yyscanner, CHOOSE, NESTED);
}
-<SPECIAL>\({WS}gather/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?gather/{ID_END} {
return directive_tok(yyscanner, GATHER, NESTED);
}
-<SPECIAL>\({WS}and{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?and{WS}\) {
return directive_tok(yyscanner, AND, 0);
}
-<SPECIAL>\({WS}or{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?or{WS}\) {
return directive_tok(yyscanner, OR, 0);
}
-<SPECIAL>\({WS}end{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?end{WS}\) {
return directive_tok(yyscanner, END, 0);
}
-<SPECIAL>\({WS}collect/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?collect/{ID_END} {
return directive_tok(yyscanner, COLLECT, NESTED);
}
-<SPECIAL>\({WS}coll/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?coll/{ID_END} {
return directive_tok(yyscanner, COLL, NESTED);
}
-<SPECIAL>\({WS}until/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?until/{ID_END} {
return directive_tok(yyscanner, UNTIL, NESTED);
}
-<SPECIAL>\({WS}output/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?output/{ID_END} {
return directive_tok(yyscanner, OUTPUT, NESTED);
}
-<SPECIAL>\({WS}repeat/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?repeat/{ID_END} {
return directive_tok(yyscanner, REPEAT, NESTED);
}
-<SPECIAL>\({WS}rep/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?rep/{ID_END} {
return directive_tok(yyscanner, REP, NESTED);
}
-<SPECIAL>\({WS}single{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?single{WS}\) {
return directive_tok(yyscanner, SINGLE, 0);
}
-<SPECIAL>\({WS}first{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?first{WS}\) {
return directive_tok(yyscanner, FIRST, 0);
}
-<SPECIAL>\({WS}last/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?last/{ID_END} {
return directive_tok(yyscanner, LAST, NESTED);
}
-<SPECIAL>\({WS}empty{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?empty{WS}\) {
return directive_tok(yyscanner, EMPTY, 0);
}
-<SPECIAL>\({WS}mod/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?mod/{ID_END} {
return directive_tok(yyscanner, MOD, NESTED);
}
-<SPECIAL>\({WS}modlast/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?modlast/{ID_END} {
return directive_tok(yyscanner, MODLAST, NESTED);
}
-<SPECIAL>\({WS}define/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?define/{ID_END} {
return directive_tok(yyscanner, DEFINE, NESTED);
}
-<SPECIAL>\({WS}try{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?try{WS}\) {
return directive_tok(yyscanner, TRY, 0);
}
-<SPECIAL>\({WS}catch/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?catch/{ID_END} {
return directive_tok(yyscanner, CATCH, NESTED);
}
-<SPECIAL>\({WS}finally{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?finally{WS}\) {
return directive_tok(yyscanner, FINALLY, 0);
}
-<SPECIAL>\({WS}if/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?if/{ID_END} {
return directive_tok(yyscanner, IF, NESTED);
}
-<SPECIAL>\({WS}elif/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?elif/{ID_END} {
return directive_tok(yyscanner, ELIF, NESTED);
}
-<SPECIAL>\({WS}else{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?else{WS}\) {
return directive_tok(yyscanner, ELSE, 0);
}
@@ -1020,6 +1020,37 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
static int directive_tok(scanner_t *yyscanner, int tok, int state)
{
struct yyguts_t *yyg = convert(struct yyguts_t *, yyscanner);
+ char *pstart = yytext + 1 + strspn(yytext + 1, " \t");
+ char *pcolon = strchr(pstart, ':');
+ char *pend = pstart + strspn(pstart, ":-abcdefghijklmnopqrstuvwxyz");
+
+ *pend = 0;
+
+ if (pcolon != 0) {
+ val pkgname = string_utf8((*pcolon = 0, pstart));
+ val package = if3(pstart[0], find_package(pkgname), keyword_package);
+ if (!package) {
+ yyerrprepf(yyg, lit("package ~a not found"), pkgname, nao);
+ tok = ERRTOK;
+ }
+ if (package != user_package && package != keyword_package) {
+ val sym = string_utf8(pcolon + 1);
+ yyerrprepf(yyg, lit("~a:~a: original usr package expected, not ~a"),
+ pkgname, sym, pkgname, nao);
+ tok = ERRTOK;
+ }
+ } else {
+ val symname = string_utf8(pstart);
+ val sym = intern_fallback(symname, cur_package);
+ val package = symbol_package(sym);
+
+ if (package != user_package && package != keyword_package) {
+ yyerrprepf(yyg, lit("~s: this is ~a:~s: not from the usr package"),
+ sym, package_name(package), sym, nao);
+ tok = ERRTOK;
+ }
+ }
+
if (state != 0)
yy_push_state(state, yyscanner);
else
diff --git a/txr.1 b/txr.1
index 6f03abcc..877ea80e 100644
--- a/txr.1
+++ b/txr.1
@@ -42165,6 +42165,106 @@ to be catenated with
(outside-macro "a") -> ;; error: + invalid operands "a" 42
.cble
+.NP* Packages and the Extraction Language
+The \*(TX extraction language has a syntax in which certain Lisp symbolic
+expressions denoting directives
+.code "@(collect ...)"
+or
+.code "@(end)"
+behave as if they were the tokens of a phrase structure. As a matter of
+implementation, these are processed specially in the parser and lexical
+analyzer, and are not read in the same way as ordinary Lisp forms.
+
+On the other hand, some directives are not this way. For instance the
+.code "@(bind ...)"
+syntax is processed as a true Lisp expression, in which the
+.code bind
+token is subject to the usual rules for interning a symbol, sensitive to
+.code *package*
+in the usual way.
+
+The following notes describe the treatment of "special" directives that are
+involved in phrase structure syntax. They apply to all directives which head
+off a block that must be terminated by
+.codn "@(end)" ,
+all "punctuation" directives like
+.code "@(and)"
+or
+.code "@(end)"
+and all sub-phrase indicators like
+.code "@(last)"
+or
+.codn "@(elif)" .
+
+Firstly, each such directive may have a package prefix on its main symbol, yet
+is still recognized as the same token. That is to say,
+.code "@(foo:collect)"
+is still treated by the tokenizer and parser as the
+.code "@(collect)"
+token, regardless of the package prefix, and regardless of whether
+.code foo:collect
+is the same symbol as the
+.code usr:collect
+symbol.
+
+However, this doesn't mean that any
+.code foo:collect
+is allowed to denote the
+.code collect
+directive.
+
+A qualified symbol such as
+.code foo:collect
+must correspond to (be the same object as) precisely one of two symbols:
+either the same-named symbol in the
+.code usr
+package, or else the same-named symbol in the
+.code keyword
+package. If this condition isn't satisfied, the situation is a syntax
+error. Note that this check uses the original
+.code usr
+and
+.code keyword
+packages, not the packages which are currently named
+.str "usr"
+or
+.str "keyword"
+in the current
+.codn *package-alist* .
+
+A check is also performed for an unqualified symbol.
+An unqualified symbol like
+.code collect
+must also resolve, in the context of the current value of the
+.code *package*
+variable, to the same-named symbol in either the original
+.code usr
+or
+.code keyword
+package. Thus if the current package isn't
+.codn usr ,
+and
+.code "@(collect)"
+is being processed, the current package must be such that
+.code collect
+resolves to
+.codn usr:collect ,
+either because that symbol is present in the current package via
+import, or else visible via the fallback list.
+
+These rules are designed to approximate what the behavior would be
+if these directives were actually scanned as Lisp forms in the usual
+way and then recognized as phrase structure tokens according to
+the identity of their leading symbol. The additional restriction is added
+that the directive symbol names are treated as reserved. If there exists a
+user-defined pattern function called
+.code mypackage:end
+it may not be invoked using the syntax
+.codn "@(mypackage:end)" ,
+which is erroneous; though it is invokable indirectly via the
+.code "@(call)"
+directive.
+
.NP* Package Library Conventions
Various functions in the package and symbol area of the library have a
.meta package