summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-03-27 22:29:52 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-03-27 22:29:52 -0700
commit2a4764f3f1934248798531d9ba803a4a296eb5df (patch)
tree9a654ce17bf8c47efde0e202f7514b4f83597f88
parentc88835415a30ff1937adfc37de404cb5d5641dd2 (diff)
downloadtxr-2a4764f3f1934248798531d9ba803a4a296eb5df.tar.gz
txr-2a4764f3f1934248798531d9ba803a4a296eb5df.tar.bz2
txr-2a4764f3f1934248798531d9ba803a4a296eb5df.zip
Package prefix handling on directive symbols.
The directives which are involved in special phrase structure syntax like @(collect), @(end), @(and) and many others have always been a hack, recognized specially in the lexical analyzer and handled in the parser. The identifiers were not treated via the normal Lisp interning mechanism. In this patch, we try to make the illusion more complete and functional. Going forward, these symbols are understood as being interned in the usr package. As a special relaxation, keyword symbols may be used in their place, so that @(:end) is the same as @(end) and @(:collect) is the same as @(collect). Suppose that @(collect) is scanned, but the collect symbol interned in the current package isn't usr:collect, or keyword:collect. Then this is an error. Further, package prefixes may be used. The syntax @(abc:collect) is still valid and is still recognized as the head of the @(collect) phrase structure syntax. However, if abc:collect isn't the same symbol as either usr:collect or :collect, then an error is triggered. * parser.l (grammar): Recognize optional package prefixes on directive phrase structure identifiers. (directive_tok): Extract package prefix and symbol from lexeme. Implement the above described checks for all the cases. * txr.1: Added description of this under the Packages and Symbols section.
-rw-r--r--parser.l91
-rw-r--r--txr.1100
2 files changed, 161 insertions, 30 deletions
diff --git a/parser.l b/parser.l
index 7016f332..418ecc48 100644
--- a/parser.l
+++ b/parser.l
@@ -425,123 +425,123 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return SYMTOK;
}
-<SPECIAL>\({WS}all{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?all{WS}\) {
return directive_tok(yyscanner, ALL, 0);
}
-<SPECIAL>\({WS}some/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?some/{ID_END} {
return directive_tok(yyscanner, SOME, NESTED);
}
-<SPECIAL>\({WS}none{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?none{WS}\) {
return directive_tok(yyscanner, NONE, 0);
}
-<SPECIAL>\({WS}maybe{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?maybe{WS}\) {
return directive_tok(yyscanner, MAYBE, 0);
}
-<SPECIAL>\({WS}cases{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?cases{WS}\) {
return directive_tok(yyscanner, CASES, 0);
}
-<SPECIAL>\({WS}block/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?block/{ID_END} {
return directive_tok(yyscanner, BLOCK, NESTED);
}
-<SPECIAL>\({WS}choose/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?choose/{ID_END} {
return directive_tok(yyscanner, CHOOSE, NESTED);
}
-<SPECIAL>\({WS}gather/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?gather/{ID_END} {
return directive_tok(yyscanner, GATHER, NESTED);
}
-<SPECIAL>\({WS}and{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?and{WS}\) {
return directive_tok(yyscanner, AND, 0);
}
-<SPECIAL>\({WS}or{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?or{WS}\) {
return directive_tok(yyscanner, OR, 0);
}
-<SPECIAL>\({WS}end{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?end{WS}\) {
return directive_tok(yyscanner, END, 0);
}
-<SPECIAL>\({WS}collect/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?collect/{ID_END} {
return directive_tok(yyscanner, COLLECT, NESTED);
}
-<SPECIAL>\({WS}coll/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?coll/{ID_END} {
return directive_tok(yyscanner, COLL, NESTED);
}
-<SPECIAL>\({WS}until/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?until/{ID_END} {
return directive_tok(yyscanner, UNTIL, NESTED);
}
-<SPECIAL>\({WS}output/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?output/{ID_END} {
return directive_tok(yyscanner, OUTPUT, NESTED);
}
-<SPECIAL>\({WS}repeat/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?repeat/{ID_END} {
return directive_tok(yyscanner, REPEAT, NESTED);
}
-<SPECIAL>\({WS}rep/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?rep/{ID_END} {
return directive_tok(yyscanner, REP, NESTED);
}
-<SPECIAL>\({WS}single{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?single{WS}\) {
return directive_tok(yyscanner, SINGLE, 0);
}
-<SPECIAL>\({WS}first{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?first{WS}\) {
return directive_tok(yyscanner, FIRST, 0);
}
-<SPECIAL>\({WS}last/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?last/{ID_END} {
return directive_tok(yyscanner, LAST, NESTED);
}
-<SPECIAL>\({WS}empty{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?empty{WS}\) {
return directive_tok(yyscanner, EMPTY, 0);
}
-<SPECIAL>\({WS}mod/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?mod/{ID_END} {
return directive_tok(yyscanner, MOD, NESTED);
}
-<SPECIAL>\({WS}modlast/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?modlast/{ID_END} {
return directive_tok(yyscanner, MODLAST, NESTED);
}
-<SPECIAL>\({WS}define/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?define/{ID_END} {
return directive_tok(yyscanner, DEFINE, NESTED);
}
-<SPECIAL>\({WS}try{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?try{WS}\) {
return directive_tok(yyscanner, TRY, 0);
}
-<SPECIAL>\({WS}catch/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?catch/{ID_END} {
return directive_tok(yyscanner, CATCH, NESTED);
}
-<SPECIAL>\({WS}finally{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?finally{WS}\) {
return directive_tok(yyscanner, FINALLY, 0);
}
-<SPECIAL>\({WS}if/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?if/{ID_END} {
return directive_tok(yyscanner, IF, NESTED);
}
-<SPECIAL>\({WS}elif/{ID_END} {
+<SPECIAL>\({WS}({NT0}?:)?elif/{ID_END} {
return directive_tok(yyscanner, ELIF, NESTED);
}
-<SPECIAL>\({WS}else{WS}\) {
+<SPECIAL>\({WS}({NT0}?:)?else{WS}\) {
return directive_tok(yyscanner, ELSE, 0);
}
@@ -1020,6 +1020,37 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
static int directive_tok(scanner_t *yyscanner, int tok, int state)
{
struct yyguts_t *yyg = convert(struct yyguts_t *, yyscanner);
+ char *pstart = yytext + 1 + strspn(yytext + 1, " \t");
+ char *pcolon = strchr(pstart, ':');
+ char *pend = pstart + strspn(pstart, ":-abcdefghijklmnopqrstuvwxyz");
+
+ *pend = 0;
+
+ if (pcolon != 0) {
+ val pkgname = string_utf8((*pcolon = 0, pstart));
+ val package = if3(pstart[0], find_package(pkgname), keyword_package);
+ if (!package) {
+ yyerrprepf(yyg, lit("package ~a not found"), pkgname, nao);
+ tok = ERRTOK;
+ }
+ if (package != user_package && package != keyword_package) {
+ val sym = string_utf8(pcolon + 1);
+ yyerrprepf(yyg, lit("~a:~a: original usr package expected, not ~a"),
+ pkgname, sym, pkgname, nao);
+ tok = ERRTOK;
+ }
+ } else {
+ val symname = string_utf8(pstart);
+ val sym = intern_fallback(symname, cur_package);
+ val package = symbol_package(sym);
+
+ if (package != user_package && package != keyword_package) {
+ yyerrprepf(yyg, lit("~s: this is ~a:~s: not from the usr package"),
+ sym, package_name(package), sym, nao);
+ tok = ERRTOK;
+ }
+ }
+
if (state != 0)
yy_push_state(state, yyscanner);
else
diff --git a/txr.1 b/txr.1
index 6f03abcc..877ea80e 100644
--- a/txr.1
+++ b/txr.1
@@ -42165,6 +42165,106 @@ to be catenated with
(outside-macro "a") -> ;; error: + invalid operands "a" 42
.cble
+.NP* Packages and the Extraction Language
+The \*(TX extraction language has a syntax in which certain Lisp symbolic
+expressions denoting directives
+.code "@(collect ...)"
+or
+.code "@(end)"
+behave as if they were the tokens of a phrase structure. As a matter of
+implementation, these are processed specially in the parser and lexical
+analyzer, and are not read in the same way as ordinary Lisp forms.
+
+On the other hand, some directives are not this way. For instance the
+.code "@(bind ...)"
+syntax is processed as a true Lisp expression, in which the
+.code bind
+token is subject to the usual rules for interning a symbol, sensitive to
+.code *package*
+in the usual way.
+
+The following notes describe the treatment of "special" directives that are
+involved in phrase structure syntax. It applies to all directives which head
+off a block that must be terminated by
+.codn "@(end)" ,
+all "punctuation" directives like
+.code "@(and)"
+or
+.code "@(end)"
+and all sub-phrase indicators like
+.code "@(last)"
+or
+.codn "@(elif)" .
+
+Firstly, each such directive may have a package prefix on its main symbol, yet
+is still recognized as the same token. That is to say,
+.code "@(foo:collect)"
+is still treated by the tokenizer and parser as the
+.code "@(collect)"
+token, regardless of the package prefix, and regardless of whether
+.code foo:collect
+is the same symbol as the
+.code usr:collect
+symbol.
+
+However, this doesn't mean that any
+.code foo:collect
+is allowed to denote the
+.code collect
+directive.
+
+A qualified symbol such as
+.code foo:collect
+must correspond to (be the same object as) precisely one of two symbols:
+either the same-named symbol in the
+.code usr
+package, or else the same-named symbol in the
+.code keyword
+package. If this condition isn't satisfied, the situation is a syntax
+error. Note that this check uses the original
+.code usr
+and
+.code keyword
+packages, not the packages which are currently named
+.str "usr"
+or
+.str "keyword"
+in the current
+.codn *package-alist* .
+
+A check is also performed for an unqualified symbol.
+An unqualified symbol like
+.code collect
+must also resolve, in the context of the current value of the
+.code *package*
+variable, to the same-named symbol in either the original
+.code usr
+or
+.code keyword
+package. Thus if the current package isn't
+.codn usr ,
+and
+.code "@(collect)"
+is being processed, the current package must be such that
+.code collect
+resolves to
+.codn usr:collect ,
+either because that symbol is present in the current package via
+import, or else visible via the fallback list.
+
+These rules are designed to approximate what the behavior would be
+if these directives were actually scanned as Lisp forms in the usual
+way and then recognized as phrase structure tokens according to
+the identity of their leading symbol. The additional restriction is added that
+the directive symbol names are treated as reserved. If there exists a
+user-defined pattern function called
+.code mypackage:end
+it may not be invoked using the syntax
+.codn "@(mypackage:end)" ,
+which is erroneous; though it is invokable indirectly via the
+.code "@(call)"
+directive.
+
.NP* Package Library Conventions
Various functions in the package and symbol area of the library have a
.meta package