diff options
-rw-r--r-- | parser.l | 91 | ||||
-rw-r--r-- | txr.1 | 100 |
2 files changed, 161 insertions, 30 deletions
@@ -425,123 +425,123 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return SYMTOK; } -<SPECIAL>\({WS}all{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?all{WS}\) { return directive_tok(yyscanner, ALL, 0); } -<SPECIAL>\({WS}some/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?some/{ID_END} { return directive_tok(yyscanner, SOME, NESTED); } -<SPECIAL>\({WS}none{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?none{WS}\) { return directive_tok(yyscanner, NONE, 0); } -<SPECIAL>\({WS}maybe{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?maybe{WS}\) { return directive_tok(yyscanner, MAYBE, 0); } -<SPECIAL>\({WS}cases{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?cases{WS}\) { return directive_tok(yyscanner, CASES, 0); } -<SPECIAL>\({WS}block/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?block/{ID_END} { return directive_tok(yyscanner, BLOCK, NESTED); } -<SPECIAL>\({WS}choose/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?choose/{ID_END} { return directive_tok(yyscanner, CHOOSE, NESTED); } -<SPECIAL>\({WS}gather/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?gather/{ID_END} { return directive_tok(yyscanner, GATHER, NESTED); } -<SPECIAL>\({WS}and{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?and{WS}\) { return directive_tok(yyscanner, AND, 0); } -<SPECIAL>\({WS}or{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?or{WS}\) { return directive_tok(yyscanner, OR, 0); } -<SPECIAL>\({WS}end{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?end{WS}\) { return directive_tok(yyscanner, END, 0); } -<SPECIAL>\({WS}collect/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?collect/{ID_END} { return directive_tok(yyscanner, COLLECT, NESTED); } -<SPECIAL>\({WS}coll/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?coll/{ID_END} { return directive_tok(yyscanner, COLL, NESTED); } -<SPECIAL>\({WS}until/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?until/{ID_END} { return directive_tok(yyscanner, UNTIL, NESTED); } -<SPECIAL>\({WS}output/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?output/{ID_END} { return directive_tok(yyscanner, OUTPUT, NESTED); } -<SPECIAL>\({WS}repeat/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?repeat/{ID_END} { return directive_tok(yyscanner, REPEAT, NESTED); 
} -<SPECIAL>\({WS}rep/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?rep/{ID_END} { return directive_tok(yyscanner, REP, NESTED); } -<SPECIAL>\({WS}single{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?single{WS}\) { return directive_tok(yyscanner, SINGLE, 0); } -<SPECIAL>\({WS}first{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?first{WS}\) { return directive_tok(yyscanner, FIRST, 0); } -<SPECIAL>\({WS}last/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?last/{ID_END} { return directive_tok(yyscanner, LAST, NESTED); } -<SPECIAL>\({WS}empty{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?empty{WS}\) { return directive_tok(yyscanner, EMPTY, 0); } -<SPECIAL>\({WS}mod/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?mod/{ID_END} { return directive_tok(yyscanner, MOD, NESTED); } -<SPECIAL>\({WS}modlast/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?modlast/{ID_END} { return directive_tok(yyscanner, MODLAST, NESTED); } -<SPECIAL>\({WS}define/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?define/{ID_END} { return directive_tok(yyscanner, DEFINE, NESTED); } -<SPECIAL>\({WS}try{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?try{WS}\) { return directive_tok(yyscanner, TRY, 0); } -<SPECIAL>\({WS}catch/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?catch/{ID_END} { return directive_tok(yyscanner, CATCH, NESTED); } -<SPECIAL>\({WS}finally{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?finally{WS}\) { return directive_tok(yyscanner, FINALLY, 0); } -<SPECIAL>\({WS}if/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?if/{ID_END} { return directive_tok(yyscanner, IF, NESTED); } -<SPECIAL>\({WS}elif/{ID_END} { +<SPECIAL>\({WS}({NT0}?:)?elif/{ID_END} { return directive_tok(yyscanner, ELIF, NESTED); } -<SPECIAL>\({WS}else{WS}\) { +<SPECIAL>\({WS}({NT0}?:)?else{WS}\) { return directive_tok(yyscanner, ELSE, 0); } @@ -1020,6 +1020,37 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} static int directive_tok(scanner_t *yyscanner, int tok, int state) { struct yyguts_t *yyg = convert(struct yyguts_t *, yyscanner); + char *pstart = yytext + 1 + strspn(yytext + 1, " \t"); + char *pcolon = strchr(pstart, ':'); + char *pend = pstart + 
strspn(pstart, ":-abcdefghijklmnopqrstuvwxyz"); + + *pend = 0; + + if (pcolon != 0) { + val pkgname = string_utf8((*pcolon = 0, pstart)); + val package = if3(pstart[0], find_package(pkgname), keyword_package); + if (!package) { + yyerrprepf(yyg, lit("package ~a not found"), pkgname, nao); + tok = ERRTOK; + } + if (package != user_package && package != keyword_package) { + val sym = string_utf8(pcolon + 1); + yyerrprepf(yyg, lit("~a:~a: original usr package expected, not ~a"), + pkgname, sym, pkgname, nao); + tok = ERRTOK; + } + } else { + val symname = string_utf8(pstart); + val sym = intern_fallback(symname, cur_package); + val package = symbol_package(sym); + + if (package != user_package && package != keyword_package) { + yyerrprepf(yyg, lit("~s: this is ~a:~s: not from the usr package"), + sym, package_name(package), sym, nao); + tok = ERRTOK; + } + } + if (state != 0) yy_push_state(state, yyscanner); else @@ -42165,6 +42165,106 @@ to be catenated with (outside-macro "a") -> ;; error: + invalid operands "a" 42 .cble +.NP* Packages and the Extraction Language +The \*(TX extraction language has a syntax in which certain Lisp symbolic +expressions denoting directives +.code "@(collect ...)" +or +.code "@(end)" +behave as if they were the tokens of a phrase structure. As a matter of +implementation, these are processed specially in the parser and lexical +analyzer, and are not read in the same way as ordinary Lisp forms. + +On the other hand, some directives are not this way. For instance the +.codn "@(bind ...)" , +syntax is processed as a true Lisp expression, in which the +.code bind +token is subject to the usual rules for interning a symbol, sensitive to +.code *package* +in the usual way. + +The following notes describe the treatment of "special" directives that are +involved in phrase structure syntax. 
This treatment applies to all directives which head +off a block that must be terminated by +.codn "@(end)" , +all "punctuation" directives like +.code "@(and)" +or +.code "@(end)" +and all sub-phrase indicators like +.code "@(last)" +or +.codn "@(elif)" . + +Firstly, each such directive may have a package prefix on its main symbol, yet +is still recognized as the same token. That is to say, +.code "@(foo:collect)" +is still treated by the tokenizer and parser as the +.code "@(collect)" +token, regardless of the package prefix, and regardless of whether +.code foo:collect +is the same symbol as the +.code usr:collect +symbol. + +However, this doesn't mean that any +.code foo:collect +is allowed to denote the +.code collect +directive. + +A qualified symbol such as +.code foo:collect +must correspond to (be the same object as) precisely one of two symbols: +either the same-named symbol in the +.code usr +package, or else the same-named symbol in the +.code keyword +package. If this condition isn't satisfied, the situation is a syntax +error. Note that this check uses the original +.code usr +and +.code keyword +packages, not the packages which are currently named +.str "usr" +or +.str "keyword" +in the current +.codn *package-alist* . + +A check is also performed for an unqualified symbol. +An unqualified symbol like +.code collect +must also resolve, in the context of the current value of the +.code *package* +variable, to the same-named symbol in either the original +.code usr +or +.code keyword +package. Thus if the current package isn't +.codn usr , +and +.code "@(collect)" +is being processed, the current package must be such that +.code collect +resolves to +.codn usr:collect , +either because that symbol is present in the current package via +import, or else visible via the fallback list. 
+These rules are designed to approximate what the behavior would be +if these directives were actually scanned as Lisp forms in the usual +way and then recognized as phrase structure tokens according to +the identity of their leading symbol. The additional restriction is added +that the directive symbol names are treated as reserved. If there exists a +user-defined pattern function called +.codn mypackage:end , +it may not be invoked using the syntax +.codn "@(mypackage:end)" , +which is erroneous; though it is invokable indirectly via the +.code "@(call)" +directive. + .NP* Package Library Conventions Various functions in the package and symbol area of the library have a .meta package |