From 030ce483baef93392d54a4aec90bfa7b5906bc53 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Sun, 15 Dec 2013 00:17:39 -0800 Subject: Changing the tokenizer to get rid of IDENT, KEYWORD and METAVAR token categories, replaced by a single one called SYMTOK. Package prefixes are now recognized and processed in tokens. * lib.c (delete_package): Fix problem in no-such-package error case: it would always report nil as the name. (intern): Fix nonsensical error message: in the no-such-package case it would report that the symbol exists already. * parser.l (grammar): Occurrences of KEYWORD, METAVAR, and IDENT scrubbed. All rules reporting any of these now return SYMTOK. The main one of these is greatly simplified. * parser.y (sym_helper): New function. (char_from_name): const qualifier inside param's type declaration. (grammar): IDENT, KEYWORD and METAVAR tokens are gone. New token SYMTOK. Grammar refactored around SYMTOK and using the new sym_helper function. (char_from_name): Updated. --- ChangeLog | 22 ++++++++++++ lib.c | 7 ++-- parser.l | 19 +++-------- parser.y | 113 ++++++++++++++++++++++++++++++++++++++++---------------------- 4 files changed, 105 insertions(+), 56 deletions(-) diff --git a/ChangeLog b/ChangeLog index a75d44ba..96962b7a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2013-12-15 Kaz Kylheku + + Changing the tokenizer to get rid of IDENT, KEYWORD and METAVAR + token categories, replaced by a single one called SYMTOK. + Package prefixes are now recognized and processed in tokens. + + * lib.c (delete_package): Fix problem in no-such-package + error case: it would always report nil as the name. + (intern): Fix nonsensical error message: in the no-such-package case it + would report that the symbol exists already. + + * parser.l (grammar): Occurrences of KEYWORD, METAVAR, and IDENT + scrubbed. All rules reporting any of these now return + SYMTOK. The main one of these is greatly simplified. + + * parser.y (sym_helper): New function. 
+ (char_from_name): const qualifier inside param's type declaration. + (grammar): IDENT, KEYWORD and METAVAR tokens are gone. + New token SYMTOK. Grammar refactored around SYMTOK and using + the new sym_helper function. + (char_from_name): Updated. + 2013-12-14 Kaz Kylheku Support for parsing Lisp expression out of strings and streams. diff --git a/lib.c b/lib.c index 245c864f..e95d72f8 100644 --- a/lib.c +++ b/lib.c @@ -2542,9 +2542,10 @@ val find_package(val name) val delete_package(val package) { if (stringp(package)) { - package = find_package(package); - if (!package) + val p = find_package(package); + if (!p) uw_throwf(error_s, lit("delete-package: no such package: ~s"), package, nao); + package = p; } type_check (package, PKG); @@ -2562,7 +2563,7 @@ val intern(val str, val package) } else if (stringp(package)) { package = find_package(str); if (!package) - uw_throwf(error_s, lit("intern: symbol ~s exists already"), str, nao); + uw_throwf(error_s, lit("intern: ~s no such package"), str, nao); } type_check (package, PKG); diff --git a/parser.l b/parser.l index 78e29408..ef92db03 100644 --- a/parser.l +++ b/parser.l @@ -260,17 +260,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} || yy_top_state() == QSILIT) yy_pop_state(); - switch (yytext[0]) { - case ':': - yylval.lexeme = utf8_dup_from(yytext + 1); - return KEYWORD; - case '@': - yylval.lexeme = utf8_dup_from(yytext + 1); - return METAVAR; - default: - yylval.lexeme = utf8_dup_from(yytext); - return IDENT; - } + yylval.lexeme = utf8_dup_from(yytext); + return SYMTOK; } : { @@ -278,7 +269,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} || yy_top_state() == QSILIT) yy_pop_state(); yylval.lexeme = utf8_dup_from(""); - return KEYWORD; + return SYMTOK; } \({WS}all{WS}\) { @@ -732,12 +723,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} {SYM} { yylval.lexeme = utf8_dup_from(yytext); - return IDENT; + return SYMTOK; } [^ \t\n] { yylval.lexeme = utf8_dup_from(yytext); - return IDENT; /* hack */ + return SYMTOK; /* hack */ } \n 
{ diff --git a/parser.y b/parser.y index 8ff5f3cb..928ae457 100644 --- a/parser.y +++ b/parser.y @@ -48,6 +48,7 @@ int yylex(void); void yyerror(const char *); +static val sym_helper(wchar_t *lexeme, val meta_allowed); static val repeat_rep_helper(val sym, val args, val main, val parts); static val o_elems_transform(val output_form); static val define_transform(val define_form); @@ -55,7 +56,7 @@ static val lit_char_helper(val litchars); static val optimize_text(val text_form); static val unquotes_occur(val quoted_form); static val choose_quote(val quoted_form); -static wchar_t char_from_name(wchar_t *name); +static wchar_t char_from_name(const wchar_t *name); static val parsed_spec; @@ -68,7 +69,7 @@ static val parsed_spec; cnum lineno; } -%token SPACE TEXT IDENT KEYWORD METAVAR +%token SPACE TEXT SYMTOK %token ALL SOME NONE MAYBE CASES BLOCK CHOOSE GATHER %token AND OR END COLLECT %token UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY @@ -100,7 +101,7 @@ static val parsed_spec; %type '(' '[' %nonassoc LOW /* used for precedence assertion */ -%right IDENT '{' '}' +%right SYMTOK '{' '}' %right ALL SOME NONE MAYBE CASES CHOOSE AND OR END COLLECT UNTIL COLL %right OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE %right SPACE TEXT NUMBER @@ -617,35 +618,34 @@ rep_parts_opt : SINGLE o_elems_opt /* This sucks, but factoring '*' into a nonterminal * that generates an empty phrase causes reduce/reduce conflicts. 
*/ -var : IDENT { $$ = list(var_s, intern(string_own($1), nil), - nao); } - | IDENT elem { $$ = list(var_s, intern(string_own($1), nil), +var : SYMTOK { $$ = list(var_s, sym_helper($1, nil), nao); } + | SYMTOK elem { $$ = list(var_s, sym_helper($1, nil), $2, nao); } - | '{' IDENT '}' { $$ = list(var_s, intern(string_own($2), nil), - nao); } - | '{' IDENT '}' elem { $$ = list(var_s, intern(string_own($2), nil), + | '{' SYMTOK '}' { $$ = list(var_s, sym_helper($2, nil), nao); } + | '{' SYMTOK '}' elem { $$ = list(var_s, sym_helper($2, nil), $4, nao); } - | '{' IDENT modifiers '}' { $$ = list(var_s, intern(string_own($2), nil), + | '{' SYMTOK modifiers '}' { $$ = list(var_s, sym_helper($2, nil), nil, $3, nao); } - | '{' IDENT modifiers '}' elem - { $$ = list(var_s, intern(string_own($2), nil), + | '{' SYMTOK modifiers '}' elem + { $$ = list(var_s, sym_helper($2, nil), $5, $3, nao); } - | var_op IDENT { $$ = list(var_s, intern(string_own($2), nil), + | var_op SYMTOK { $$ = list(var_s, sym_helper($2, nil), nil, $1, nao); } - | var_op IDENT elem { $$ = list(var_s, intern(string_own($2), nil), + | var_op SYMTOK elem { $$ = list(var_s, sym_helper($2, nil), $3, $1, nao); } - | var_op '{' IDENT '}' { $$ = list(var_s, intern(string_own($3), nil), + | var_op '{' SYMTOK '}' { $$ = list(var_s, sym_helper($3, nil), nil, $1, nao); } - | var_op '{' IDENT '}' elem { $$ = list(var_s, intern(string_own($3), nil), + | var_op '{' SYMTOK '}' elem + { $$ = list(var_s, sym_helper($3, nil), $5, $1, nao); } - | var_op '{' IDENT regex '}' { $$ = nil; + | var_op '{' SYMTOK regex '}' { $$ = nil; yyerror("longest match " "not useable with regex"); } - | var_op '{' IDENT NUMBER '}' { $$ = nil; + | var_op '{' SYMTOK NUMBER '}' { $$ = nil; yyerror("longest match " "not useable with " "fixed width match"); } - | IDENT error { $$ = nil; + | SYMTOK error { $$ = nil; yybadtoken(yychar, lit("variable spec")); } | var_op error { $$ = nil; yybadtoken(yychar, lit("variable spec")); } @@ -661,15 +661,14 @@ 
modifiers : NUMBER { $$ = cons($1, nil); } | list { $$ = cons($1, nil); } ; -o_var : IDENT { $$ = list(var_s, intern(string_own($1), nil), - nao); } - | IDENT o_elem { $$ = list(var_s, intern(string_own($1), nil), +o_var : SYMTOK { $$ = list(var_s, sym_helper($1, nil), nao); } + | SYMTOK o_elem { $$ = list(var_s, sym_helper($1, nil), $2, nao); } | '{' expr exprs_opt '}' { $$ = list(var_s, $2, nil, $3, nao); } | '{' expr exprs_opt '}' o_elem { $$ = list(var_s, $2, $5, $3, nao); } - | IDENT error { $$ = nil; + | SYMTOK error { $$ = nil; yybadtoken(yychar, lit("variable spec")); } ; @@ -730,14 +729,7 @@ exprs_opt : exprs { $$ = $1; } | /* empty */ { $$ = nil; } ; -expr : IDENT { $$ = rl(intern(string_own($1), nil), - num(lineno)); } - | KEYWORD { $$ = rl(intern(string_own($1), - keyword_package), - num(lineno)); } - | METAVAR { $$ = list(var_s, - intern(string_own($1), nil), nao); - rl($$, num(lineno)); } +expr : SYMTOK { $$ = rl(sym_helper($1, t), num(lineno)); } | METANUM { $$ = cons(var_s, cons($1, nil)); rl($$, num(lineno)); } | NUMBER { $$ = $1; } @@ -860,12 +852,14 @@ strlit : '"' '"' { $$ = null_string; } yybadtoken(yychar, lit("string literal")); } ; -chrlit : HASH_BACKSLASH IDENT { wchar_t ch; +chrlit : HASH_BACKSLASH SYMTOK { wchar_t ch; val str = string_own($2); - if ($2[1] == 0) - { ch = $2[0]; } + const wchar_t *cstr = c_str(str); + + if (cstr[1] == 0) + { ch = cstr[0]; } else - { ch = char_from_name($2); + { ch = char_from_name(cstr); if (ch == L'!') { yyerrorf(lit("unknown character name: ~a"), str, nao); }} @@ -903,6 +897,49 @@ litchars : LITCHAR { $$ = cons(chr($1), nil); } %% +static val sym_helper(wchar_t *lexeme, val meta_allowed) /* Interns the symbol named by a SYMTOK lexeme: ":name" goes to keyword_package, "pkg:name" to the package found under pkg, a bare name is interned with a nil package argument. A leading '@' is an error unless meta_allowed is true, in which case list(var_s, sym, nao) is returned. Errors are reported through yyerrorf and yield nil. */ +{ + int leading_at = *lexeme == L'@'; + wchar_t *tokfree = lexeme; + wchar_t *colon = wcschr(lexeme, L':'); + val sym_name = nil; + val package = nil; + val sym; + + if (leading_at) { + if (!meta_allowed) { + val tok = string_own(lexeme); + yyerrorf(lit("~a: meta variable not allowed in this context"), tok, nao); + 
return nil; + } + lexeme++; + } + + if (colon != 0) + *colon = 0; + + if (colon == lexeme) { + package = keyword_package; + sym_name = string(colon + 1); + free(tokfree); + } else if (colon != 0) { + val pkg_name = string(lexeme); /* prefix text, reused in the diagnostic */ + sym_name = string(colon + 1); + free(tokfree); + if (!(package = find_package(pkg_name))) { /* resolve prefix to a package */ + yyerrorf(lit("~a:~a: package ~a not found"), pkg_name, sym_name, pkg_name, nao); + return nil; + } + } else { + sym_name = string(lexeme); + free(tokfree); + } + + sym = intern(sym_name, package); + + return leading_at ? list(var_s, sym, nao) : sym; +} + static val repeat_rep_helper(val sym, val args, val main, val parts) { uses_or2; @@ -1076,7 +1113,7 @@ val rlset(val form, val info) return form; } -static wchar_t char_from_name(wchar_t *name) +static wchar_t char_from_name(const wchar_t *name) { static struct { const wchar_t *name; @@ -1123,9 +1160,7 @@ void yybadtoken(int tok, val context) return; case SPACE: problem = lit("space"); break; case TEXT: problem = lit("text"); break; - case IDENT: problem = lit("identifier"); break; - case KEYWORD: problem = lit("keyword"); break; - case METAVAR: problem = lit("metavar"); break; + case SYMTOK: problem = lit("symbol-token"); break; case METANUM: problem = lit("metanum"); break; case ALL: problem = lit("\"all\""); break; case SOME: problem = lit("\"some\""); break; -- cgit v1.2.3