diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-05-30 23:07:32 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-05-30 23:07:32 -0700 |
commit | 1bf965ceb66aa0ebc0404df306b6563253af9f1a (patch) | |
tree | f825d9b58a109e689714caebe1077deef954be4f | |
parent | c91af9a17b0533c6df846ff712e7ade306c7b38a (diff) | |
download | txr-1bf965ceb66aa0ebc0404df306b6563253af9f1a.tar.gz txr-1bf965ceb66aa0ebc0404df306b6563253af9f1a.tar.bz2 txr-1bf965ceb66aa0ebc0404df306b6563253af9f1a.zip |
Refactoring hash bang support; hash bang null hack.
The hash bang mechanism is handled in one place,
and disentangled from all parsing logic.
It is also endowed with special powers.
* eval.c (load): Pass one less argument to read_eval_stream.
* match.c (v_load): Likewise.
* parser.c (read_eval_stream): hash_bang_support Boolean
argument removed. Hash bang logic removed.
(load_rcfile): Pass only two arguments to read_eval_stream.
* parser.h (read_eval_stream): Declaration updated.
* txr.c (remove_hash_bang_line): Function removed.
(check_hash_bang): New static function.
(txr_main): Recognize the script file name while still
inside the argument processing loop. Open the file,
and check for a hash bang line, doing the special
processing which can generate more arguments from material
after a null byte in the hash bang line. The parse_stream
variable is now initialized to nil and doubles as a Boolean
indicating whether a stream has been opened. After the
loop, we remove the script file from the arguments, if we
have an open stream and the spec_file_str matches.
read_eval_stream is called only with two arguments.
* txr.1: Revised existing documentation and described
new features.
-rw-r--r-- | eval.c | 2 | ||||
-rw-r--r-- | match.c | 2 | ||||
-rw-r--r-- | parser.c | 14 | ||||
-rw-r--r-- | parser.h | 2 | ||||
-rw-r--r-- | txr.1 | 186 | ||||
-rw-r--r-- | txr.c | 129 |
6 files changed, 252 insertions, 83 deletions
@@ -3897,7 +3897,7 @@ val load(val target) env_vbind(dyn_env, load_recursive_s, t); env_vbind(dyn_env, package_s, cur_package); - if (!read_eval_stream(stream, std_error, nil)) { + if (!read_eval_stream(stream, std_error)) { close_stream(stream, nil); uw_throwf(error_s, lit("load: ~a contains errors"), path, nao); } @@ -4276,7 +4276,7 @@ static val v_load(match_files_ctx *c) } } } else { - if (!read_eval_stream(stream, std_error, nil)){ + if (!read_eval_stream(stream, std_error)){ close_stream(stream, nil); sem_error(specline, lit("load: ~a contains errors"), path, nao); } @@ -577,21 +577,11 @@ val iread(val source_in, val error_stream, val error_return_val, name_in, lineno); } -val read_eval_stream(val stream, val error_stream, val hash_bang_support) +val read_eval_stream(val stream, val error_stream) { val error_val = gensym(nil); val name = stream_get_prop(stream, name_k); - if (hash_bang_support) { - val firstline = get_line(stream); - - if (firstline && !match_str(firstline, lit("#!"), nil)) { - val flwnl = scat(nil, firstline, lit("\n"), nao); - val string_stream = make_string_byte_input_stream(flwnl); - stream = make_catenated_stream(list(string_stream, stream, nao)); - } - } - for (;;) { val form = lisp_parse(stream, error_stream, error_val, name, colon_k); val parser = get_parser(stream); @@ -639,7 +629,7 @@ static void load_rcfile(val name) } else { val saved_dyn_env = set_dyn_env(make_env(nil, nil, dyn_env)); env_vbind(dyn_env, load_path_s, resolved_name); - read_eval_stream(stream, std_output, nil); + read_eval_stream(stream, std_output); dyn_env = saved_dyn_env; } } @@ -116,7 +116,7 @@ val lisp_parse(val source_in, val error_stream, val error_return_val, val name_in, val lineno); val iread(val source_in, val error_stream, val error_return_val, val name_in, val lineno); -val read_eval_stream(val stream, val error_stream, val hash_bang_support); +val read_eval_stream(val stream, val error_stream); #if HAVE_TERMIOS val repl(val bindings, val in_stream, 
val out_stream); #endif @@ -1023,13 +1023,24 @@ character can be used. This is an obsolescent feature. \*(TX has several features which support use of the "hash bang" convention for creating apparently stand-alone executable programs. -If the first line of a query begins with the characters +.NP* Basic Hash Bang +Special processing is applied to \*(TX query or \*(TL script files that are +specified on the command line via the +.code -f +option or as the first non-option argument. If the first line of such +a file begins with the characters .codn #! , -that entire line is deleted from the query. This allows +that entire line is consumed and processed specially. + +This removal allows \*(TX queries to be turned into standalone executable programs in the POSIX -environment. +environment using the "hash bang" mechanism. Unlike most interpreters, +\*(TX applies special processing to the +.code #! +line, which is described below, in the section +.BR "Argument Generation with the Null Hack" . -Shell example: create a simple executable program called +Shell session example: create a simple executable program called .str "twoline.txr" and run it. This assumes \*(TX is installed in @@ -1088,6 +1099,7 @@ The .code -B option is honored. +.coNP Argument Generation with @ --args and @ --eargs On some operating systems, it is not possible to pass more than one argument through the hash bang mechanism. That is to say, this will not work. @@ -1211,6 +1223,172 @@ script name is inserted anywhere among them, possibly multiple times. Arguments for the interpreter can be encoded, as well as arguments to be processed by the script. 
+.coNP Argument Generation with the Null Hack +The +.code --args +and +.code --eargs +mechanisms do not solve the following problem: the POSIX +.code env +utility is often exploited for its +.code PATH +searching capability, and used to express hash bang scripts in the following +way: + +.cblk + #!/usr/bin/env txr +.cble + +Here, the +.code env +utility searches for the +.code txr +program in the directories indicated by the +.code PATH +variable, which liberates the script from having to encode the exact location +where the program is installed. However, if the operating system allows only +one argument in the hash bang mechanism, then no arguments can be passed +to the program. + +To mitigate this problem, +\*(TX +supports a special feature in its hash bang support. If the hash bang +.code #! +line contains a null byte, then text after the null byte, to the end of the +line, is split into fields using the space character as a separator, and these +fields are inserted into the command line. This manipulation happens during +command line processing, prior to the execution of the file, which happens +after command-line processing. If this processing is applied to a file +that is specified using the +.code -f +option, then the arguments which arise from the special processing +are inserted after that option and its argument. If this processing is +applied to the file which is the first non-option argument, then the +options are inserted before that argument. However, care is taken not +to process that argument a second time. +In either situation, processing of the command line options continues, and the +arguments which are processed next are the ones which were just inserted. This +is true even if the options had been inserted as a result of processing the +first non-option argument, which would ordinarily signal the termination of +option processing. 
+ +In the following examples, it is assumed that the script is +named, and invoked, as +.codn /home/jenny/foo.txr , +and is given arguments +.codn "--bar abc" , +and that +.code txr +resolves to +.codn /usr/bin/txr . +The +.code <NUL> +code indicates a literal ASCII NUL character, or zero byte. + +Basic example: + +.cblk + #!/usr/bin/env txr<NUL>-a 3 +.cble + +Here, +.code env +searches for +.codn txr , +which receives, from the operating system, the arguments: + +.cblk + /usr/bin/txr /home/jenny/foo.txr --bar abc +.cble + +The first non-option argument is the name of the script. \*(TX opens +the script, and notices that it begins with a hash bang line. +It consumes the hash bang line and finds the null byte inside it, +retrieving the character string after it, which is +.strn "-a 3" . +This is split into the two arguments +.code -a +and +.codn 3 , +which are then inserted into the command line ahead of +the script name. The effective command line then becomes: + +.cblk + /usr/bin/txr -a 3 /home/jenny/foo.txr --bar abc +.cble + +Command line option processing continues, beginning with the +.code -a +option. After the option is processed, +.code /home/jenny/foo.txr +is encountered again. This time it is not opened a second time; +it signals the end of option processing, exactly as it would immediately +do if it hadn't triggered the insertion of any arguments. + +Advanced example: use +.code env +to invoke +.code txr +passing options to the interpreter and to the script: + +.cblk + #!/usr/bin/env txr<NUL>--eargs:-C:175:{}:--debug +.cble + +This example shows how +.code --eargs +can be used in conjunction with the null hack. 
When +.code txr +begins executing, it receives the arguments + +.cblk + /usr/bin/txr /home/jenny/foo.txr +.cble + +The script file is opened, and the arguments delimited by the +null character in the hash bang line are inserted, resulting +in the effective command line: + +.cblk + /usr/bin/txr --eargs:-C:175:{}:--debug /home/jenny/foo.txr +.cble + +Next, +.code --eargs +is processed in the ordinary way, transforming the command line +into: + +.cblk + /usr/bin/txr -C 175 /home/jenny/foo.txr --debug +.cble + +The name of the script file is encountered, and signals the end +of option processing. Thus +.code txr +receives the +.code -C +option, instructing it to emulate some behaviors from version 175, +and the +.code /home/jenny/foo.txr +script receives +.code --debug +as +.B its +argument: it executes with the +.code *args* +list containing one element, the character string +.strn --debug . + +The hash bang null hack feature was introduced in \*(TX 177. +Previous versions ignore the hash bang line, performing no special +processing. Where a risk exists that programs which depend on the +feature might be executed by an older version of \*(TX, care must +be taken to detect and handle that situation, either by means of the +.code txr-version +variable, or else by some logic which infers that the processing of the hash +bang line hadn't been performed. + +.NP* Hash Bang and Setuid \*(TX supports setuid hash bang scripting, even on platforms that do not support setuid and setgid attributes on hash bang scripts. On such platforms, \*(TX has to be installed setuid/setgid. 
See the section @@ -198,31 +198,21 @@ static void hint(void) } #endif -static val remove_hash_bang_line(val spec) +static val check_hash_bang(val stream, val args) { - if (!consp(spec)) - return spec; - - { - val firstline = first(spec); - val firstelem = first(firstline); - val item; - - if (stringp(firstelem)) - item = firstelem; - else if (consp(firstelem) && first(firstelem) == text_s) - item = second(firstelem); - else - return spec; - - if (stringp(item)) { - val twochars = sub_str(item, zero, two); - if (equal(twochars, lit("#!"))) - return rest(spec); + val line = get_line(stream); + if (match_str(line, lit("#!"), nil)) { + val pos = search_str(line, lit("\xdc00"), nil, nil); + + if (pos) { + val after_null = sub_str(line, succ(pos), t); + val prepend_args = split_str(after_null, lit(" ")); + args = nappend2(prepend_args, args); } - - return spec; + } else { + seek_stream(stream, zero, from_start_k); } + return args; } #if __linux__ @@ -481,7 +471,7 @@ int txr_main(int argc, char **argv) val spec_file_str; int match_loglevel = opt_loglevel; val arg_undo = nil, arg; - val parse_stream = std_input; + val parse_stream = nil; val txr_lisp_p = nil; val enter_repl = nil; val args_s = intern(lit("*args*"), user_package); @@ -539,7 +529,7 @@ int txr_main(int argc, char **argv) } for (ref_arg_list = arg_list, arg = upop(&arg_list, &arg_undo); - arg && car(arg) == chr('-'); + arg; arg = upop(&arg_list, &arg_undo)) { eff_arg_tail = list_collect(eff_arg_tail, arg); @@ -547,6 +537,20 @@ int txr_main(int argc, char **argv) if (equal(arg, lit("--"))) break; + if (car(arg) != chr('-')) { + if (!parse_stream) { + spec_file_str = arg; + open_txr_file(arg, &txr_lisp_p, &spec_file_str, &parse_stream); + simulate_setuid_setgid(parse_stream); + dyn_env = make_env(nil, nil, dyn_env); + env_vbind(dyn_env, load_path_s, spec_file_str); + arg_list = check_hash_bang(parse_stream, arg_undo); + set(eff_arg_tail, butlastn(one, deref(eff_arg_tail))); + continue; + } + break; + } + if 
(equal(arg, lit("-"))) break; @@ -786,10 +790,39 @@ int txr_main(int argc, char **argv) prog_string, nao); return EXIT_FAILURE; } + if (parse_stream) { + format(std_error, + lit("~a: -c ~a: input file has already been established\n"), + prog_string, spec_file, nao); + return EXIT_FAILURE; + } specstring = arg; + drop_privilege(); + spec_file_str = lit("cmdline"); + if (gt(length_str(specstring), zero) && + chr_str(specstring, minus(length_str(specstring), one)) != chr('\n')) + specstring = cat_str(list(specstring, string(L"\n"), nao), nil); + parse_stream = make_string_byte_input_stream(specstring); break; case 'f': + if (parse_stream) { + format(std_error, + lit("~a: -f ~a: input file has already been established\n"), + prog_string, spec_file, nao); + return EXIT_FAILURE; + } spec_file = arg; + if (wcscmp(c_str(spec_file), L"-") != 0) { + open_txr_file(spec_file, &txr_lisp_p, &spec_file_str, &parse_stream); + simulate_setuid_setgid(parse_stream); + dyn_env = make_env(nil, nil, dyn_env); + env_vbind(dyn_env, load_path_s, spec_file_str); + arg_list = check_hash_bang(parse_stream, arg_list); + } else { + drop_privilege(); + spec_file_str = lit("stdin"); + parse_stream = std_input; + } break; case 'e': drop_privilege(); @@ -922,35 +955,7 @@ int txr_main(int argc, char **argv) if (!equal(cdr(eff_arg_list), ref_arg_list)) reg_var(intern(lit("*args-eff*"), user_package), eff_arg_list); - if (specstring && spec_file) { - drop_privilege(); - format(std_error, lit("~a: cannot specify both -f and -c\n"), - prog_string, nao); - return EXIT_FAILURE; - } - - if (specstring) { - drop_privilege(); - spec_file_str = lit("cmdline"); - if (gt(length_str(specstring), zero) && - chr_str(specstring, minus(length_str(specstring), one)) != chr('\n')) - specstring = cat_str(list(specstring, string(L"\n"), nao), nil); - parse_stream = make_string_byte_input_stream(specstring); - if (arg) - arg_list = arg_undo; - } else if (spec_file) { - if (wcscmp(c_str(spec_file), L"-") != 0) { - 
open_txr_file(spec_file, &txr_lisp_p, &spec_file_str, &parse_stream); - simulate_setuid_setgid(parse_stream); - dyn_env = make_env(nil, nil, dyn_env); - env_vbind(dyn_env, load_path_s, spec_file_str); - } else { - drop_privilege(); - spec_file_str = lit("stdin"); - } - if (arg) - arg_list = arg_undo; - } else { + if (!parse_stream) { if (!arg) { drop_privilege(); if (enter_repl) @@ -966,15 +971,11 @@ int txr_main(int argc, char **argv) #endif } - if (!equal(arg, lit("-"))) { - open_txr_file(arg, &txr_lisp_p, &spec_file_str, &parse_stream); - simulate_setuid_setgid(parse_stream); - dyn_env = make_env(nil, nil, dyn_env); - env_vbind(dyn_env, load_path_s, spec_file_str); - } else { - drop_privilege(); - spec_file_str = lit("stdin"); - } + drop_privilege(); + spec_file_str = lit("stdin"); + parse_stream = std_input; + } else if (specstring || spec_file) { + arg_list = arg_undo; } reg_var(args_s, or2(orig_args, arg_list)); @@ -996,7 +997,7 @@ int txr_main(int argc, char **argv) if (parser.errors) return EXIT_FAILURE; - spec = remove_hash_bang_line(parser.syntax_tree); + spec = parser.syntax_tree; opt_loglevel = match_loglevel; @@ -1026,7 +1027,7 @@ int txr_main(int argc, char **argv) } { - val result = read_eval_stream(parse_stream, std_error, t); + val result = read_eval_stream(parse_stream, std_error); close_stream(parse_stream, nil); |