From a6b0130ceaeadce6845d698fb68712dc2786e918 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 11 Jun 2013 16:12:49 -0700 Subject: * eval.c (eval_init): lazy-str's third argument is optional. Added lazy-stringp. Changing names of length-str-{gt,ge,lt,le} to be consistent with the >, >=, < and <= functions. * lib.c (lazy_stream_func): Greatly simplified implementation. The lazy list now continues by means of recursing via an optimized version of lazy_stream_cons called lazy_stream_cont. The environment structure is simplified to just hold the next item, rather than a pointless list. The pointless setting of lcons->lc.func to nil is also removed; this is always done by the caller. (lazy_stream_cont): New static function, similar to lazy_stream_cons, but optimized by not consing up a new function and new environment cell. (lazy_stream_cons): The environment for the update function is simplified to just a single cons. * txr.1: Documented lazy string functions and lazy-stream-cons. --- ChangeLog | 20 ++++++ eval.c | 11 +-- lib.c | 33 +++++---- txr.1 | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 258 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index dba9c02a..e062c16b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2013-06-11 Kaz Kylheku + + * eval.c (eval_init): lazy-str's third argument is optional. + Added lazy-stringp. Changing names of length-str-{gt,ge,lt,le} + to be consistent with the >, >=, < and <= functions. + + * lib.c (lazy_stream_func): Greatly simplified implementation. + The lazy list now continues by means of recursing via + an optimized version of lazy_stream_cons called lazy_stream_cont. + The environment structure is simplified to just hold the next item, + rather than a pointless list. The pointless setting of lcons->lc.func + to nil is also removed; this is always done by the caller. + (lazy_stream_cont): New static function, similar to lazy_stream_cons, + but optimized by not consing up a new function and new environment + cell. + (lazy_stream_cons): The environment for the update function + is simplified to just a single cons. + + * txr.1: Documented lazy string functions and lazy-stream-cons. + 2012-03-18 Kaz Kylheku * eval.c (eval_init): lazy string related functions become intrinsics. diff --git a/eval.c b/eval.c index 501a20a0..77209599 100644 --- a/eval.c +++ b/eval.c @@ -2401,14 +2401,15 @@ void eval_init(void) reg_fun(intern(lit("break-str"), user_package), func_n2(break_str)); reg_fun(intern(lit("lazy-stream-cons"), user_package), func_n1(lazy_stream_cons)); - reg_fun(intern(lit("lazy-str"), user_package), func_n3(lazy_str)); + reg_fun(intern(lit("lazy-str"), user_package), func_n3o(lazy_str, 1)); + reg_fun(intern(lit("lazy-stringp"), user_package), func_n1(lazy_stringp)); reg_fun(intern(lit("lazy-str-force-upto"), user_package), func_n2(lazy_str_force_upto)); reg_fun(intern(lit("lazy-str-force"), user_package), func_n1(lazy_str_force)); reg_fun(intern(lit("lazy-str-get-trailing-list"), user_package), func_n2(lazy_str_get_trailing_list)); - reg_fun(intern(lit("length-str-gt"), user_package), func_n2(length_str_gt)); - reg_fun(intern(lit("length-str-ge"), user_package), func_n2(length_str_ge)); - reg_fun(intern(lit("length-str-lt"), user_package), func_n2(length_str_lt)); - reg_fun(intern(lit("length-str-le"), user_package), func_n2(length_str_le)); + reg_fun(intern(lit("length-str->"), user_package), func_n2(length_str_gt)); + reg_fun(intern(lit("length-str->="), user_package), func_n2(length_str_ge)); + reg_fun(intern(lit("length-str-<"), user_package), func_n2(length_str_lt)); + reg_fun(intern(lit("length-str-<="), user_package), func_n2(length_str_le)); reg_fun(intern(lit("vector"), user_package), func_n1(vector)); reg_fun(intern(lit("vectorp"), user_package), func_n1(vectorp)); diff --git a/lib.c b/lib.c index d5f5dd32..97844d32 100644 --- a/lib.c +++ b/lib.c @@ -3674,23 +3674,28 @@ val cat_vec(val list) return vec; } -static val lazy_stream_func(val env, val lcons) +static val lazy_stream_cont(val stream, val func, val env) { - val stream = car(env); - val next = cdr(env) ? pop(cdr_l(env)) : get_line(stream); - val ahead = get_line(stream); - - set(lcons->lc.car, next); - set(lcons->lc.cdr, if2(ahead, make_lazy_cons(lcons->lc.func))); - lcons->lc.func = nil; + val next = get_line(stream); - if (!next || !ahead) + if (!next) { close_stream(stream, t); + return nil; + } + + rplacd(env, next); + return make_lazy_cons(func); +} + +static val lazy_stream_func(val env, val lcons) +{ + val stream = car(env); + val prefetched_line = cdr(env); - if (ahead) - mpush(ahead, *cdr_l(env)); + set(lcons->lc.car, prefetched_line); + set(lcons->lc.cdr, lazy_stream_cont(stream, lcons->lc.func, env)); - return next; + return prefetched_line; } val lazy_stream_cons(val stream) @@ -3702,7 +3707,7 @@ val lazy_stream_cons(val stream) return nil; } - return make_lazy_cons(func_f1(cons(stream, cons(first, nil)), + return make_lazy_cons(func_f1(cons(stream, first), lazy_stream_func)); } @@ -3872,7 +3877,7 @@ val lazy_str_get_trailing_list(val lstr, val index) { uses_or2; val split_suffix = split_str(sub_str(lstr->ls.prefix, index, nil), - or2(car(lstr->ls.opts), string(L"\n"))); + or2(car(lstr->ls.opts), string(L"\n"))); return nappend2(split_suffix, lstr->ls.list); } diff --git a/txr.1 b/txr.1 index af8fb604..8d56e113 100644 --- a/txr.1 +++ b/txr.1 @@ -7112,6 +7112,30 @@ executing, it is still accessible. This allows the update function to retrieve a reference to itself and propagate itself into another lazy cons (as in the example under make-lazy-cons). +.SS Function lazy-stream-cons + +.TP +Syntax: + + (lazy-stream-cons ) + +.TP +Description: + +The lazy-stream-cons returns a lazy cons which generates a lazy list based on +reading lines of text from input stream , which form the elements of +the list. The get-line function is called on demand to add elements to the +list. + +The lazy-stream-cons function itself makes the first call to get-line +on the stream. If this returns nil, then the stream is closed and nil is +returned. Otherwise, a lazy cons is returned whose update function will install +that line into the car field of the lazy cons, and continue the lazy list +by making another call to lazy-stream-cons, installing the result into the +cdr field. + +the string returned by get-line, and whose cdr contains the lazy function. + .SS Function generate .TP Syntax: @@ -7352,19 +7376,6 @@ Description: The stringp function returns t if is one of the several kinds of strings. Otherwise it returns nil. -.SS Function lazy-stringp - -.TP -Syntax: - - (lazy-stringp ) - -.TP -Description: - -The lazy-stringp function returns t if is a lazy -string. Otherwise it returns nil. - .SS Function length-str .TP @@ -7952,6 +7963,195 @@ first character in string which appears in string . If there is no such character, then nil is returned. +.SH LAZY STRINGS + +Lazy strings are objects that were developed for the TXR pattern matching +language, and are exposed via TXR Lisp. Lazy strings behave much like strings, +and can be substituted for strings. However, unlike regular strings, which +exist in their entirety, first to last character, from the moment they are +created, lazy strings do not exist all at once, but are created on demand. If +character at index N of a lazy string is accessed, then characters 0 through N +of that string are forced into existence. However, characters at indices +beyond N need not necessarily exist. + +A lazy string dynamically grows by acquiring new text from a list of strings +which is attached to that lazy string object. When the lazy string is accessed +beyond the end of its hitherto materialized prefix, it takes enough strings +from the list in order to materialize the index. If the list doesn't have +enough material, then the access fails, just like an access beyond the end of a +regular string. A lazy string always takes whole strings from the attached +list. + +Lazy string growth is achieved via the lazy-str-force_upto function which +forces a string to exist up to a given character position. This function is +used internally to handle various situations. + +The lazy-str-force function forces the entire string to materialize. If the +string is connected to an infinite lazy list, this will exhaust all memory. + +Lazy strings are specially recognized in many of the regular string functions, +which do the right thing with lazy strings. For instance when sub-str +is invoked on a lazy string, a special version of the sub-str logic is +used which handles various lazy string cases, and can potentially return +another lazy string. Taking a sub-str of a lazy string from character position +7 to all the way to the end does not force the entire lazy string to exist, +and in fact the operation will work on a lazy string that is infinite. + +Furthermore, special lazy string functions are provided which allow programs to +be written carefully to take better advantage of lazy strings. What carefully +means is code that avoids unnecessarily forcing the lazy string. For instance, +in many situations it is necessary to obtain the length of a string, only to +test it for equality or inequality with some number. But it is not necessary to +compute the length of a string in order to know that it is greater than some +value. Computing the length of a lazy string is bad, because it forces the +string to exist, which may not even be possible. + +.SS Function lazy-str + +.TP +Syntax: + + (lazy-str [ []]) + +.TP +Description: + +The lazy-str function constructs a lazy string which draws material from + which is a list of strings. + +If the optional argument is given, then it specifies a string +which is appended to every string from , before that string is +incorporated into the lazy string. If is not given, or specified +as nil, then it defaults to the string "\en", and so the strings from + are effectively treated as lines which get terminated by newlines +as they accumulate into the growing prefix of the lazy string. +To avoid the use of a terminator string, a null string argument +must be explicitly passed. In that case, the lazy string grows simply +by catenating elements from . + +If the argument is specified, it must be a positive integer. It +expresses a maximum limit on how many elements will be consumed from + in order to feed the lazy string. Once that many elements are +drawn, the string ends, even if the list has not been exhausted. + +.SS Function lazy-stringp + +.TP +Syntax: + + (lazy-stringp ) + +.TP +Description: + +The lazy-stringp function returns t if is a lazy +string. Otherwise it returns nil. + +.SS Function lazy-str-force-upto + +.TP +Syntax: + + (lazy-str-force-upto ) + +.TP +Description: + +The lazy-str-force-upto function tries to instantiate the lazy string such that +the position given by materializes. The is a character +position, exactly as used in the chr-str function. + +Some positions beyond may also materialize, as a side effect. + +If the string is already materialized through to at least , or if it is +possible to materialize the string that far, then the value t is returned to +indicate success. + +If there is insufficient material to force the lazy string through to the + position, then nil is returned. + +It is an error if the argument isn't a lazy string. + +.SS Function lazy-str-force + +.TP +Syntax: + + (lazy-str-force ) + +.TP +Description: + +The argument must be a lazy string. The lazy string is forced +to fully materialize. + +The return value is an ordinary, non-lazy string equivalent to the fully +materialized lazy string. + +.SS Function lazy-str-get-trailing-list + +.TP +Syntax: + + (lazy-str-get-trailing-list ) + +.TP +Description: + +The lazy-str-get-trailing-list function is a sort of inverse operation to the +the lazy string from its associated list. + +Firstly, the string is forced up through the position . + +Next, the materialized part of the string starting at position , +through to the end, is split into pieces on occurrences of the terminator +character, which had been given as the argument in the lazy-str +constructor, and defaults to the newline character. + +Finally, a list is returned consisting of the the pieces produced by the split, +to which is appended the remaining list of the string which has not yet been +forced to materialize. + +If is a position which cannot be forced, then the lazy string's +remaining list is returned, with single null string prepended to it. + + +.SS Functions length-str->, length-str->=, length-str-< and length-str-<= + +.TP +Syntax: + + (length-str-> ) + (length-str->= ) + (length-str-< ) + (length-str-<= ) + +.TP +Description: + +These functions compare the lengths of two strings. The following +equivalences hold, as far as the resulting value is concerned: + + (length-str-> s l) <--> (> (length-str s) l) + (length-str->= s l) <--> (>= (length-str s) l) + (length-str-< s l) <--> (< (length-str s) l) + (length-str-<= s l) <--> (<= (length-str s) l) + +The difference between the length-str-* functions and the equivalent forms is +that if the string is lazy, the length-str function will fully force it in +order to calculate and return its length. + +These functions only force a string up to position , so they are not +only more efficient, but usable on infinitely long lazy strings. + +length-str cannot compute the length of a lazy string with an unbounded +length; it will exhaust all memory trying to force the string. + +These functions can be used to test such as string whether it is longer +or shorter than a given length, without forcing the string beyond +that length. + + .SH VECTORS .SS Function vector -- cgit v1.2.3