diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2013-06-11 16:12:49 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2013-06-11 16:12:49 -0700 |
commit | a6b0130ceaeadce6845d698fb68712dc2786e918 (patch) | |
tree | dc7bf64d77c90a277ed9d322ae66f4b70cfb73c0 | |
parent | 573ac250cbf85b32f50545269c994292440d0b5d (diff) | |
download | txr-a6b0130ceaeadce6845d698fb68712dc2786e918.tar.gz txr-a6b0130ceaeadce6845d698fb68712dc2786e918.tar.bz2 txr-a6b0130ceaeadce6845d698fb68712dc2786e918.zip |
* eval.c (eval_init): lazy-str's third argument is optional.
Added lazy-stringp. Changing names of length-str-{gt,ge,lt,le}
to be consistent with the >, >=, < and <= functions.
* lib.c (lazy_stream_func): Greatly simplified implementation.
The lazy list now continues by means of recursing via
an optimized version of lazy_stream_cons called lazy_stream_cont.
The environment structure is simplified to just hold the next item,
rather than a pointless list. The pointless setting of lcons->lc.func
to nil is also removed; this is always done by the caller.
(lazy_stream_cont): New static function, similar to lazy_stream_cons,
but optimized by not consing up a new function and new environment
cell.
(lazy_stream_cons): The environment for the update function
is simplified to just a single cons.
* txr.1: Documented lazy string functions and lazy-stream-cons.
-rw-r--r-- | ChangeLog | 20 | ||||
-rw-r--r-- | eval.c | 11 | ||||
-rw-r--r-- | lib.c | 33 | ||||
-rw-r--r-- | txr.1 | 226 |
4 files changed, 258 insertions, 32 deletions
@@ -1,3 +1,23 @@ +2013-06-11 Kaz Kylheku <kaz@kylheku.com> + + * eval.c (eval_init): lazy-str's third argument is optional. + Added lazy-stringp. Changing names of length-str-{gt,ge,lt,le} + to be consistent with the >, >=, < and <= functions. + + * lib.c (lazy_stream_func): Greatly simplified implementation. + The lazy list now continues by means of recursing via + an optimized version of lazy_stream_cons called lazy_stream_cont. + The environment structure is simplified to just hold the next item, + rather than a pointless list. The pointless setting of lcons->lc.func + to nil is also removed; this is always done by the caller. + (lazy_stream_cont): New static function, similar to lazy_stream_cons, + but optimized by not consing up a new function and new environment + cell. + (lazy_stream_cons): The environment for the update function + is simplified to just a single cons. + + * txr.1: Documented lazy string functions and lazy-stream-cons. + 2012-03-18 Kaz Kylheku <kaz@kylheku.com> * eval.c (eval_init): lazy string related functions become intrinsics. @@ -2401,14 +2401,15 @@ void eval_init(void) reg_fun(intern(lit("break-str"), user_package), func_n2(break_str)); reg_fun(intern(lit("lazy-stream-cons"), user_package), func_n1(lazy_stream_cons)); - reg_fun(intern(lit("lazy-str"), user_package), func_n3(lazy_str)); + reg_fun(intern(lit("lazy-str"), user_package), func_n3o(lazy_str, 1)); + reg_fun(intern(lit("lazy-stringp"), user_package), func_n1(lazy_stringp)); reg_fun(intern(lit("lazy-str-force-upto"), user_package), func_n2(lazy_str_force_upto)); reg_fun(intern(lit("lazy-str-force"), user_package), func_n1(lazy_str_force)); reg_fun(intern(lit("lazy-str-get-trailing-list"), user_package), func_n2(lazy_str_get_trailing_list)); - reg_fun(intern(lit("length-str-gt"), user_package), func_n2(length_str_gt)); - reg_fun(intern(lit("length-str-ge"), user_package), func_n2(length_str_ge)); - reg_fun(intern(lit("length-str-lt"), user_package), func_n2(length_str_lt)); - reg_fun(intern(lit("length-str-le"), user_package), func_n2(length_str_le)); + reg_fun(intern(lit("length-str->"), user_package), func_n2(length_str_gt)); + reg_fun(intern(lit("length-str->="), user_package), func_n2(length_str_ge)); + reg_fun(intern(lit("length-str-<"), user_package), func_n2(length_str_lt)); + reg_fun(intern(lit("length-str-<="), user_package), func_n2(length_str_le)); reg_fun(intern(lit("vector"), user_package), func_n1(vector)); reg_fun(intern(lit("vectorp"), user_package), func_n1(vectorp)); @@ -3674,23 +3674,28 @@ val cat_vec(val list) return vec; } -static val lazy_stream_func(val env, val lcons) +static val lazy_stream_cont(val stream, val func, val env) { - val stream = car(env); - val next = cdr(env) ? pop(cdr_l(env)) : get_line(stream); - val ahead = get_line(stream); - - set(lcons->lc.car, next); - set(lcons->lc.cdr, if2(ahead, make_lazy_cons(lcons->lc.func))); - lcons->lc.func = nil; + val next = get_line(stream); - if (!next || !ahead) + if (!next) { close_stream(stream, t); + return nil; + } + + rplacd(env, next); + return make_lazy_cons(func); +} + +static val lazy_stream_func(val env, val lcons) +{ + val stream = car(env); + val prefetched_line = cdr(env); - if (ahead) - mpush(ahead, *cdr_l(env)); + set(lcons->lc.car, prefetched_line); + set(lcons->lc.cdr, lazy_stream_cont(stream, lcons->lc.func, env)); - return next; + return prefetched_line; } val lazy_stream_cons(val stream) @@ -3702,7 +3707,7 @@ val lazy_stream_cons(val stream) return nil; } - return make_lazy_cons(func_f1(cons(stream, cons(first, nil)), + return make_lazy_cons(func_f1(cons(stream, first), lazy_stream_func)); } @@ -3872,7 +3877,7 @@ val lazy_str_get_trailing_list(val lstr, val index) { uses_or2; val split_suffix = split_str(sub_str(lstr->ls.prefix, index, nil), - or2(car(lstr->ls.opts), string(L"\n"))); + or2(car(lstr->ls.opts), string(L"\n"))); return nappend2(split_suffix, lstr->ls.list); } @@ -7112,6 +7112,30 @@ executing, it is still accessible. This allows the update function to retrieve a reference to itself and propagate itself into another lazy cons (as in the example under make-lazy-cons). +.SS Function lazy-stream-cons + +.TP +Syntax: + + (lazy-stream-cons <stream>) + +.TP +Description: + +The lazy-stream-cons returns a lazy cons which generates a lazy list based on +reading lines of text from input stream <stream>, which form the elements of +the list. The get-line function is called on demand to add elements to the +list. + +The lazy-stream-cons function itself makes the first call to get-line +on the stream. If this returns nil, then the stream is closed and nil is +returned. Otherwise, a lazy cons is returned whose update function will install +that line into the car field of the lazy cons, and continue the lazy list +by making another call to lazy-stream-cons, installing the result into the +cdr field. + +the string returned by get-line, and whose cdr contains the lazy function. + .SS Function generate .TP Syntax: @@ -7352,19 +7376,6 @@ Description: The stringp function returns t if <obj> is one of the several kinds of strings. Otherwise it returns nil. -.SS Function lazy-stringp - -.TP -Syntax: - - (lazy-stringp <obj>) - -.TP -Description: - -The lazy-stringp function returns t if <obj> is a lazy -string. Otherwise it returns nil. - .SS Function length-str .TP @@ -7952,6 +7963,195 @@ first character in string <str> which appears in string <set>. If there is no such character, then nil is returned. +.SH LAZY STRINGS + +Lazy strings are objects that were developed for the TXR pattern matching +language, and are exposed via TXR Lisp. Lazy strings behave much like strings, +and can be substituted for strings. However, unlike regular strings, which +exist in their entirety, first to last character, from the moment they are +created, lazy strings do not exist all at once, but are created on demand. If +character at index N of a lazy string is accessed, then characters 0 through N +of that string are forced into existence. However, characters at indices +beyond N need not necessarily exist. + +A lazy string dynamically grows by acquiring new text from a list of strings +which is attached to that lazy string object. When the lazy string is accessed +beyond the end of its hitherto materialized prefix, it takes enough strings +from the list in order to materialize the index. If the list doesn't have +enough material, then the access fails, just like an access beyond the end of a +regular string. A lazy string always takes whole strings from the attached +list. + +Lazy string growth is achieved via the lazy-str-force_upto function which +forces a string to exist up to a given character position. This function is +used internally to handle various situations. + +The lazy-str-force function forces the entire string to materialize. If the +string is connected to an infinite lazy list, this will exhaust all memory. + +Lazy strings are specially recognized in many of the regular string functions, +which do the right thing with lazy strings. For instance when sub-str +is invoked on a lazy string, a special version of the sub-str logic is +used which handles various lazy string cases, and can potentially return +another lazy string. Taking a sub-str of a lazy string from character position +7 to all the way to the end does not force the entire lazy string to exist, +and in fact the operation will work on a lazy string that is infinite. + +Furthermore, special lazy string functions are provided which allow programs to +be written carefully to take better advantage of lazy strings. What carefully +means is code that avoids unnecessarily forcing the lazy string. For instance, +in many situations it is necessary to obtain the length of a string, only to +test it for equality or inequality with some number. But it is not necessary to +compute the length of a string in order to know that it is greater than some +value. Computing the length of a lazy string is bad, because it forces the +string to exist, which may not even be possible. + +.SS Function lazy-str + +.TP +Syntax: + + (lazy-str <string-list> [<terminator> [<limit-count>]]) + +.TP +Description: + +The lazy-str function constructs a lazy string which draws material from +<string-list> which is a list of strings. + +If the optional <terminator> argument is given, then it specifies a string +which is appended to every string from <string-list>, before that string is +incorporated into the lazy string. If <terminator> is not given, or specified +as nil, then it defaults to the string "\en", and so the strings from +<string-list> are effectively treated as lines which get terminated by newlines +as they accumulate into the growing prefix of the lazy string. +To avoid the use of a terminator string, a null string <terminator> argument +must be explicitly passed. In that case, the lazy string grows simply +by catenating elements from <string-list>. + +If the <limit-count> argument is specified, it must be a positive integer. It +expresses a maximum limit on how many elements will be consumed from +<string-list> in order to feed the lazy string. Once that many elements are +drawn, the string ends, even if the list has not been exhausted. + +.SS Function lazy-stringp + +.TP +Syntax: + + (lazy-stringp <obj>) + +.TP +Description: + +The lazy-stringp function returns t if <obj> is a lazy +string. Otherwise it returns nil. + +.SS Function lazy-str-force-upto + +.TP +Syntax: + + (lazy-str-force-upto <lazy-str> <index>) + +.TP +Description: + +The lazy-str-force-upto function tries to instantiate the lazy string such that +the position given by <index> materializes. The <index> is a character +position, exactly as used in the chr-str function. + +Some positions beyond <index> may also materialize, as a side effect. + +If the string is already materialized through to at least <index>, or if it is +possible to materialize the string that far, then the value t is returned to +indicate success. + +If there is insufficient material to force the lazy string through to the +<index> position, then nil is returned. + +It is an error if the <lazy-str> argument isn't a lazy string. + +.SS Function lazy-str-force + +.TP +Syntax: + + (lazy-str-force <lazy-str>) + +.TP +Description: + +The <lazy-str> argument must be a lazy string. The lazy string is forced +to fully materialize. + +The return value is an ordinary, non-lazy string equivalent to the fully +materialized lazy string. + +.SS Function lazy-str-get-trailing-list + +.TP +Syntax: + + (lazy-str-get-trailing-list <string> <index>) + +.TP +Description: + +The lazy-str-get-trailing-list function is a sort of inverse operation to the +the lazy string from its associated list. + +Firstly, the string is forced up through the position <index>. + +Next, the materialized part of the string starting at position <index>, +through to the end, is split into pieces on occurrences of the terminator +character, which had been given as the <terminator> argument in the lazy-str +constructor, and defaults to the newline character. + +Finally, a list is returned consisting of the the pieces produced by the split, +to which is appended the remaining list of the string which has not yet been +forced to materialize. + +If <index> is a position which cannot be forced, then the lazy string's +remaining list is returned, with single null string prepended to it. + + +.SS Functions length-str->, length-str->=, length-str-< and length-str-<= + +.TP +Syntax: + + (length-str-> <string> <len>) + (length-str->= <string> <len>) + (length-str-< <string> <len>) + (length-str-<= <string> <len>) + +.TP +Description: + +These functions compare the lengths of two strings. The following +equivalences hold, as far as the resulting value is concerned: + + (length-str-> s l) <--> (> (length-str s) l) + (length-str->= s l) <--> (>= (length-str s) l) + (length-str-< s l) <--> (< (length-str s) l) + (length-str-<= s l) <--> (<= (length-str s) l) + +The difference between the length-str-* functions and the equivalent forms is +that if the string is lazy, the length-str function will fully force it in +order to calculate and return its length. + +These functions only force a string up to position <len>, so they are not +only more efficient, but usable on infinitely long lazy strings. + +length-str cannot compute the length of a lazy string with an unbounded +length; it will exhaust all memory trying to force the string. + +These functions can be used to test such as string whether it is longer +or shorter than a given length, without forcing the string beyond +that length. + + .SH VECTORS .SS Function vector |