diff options
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | eval.c | 3 | ||||
-rw-r--r-- | filter.c | 28 | ||||
-rw-r--r-- | genvim.txr | 3 | ||||
-rw-r--r-- | txr.1 | 155 | ||||
-rw-r--r-- | txr.vim | 158 |
6 files changed, 282 insertions, 82 deletions
@@ -1,5 +1,22 @@ 2014-03-11 Kaz Kylheku <kaz@kylheku.com> + * eval.c (eval_init): Registration of url_encode and url_decode + moved to filter.c. + + * filter.c (trie_compress_intrinsic, html_encode, html_decode): New + static functions. + (filter_init): Register make_trie, trie_add, trie_compress_intrinsic, + filter_string_tree, filter_equal, html_encode and html_decode + as intrinsics. Move registration of url_encode and url_decode here. + + * genvim.txr: Look for registrations in filter.c too. + + * txr.1: Documented. + + * txr.vim: Updated. + +2014-03-11 Kaz Kylheku <kaz@kylheku.com> + * stream.c (open_process): Close the original pipe file descriptor in the child process after dup2-ing it to standard input or standard output, so the child doesn't have to references to the pipe. @@ -3495,9 +3495,6 @@ void eval_init(void) reg_fun(intern(lit("match-fun"), user_package), func_n4(match_fun)); - reg_fun(intern(lit("url-encode"), user_package), func_n2o(url_encode, 1)); - reg_fun(intern(lit("url-decode"), user_package), func_n2o(url_decode, 1)); - reg_fun(intern(lit("time"), user_package), func_n0(time_sec)); reg_fun(intern(lit("time-usec"), user_package), func_n0(time_sec_usec)); reg_fun(intern(lit("time-string-local"), user_package), func_n2(time_string_local)); @@ -41,6 +41,7 @@ #include "match.h" #include "filter.h" #include "gc.h" +#include "eval.h" #include "stream.h" val filters; @@ -109,6 +110,12 @@ static void trie_compress(val *ptrie) } } +static val trie_compress_intrinsic(val ptrie) +{ + trie_compress(&ptrie); + return ptrie; +} + val trie_lookup_begin(val trie) { return trie; @@ -663,6 +670,16 @@ val url_decode(val str, val space_plus) return get_string_from_stream(out); } +static val html_encode(val str) +{ + return trie_filter_string(get_filter(to_html_k), str); +} + +static val html_decode(val str) +{ + return trie_filter_string(get_filter(from_html_k), str); +} + void filter_init(void) { protect(&filters, (val *) 0); @@ -702,4 +719,15 @@ void 
filter_init(void) sethash(filters, tointeger_k, curry_12_1(func_n2(int_str), nil)); sethash(filters, tofloat_k, func_n1(flo_str)); sethash(filters, hextoint_k, curry_12_1(func_n2(int_str), num_fast(16))); + + reg_fun(intern(lit("make-trie"), user_package), func_n0(make_trie)); + reg_fun(intern(lit("trie-add"), user_package), func_n3(trie_add)); + reg_fun(intern(lit("trie-compress"), user_package), + func_n1(trie_compress_intrinsic)); + reg_fun(intern(lit("filter-string-tree"), user_package), func_n2(filter_string_tree)); + reg_fun(intern(lit("filter-equal"), user_package), func_n4(filter_equal)); + reg_fun(intern(lit("url-encode"), user_package), func_n2o(url_encode, 1)); + reg_fun(intern(lit("url-decode"), user_package), func_n2o(url_decode, 1)); + reg_fun(intern(lit("html-encode"), user_package), func_n1(html_encode)); + reg_fun(intern(lit("html-decode"), user_package), func_n1(html_decode)); } @@ -7,7 +7,8 @@ static void dir_tables_init(void) @(until) } @(end) -@(next @(open-files '("eval.c" "rand.c" "signal.c" "stream.c" "syslog.c" "txr.c"))) +@(next @(open-files '("eval.c" "rand.c" "signal.c" "stream.c" + "syslog.c" "filter.c" "txr.c"))) @(collect) @ (block) @ (cases) @@ -13179,6 +13179,161 @@ argument is omitted or specified as nil, then + (plus) characters in the encoded data are retained as + characters in the decoded strings. Otherwise, plus characters are converted to spaces. +.SS Functions html-encode and html-decode + +.TP +Syntax: + + (html-encode <text-string>) + (html-decode <html-string>) + +.TP +Description: + +The html-encode and html-decode functions convert between an HTML and raw +representation of text. + +The html-encode function returns a string which is based on the content of +<text-string>, but in which all characters which have special meaning in HTML +have been replaced by special HTML codes for representing those characters. +The returned string is the HTML-encoded verbatim representation of +<text-string>. 
+ +The html-decode function converts <html-string>, which may contain HTML +character encodings, into a string which contains the characters represented +by those encodings. + +The function composition (html-decode (html-encode text)) returns a string +which is equal to text. + +The reverse composition (html-encode (html-decode html)) does not necessarily +return a string equal to html. + +For instance if html is the string "<p>Hello, world&#33;</p>", +then html-decode produces "<p>Hello, world!</p>". Then, html-encode +produces "&lt;p&gt;Hello, world!&lt;/p&gt;". + +.SH FILTER MODULE + +The filter module provides a trie (pronounced "try") data structure, +which is suitable for representing dictionaries for efficient filtering. +Dictionaries are unordered collections of keys, which are strings, which +have associated values, which are also strings. A trie can be used to filter +text, such that keys appearing in the text are replaced by the corresponding +values. A trie supports this filtering operation by providing an efficient +prefix-based lookup method which only looks at each input character once, and +which does not require knowledge of the length of the key in advance. + +.SS Function make-trie + +.TP +Syntax: + + (make-trie) + +.TP +Description: + +This function creates an empty trie. There is no special data type for +a trie; a trie is some existing type such as a hash table. + +.SS Function trie-add + +.TP +Syntax: + + (trie-add <trie> <key> <value>) + +.TP +Description: + +The trie-add function adds the string <key> to the trie, associating +it with <value>. If <key> already exists in <trie>, then the value +is updated with <value>. + +The <trie> must be a trie that has not been compressed with trie-compress. + +A trie can contain keys which are prefixes of other keys. For instance +it can contain "dog" and "dogma". When a trie is used for matching +and substitution, the longest match is used. If the input presents +the text "doggy", then the match is "dog". 
If the input is "dogmatic", +then "dogma" matches. + + +.SS Function trie-compress + +.TP +Syntax: + + (trie-compress <trie>) + +.TP +Description: + +The trie-compress function changes the representation of <trie> to +a representation which occupies less space and supports faster lookups. +The new representation is returned. + +The compressed representation of a trie does not support the trie-add function. + +This function destructively manipulates <trie>, and may return an object +that is the same object as <trie>, or it may return a different object, +while at the same time still modifying the internals of <trie>. +Consequently, the program should not retain the input object <trie>, +but use the returned object in its place. + + +.SS Function filter-string-tree + +.TP +Syntax: + + (filter-string-tree <filter> <obj>) + +.TP +Description: + +The filter-string-tree function returns a tree structure similar to <obj>, in which all of the +string atoms have been filtered through <filter>. + +The <obj> argument is a string tree structure: either the symbol nil, denoting +an empty structure; a string; or a list of tree structures. If <obj> is +nil, then filter-string-tree returns nil. + +The <filter> argument is a filter: it is either a trie, a function, or nil. +If <filter> is nil, then filter-string-tree just returns <obj>. + +If <filter> is a function, it must be a function that can be called +with one argument. The strings of the string tree are filtered by passing +each one into the function and substituting the return value into the +corresponding place in the returned structure. + +Otherwise if <filter> is a trie, then this trie is used for filtering +the string elements, similarly to a function. For each string, a new +string is returned in which occurrences of the keys in the trie are +replaced by the values in the trie. 
+ +.SS Function filter-equal + +.TP +Syntax: + + (filter-equal <filter-1> <filter-2> <obj-1> <obj-2>) + +.TP +Description: + +The filter-equal function tests whether two string trees are equal +under the given filters. + +The precise semantics can be given by this expression: + + (equal (filter-string-tree <filter-1> <obj-1>) + (filter-string-tree <filter-2> <obj-2>)) + +The string tree <obj-1> is filtered through <filter-1>, as if +by the filter-string-tree function, and similarly, <obj-2> is +filtered through <filter-2>. The resulting structures are compared +using equal, and the result of that is returned. + .SH ACCESS TO TXR PATTERN LANGUAGE FROM LISP .SS Function match-fun @@ -66,19 +66,20 @@ syn keyword txl_keyword contained env-fbind env-hash env-vbind eq syn keyword txl_keyword contained eql equal errno error syn keyword txl_keyword contained eval evenp exit exp syn keyword txl_keyword contained expt exptmod fboundp fifth -syn keyword txl_keyword contained find find-if find-package first -syn keyword txl_keyword contained fixnump flatten flatten* flip -syn keyword txl_keyword contained flo-int flo-str floatp floor -syn keyword txl_keyword contained flush-stream for for* force -syn keyword txl_keyword contained format fourth fun func-get-env -syn keyword txl_keyword contained func-get-form func-set-env functionp gcd -syn keyword txl_keyword contained gen generate gensym get-byte -syn keyword txl_keyword contained get-char get-hash-userdata get-line get-list-from-stream -syn keyword txl_keyword contained get-sig-handler get-string-from-stream gethash group-by -syn keyword txl_keyword contained gun hash hash-alist hash-construct -syn keyword txl_keyword contained hash-count hash-diff hash-eql hash-equal -syn keyword txl_keyword contained hash-isec hash-keys hash-pairs hash-uni -syn keyword txl_keyword contained hash-update hash-update-1 hash-values hashp +syn keyword txl_keyword contained filter-equal filter-string-tree find find-if +syn keyword txl_keyword 
contained find-package first fixnump flatten +syn keyword txl_keyword contained flatten* flip flo-int flo-str +syn keyword txl_keyword contained floatp floor flush-stream for +syn keyword txl_keyword contained for* force format fourth +syn keyword txl_keyword contained fun func-get-env func-get-form func-set-env +syn keyword txl_keyword contained functionp gcd gen generate +syn keyword txl_keyword contained gensym get-byte get-char get-hash-userdata +syn keyword txl_keyword contained get-line get-list-from-stream get-sig-handler get-string-from-stream +syn keyword txl_keyword contained gethash group-by gun hash +syn keyword txl_keyword contained hash-alist hash-construct hash-count hash-diff +syn keyword txl_keyword contained hash-eql hash-equal hash-isec hash-keys +syn keyword txl_keyword contained hash-pairs hash-uni hash-update hash-update-1 +syn keyword txl_keyword contained hash-values hashp html-decode html-encode syn keyword txl_keyword contained identity if iff iffi syn keyword txl_keyword contained inc inhash int-flo int-str syn keyword txl_keyword contained integerp intern interp-fun-p isqrt @@ -100,71 +101,72 @@ syn keyword txl_keyword contained macro-time macroexpand macroexpand-1 macrolet syn keyword txl_keyword contained major make-catenated-stream make-env make-hash syn keyword txl_keyword contained make-lazy-cons make-package make-random-state make-similar-hash syn keyword txl_keyword contained make-string-byte-input-stream make-string-input-stream make-string-output-stream make-strlist-output-stream -syn keyword txl_keyword contained make-sym make-time make-time-utc makedev -syn keyword txl_keyword contained mapcar mapcar* maphash mappend -syn keyword txl_keyword contained mappend* mask match-fun match-regex -syn keyword txl_keyword contained match-regex-right match-str match-str-tree max -syn keyword txl_keyword contained memq memql memqual merge -syn keyword txl_keyword contained min minor mkdir mknod -syn keyword txl_keyword contained mkstring 
mod multi-sort n-choose-k -syn keyword txl_keyword contained n-perm-k none not nreverse -syn keyword txl_keyword contained null num-chr num-str numberp -syn keyword txl_keyword contained oddp op open-command open-directory -syn keyword txl_keyword contained open-file open-files open-files* open-pipe -syn keyword txl_keyword contained open-process open-tail openlog or -syn keyword txl_keyword contained orf packagep perm pop -syn keyword txl_keyword contained pos pos-if posq posql -syn keyword txl_keyword contained posqual pprinl pprint prinl -syn keyword txl_keyword contained print prog1 progn prop -syn keyword txl_keyword contained proper-listp push pushhash put-byte -syn keyword txl_keyword contained put-char put-line put-string pwd -syn keyword txl_keyword contained qquote quasi quote rand -syn keyword txl_keyword contained random random-fixnum random-state-p range -syn keyword txl_keyword contained range* rcomb read readlink -syn keyword txl_keyword contained real-time-stream-p reduce-left reduce-right ref -syn keyword txl_keyword contained refset regex-compile regex-parse regexp -syn keyword txl_keyword contained regsub rehome-sym remhash remove-if -syn keyword txl_keyword contained remove-if* remove-path remq remq* -syn keyword txl_keyword contained remql remql* remqual remqual* -syn keyword txl_keyword contained rename-path repeat replace replace-list -syn keyword txl_keyword contained replace-str replace-vec rest return -syn keyword txl_keyword contained return-from reverse rlcp rperm -syn keyword txl_keyword contained rplaca rplacd s-ifblk s-ifchr -syn keyword txl_keyword contained s-ifdir s-ififo s-iflnk s-ifmt -syn keyword txl_keyword contained s-ifreg s-ifsock s-irgrp s-iroth -syn keyword txl_keyword contained s-irusr s-irwxg s-irwxo s-irwxu -syn keyword txl_keyword contained s-isgid s-isuid s-isvtx s-iwgrp -syn keyword txl_keyword contained s-iwoth s-iwusr s-ixgrp s-ixoth -syn keyword txl_keyword contained s-ixusr search-regex search-str search-str-tree 
-syn keyword txl_keyword contained second seek-stream set set-diff -syn keyword txl_keyword contained set-hash-userdata set-sig-handler sethash setlogmask -syn keyword txl_keyword contained sig-abrt sig-alrm sig-bus sig-check -syn keyword txl_keyword contained sig-chld sig-cont sig-fpe sig-hup -syn keyword txl_keyword contained sig-ill sig-int sig-io sig-iot -syn keyword txl_keyword contained sig-kill sig-lost sig-pipe sig-poll -syn keyword txl_keyword contained sig-prof sig-pwr sig-quit sig-segv -syn keyword txl_keyword contained sig-stkflt sig-stop sig-sys sig-term -syn keyword txl_keyword contained sig-trap sig-tstp sig-ttin sig-ttou -syn keyword txl_keyword contained sig-urg sig-usr1 sig-usr2 sig-vtalrm -syn keyword txl_keyword contained sig-winch sig-xcpu sig-xfsz sin -syn keyword txl_keyword contained sixth size-vec some sort -syn keyword txl_keyword contained source-loc source-loc-str span-str splice -syn keyword txl_keyword contained split-str split-str-set sqrt stat -syn keyword txl_keyword contained stream-get-prop stream-set-prop streamp string-cmp -syn keyword txl_keyword contained string-extend string-lt stringp sub -syn keyword txl_keyword contained sub-list sub-str sub-vec symacrolet -syn keyword txl_keyword contained symbol-function symbol-name symbol-package symbol-value -syn keyword txl_keyword contained symbolp symlink sys-qquote sys-splice -syn keyword txl_keyword contained sys-unquote syslog tan third -syn keyword txl_keyword contained throw throwf time time-fields-local -syn keyword txl_keyword contained time-fields-utc time-string-local time-string-utc time-usec -syn keyword txl_keyword contained tok-str tostring tostringp tree-bind -syn keyword txl_keyword contained tree-case tree-find trim-str trunc -syn keyword txl_keyword contained typeof unget-byte unget-char unquote -syn keyword txl_keyword contained upcase-str update url-decode url-encode -syn keyword txl_keyword contained usleep uw-protect vec vec-push -syn keyword txl_keyword 
contained vec-set-length vecref vector vector-list -syn keyword txl_keyword contained vectorp with-saved-vars zerop +syn keyword txl_keyword contained make-sym make-time make-time-utc make-trie +syn keyword txl_keyword contained makedev mapcar mapcar* maphash +syn keyword txl_keyword contained mappend mappend* mask match-fun +syn keyword txl_keyword contained match-regex match-regex-right match-str match-str-tree +syn keyword txl_keyword contained max memq memql memqual +syn keyword txl_keyword contained merge min minor mkdir +syn keyword txl_keyword contained mknod mkstring mod multi-sort +syn keyword txl_keyword contained n-choose-k n-perm-k none not +syn keyword txl_keyword contained nreverse null num-chr num-str +syn keyword txl_keyword contained numberp oddp op open-command +syn keyword txl_keyword contained open-directory open-file open-files open-files* +syn keyword txl_keyword contained open-pipe open-process open-tail openlog +syn keyword txl_keyword contained or orf packagep perm +syn keyword txl_keyword contained pop pos pos-if posq +syn keyword txl_keyword contained posql posqual pprinl pprint +syn keyword txl_keyword contained prinl print prog1 progn +syn keyword txl_keyword contained prop proper-listp push pushhash +syn keyword txl_keyword contained put-byte put-char put-line put-string +syn keyword txl_keyword contained pwd qquote quasi quote +syn keyword txl_keyword contained rand random random-fixnum random-state-p +syn keyword txl_keyword contained range range* rcomb read +syn keyword txl_keyword contained readlink real-time-stream-p reduce-left reduce-right +syn keyword txl_keyword contained ref refset regex-compile regex-parse +syn keyword txl_keyword contained regexp regsub rehome-sym remhash +syn keyword txl_keyword contained remove-if remove-if* remove-path remq +syn keyword txl_keyword contained remq* remql remql* remqual +syn keyword txl_keyword contained remqual* rename-path repeat replace +syn keyword txl_keyword contained replace-list 
replace-str replace-vec rest +syn keyword txl_keyword contained return return-from reverse rlcp +syn keyword txl_keyword contained rperm rplaca rplacd s-ifblk +syn keyword txl_keyword contained s-ifchr s-ifdir s-ififo s-iflnk +syn keyword txl_keyword contained s-ifmt s-ifreg s-ifsock s-irgrp +syn keyword txl_keyword contained s-iroth s-irusr s-irwxg s-irwxo +syn keyword txl_keyword contained s-irwxu s-isgid s-isuid s-isvtx +syn keyword txl_keyword contained s-iwgrp s-iwoth s-iwusr s-ixgrp +syn keyword txl_keyword contained s-ixoth s-ixusr search-regex search-str +syn keyword txl_keyword contained search-str-tree second seek-stream set +syn keyword txl_keyword contained set-diff set-hash-userdata set-sig-handler sethash +syn keyword txl_keyword contained setlogmask sig-abrt sig-alrm sig-bus +syn keyword txl_keyword contained sig-check sig-chld sig-cont sig-fpe +syn keyword txl_keyword contained sig-hup sig-ill sig-int sig-io +syn keyword txl_keyword contained sig-iot sig-kill sig-lost sig-pipe +syn keyword txl_keyword contained sig-poll sig-prof sig-pwr sig-quit +syn keyword txl_keyword contained sig-segv sig-stkflt sig-stop sig-sys +syn keyword txl_keyword contained sig-term sig-trap sig-tstp sig-ttin +syn keyword txl_keyword contained sig-ttou sig-urg sig-usr1 sig-usr2 +syn keyword txl_keyword contained sig-vtalrm sig-winch sig-xcpu sig-xfsz +syn keyword txl_keyword contained sin sixth size-vec some +syn keyword txl_keyword contained sort source-loc source-loc-str span-str +syn keyword txl_keyword contained splice split-str split-str-set sqrt +syn keyword txl_keyword contained stat stream-get-prop stream-set-prop streamp +syn keyword txl_keyword contained string-cmp string-extend string-lt stringp +syn keyword txl_keyword contained sub sub-list sub-str sub-vec +syn keyword txl_keyword contained symacrolet symbol-function symbol-name symbol-package +syn keyword txl_keyword contained symbol-value symbolp symlink sys-qquote +syn keyword txl_keyword contained 
sys-splice sys-unquote syslog tan +syn keyword txl_keyword contained third throw throwf time +syn keyword txl_keyword contained time-fields-local time-fields-utc time-string-local time-string-utc +syn keyword txl_keyword contained time-usec tok-str tostring tostringp +syn keyword txl_keyword contained tree-bind tree-case tree-find trie-add +syn keyword txl_keyword contained trie-compress trim-str trunc typeof +syn keyword txl_keyword contained unget-byte unget-char unquote upcase-str +syn keyword txl_keyword contained update url-decode url-encode usleep +syn keyword txl_keyword contained uw-protect vec vec-push vec-set-length +syn keyword txl_keyword contained vecref vector vector-list vectorp +syn keyword txl_keyword contained with-saved-vars zerop syn match txr_error "@[\t ]*[*]\?[\t ]*." syn match txr_nested_error "[^\t `]\+" contained |