summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2014-03-11 20:10:40 -0700
committerKaz Kylheku <kaz@kylheku.com>2014-03-11 20:10:40 -0700
commit1fb002a4fd466e2384d12b80176a1bf526d0ce5f (patch)
tree56f1bf0ccbcdc392b11ddfc91eb4ec694698247d
parent23a2f7ca3b960cb563e5003fae88eda7278a0021 (diff)
downloadtxr-1fb002a4fd466e2384d12b80176a1bf526d0ce5f.tar.gz
txr-1fb002a4fd466e2384d12b80176a1bf526d0ce5f.tar.bz2
txr-1fb002a4fd466e2384d12b80176a1bf526d0ce5f.zip
* eval.c (eval_init): Registration of url_encode and url_decode
moved to filter.c. * filter.c (trie_compress_intrinsic, html_encode, html_decode): New static functions. (filter_init): Register make_trie, trie_add, trie_compress_intrinsic, filter_string_tree, filter_equal, html_encode and html_decode as intrinsics. Move registration of url_encode and url_decode here. * genvim.txr: Look for registrations in filter.c too. * txr.1: Documented. * txr.vim: Updated.
-rw-r--r--ChangeLog17
-rw-r--r--eval.c3
-rw-r--r--filter.c28
-rw-r--r--genvim.txr3
-rw-r--r--txr.1155
-rw-r--r--txr.vim158
6 files changed, 282 insertions, 82 deletions
diff --git a/ChangeLog b/ChangeLog
index b392409d..ceb25b20 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2014-03-11 Kaz Kylheku <kaz@kylheku.com>
+ * eval.c (eval_init): Registration of url_encode and url_decode
+ moved to filter.c.
+
+ * filter.c (trie_compress_intrinsic, html_encode, html_decode): New
+ static functions.
+ (filter_init): Register make_trie, trie_add, trie_compress_intrinsic,
+ filter_string_tree, filter_equal, html_encode and html_decode
+ as intrinsics. Move registration of url_encode and url_decode here.
+
+ * genvim.txr: Look for registrations in filter.c too.
+
+ * txr.1: Documented.
+
+ * txr.vim: Updated.
+
+2014-03-11 Kaz Kylheku <kaz@kylheku.com>
+
* stream.c (open_process): Close the original pipe file descriptor
in the child process after dup2-ing it to standard input or standard
output, so the child doesn't have two references to the pipe.
diff --git a/eval.c b/eval.c
index 6e3ff5ac..6abbf616 100644
--- a/eval.c
+++ b/eval.c
@@ -3495,9 +3495,6 @@ void eval_init(void)
reg_fun(intern(lit("match-fun"), user_package), func_n4(match_fun));
- reg_fun(intern(lit("url-encode"), user_package), func_n2o(url_encode, 1));
- reg_fun(intern(lit("url-decode"), user_package), func_n2o(url_decode, 1));
-
reg_fun(intern(lit("time"), user_package), func_n0(time_sec));
reg_fun(intern(lit("time-usec"), user_package), func_n0(time_sec_usec));
reg_fun(intern(lit("time-string-local"), user_package), func_n2(time_string_local));
diff --git a/filter.c b/filter.c
index 4d8f27f2..4ac65f17 100644
--- a/filter.c
+++ b/filter.c
@@ -41,6 +41,7 @@
#include "match.h"
#include "filter.h"
#include "gc.h"
+#include "eval.h"
#include "stream.h"
val filters;
@@ -109,6 +110,12 @@ static void trie_compress(val *ptrie)
}
}
+static val trie_compress_intrinsic(val ptrie)
+{
+ trie_compress(&ptrie);
+ return ptrie;
+}
+
val trie_lookup_begin(val trie)
{
return trie;
@@ -663,6 +670,16 @@ val url_decode(val str, val space_plus)
return get_string_from_stream(out);
}
+static val html_encode(val str)
+{
+ return trie_filter_string(get_filter(to_html_k), str);
+}
+
+static val html_decode(val str)
+{
+ return trie_filter_string(get_filter(from_html_k), str);
+}
+
void filter_init(void)
{
protect(&filters, (val *) 0);
@@ -702,4 +719,15 @@ void filter_init(void)
sethash(filters, tointeger_k, curry_12_1(func_n2(int_str), nil));
sethash(filters, tofloat_k, func_n1(flo_str));
sethash(filters, hextoint_k, curry_12_1(func_n2(int_str), num_fast(16)));
+
+ reg_fun(intern(lit("make-trie"), user_package), func_n0(make_trie));
+ reg_fun(intern(lit("trie-add"), user_package), func_n3(trie_add));
+ reg_fun(intern(lit("trie-compress"), user_package),
+ func_n1(trie_compress_intrinsic));
+ reg_fun(intern(lit("filter-string-tree"), user_package), func_n2(filter_string_tree));
+ reg_fun(intern(lit("filter-equal"), user_package), func_n4(filter_equal));
+ reg_fun(intern(lit("url-encode"), user_package), func_n2o(url_encode, 1));
+ reg_fun(intern(lit("url-decode"), user_package), func_n2o(url_decode, 1));
+ reg_fun(intern(lit("html-encode"), user_package), func_n1(html_encode));
+ reg_fun(intern(lit("html-decode"), user_package), func_n1(html_decode));
}
diff --git a/genvim.txr b/genvim.txr
index d443439d..e2ff77e1 100644
--- a/genvim.txr
+++ b/genvim.txr
@@ -7,7 +7,8 @@ static void dir_tables_init(void)
@(until)
}
@(end)
-@(next @(open-files '("eval.c" "rand.c" "signal.c" "stream.c" "syslog.c" "txr.c")))
+@(next @(open-files '("eval.c" "rand.c" "signal.c" "stream.c"
+ "syslog.c" "filter.c" "txr.c")))
@(collect)
@ (block)
@ (cases)
diff --git a/txr.1 b/txr.1
index 87e0b624..b34ba7cf 100644
--- a/txr.1
+++ b/txr.1
@@ -13179,6 +13179,161 @@ argument is omitted or specified as nil, then + (plus) characters in the
encoded data are retained as + characters in the decoded strings. Otherwise,
plus characters are converted to spaces.
+.SS Functions html-encode and html-decode
+
+.TP
+Syntax:
+
+ (html-encode <text-string>)
+ (html-decode <html-string>)
+
+.TP
+Description:
+
+The html-encode and html-decode functions convert between an HTML and raw
+representation of text.
+
+The html-encode function returns a string which is based on the content of
+<text-string>, but in which all characters which have special meaning in HTML
+have been replaced by special HTML codes for representing those characters.
+The returned string is the HTML-encoded verbatim representation of
+<text-string>.
+
+The html-decode function converts <html-string>, which may contain HTML
+character encodings, into a string which contains the characters represented
+by those encodings.
+
+The function composition (html-decode (html-encode text)) returns a string
+which is equal to text.
+
+The reverse composition (html-encode (html-decode html)) does not necessarily
+return a string equal to html.
+
+For instance if html is the string "<p>Hello, world&#33;</p>",
+then html-decode produces "<p>Hello, world!</p>". Then, html-encode
+produces "&lt;p&gt;Hello, world!&lt;/p&gt;".
+
+.SH FILTER MODULE
+
+The filter module provides a trie (pronounced "try") data structure,
+which is suitable for representing dictionaries for efficient filtering.
+Dictionaries are unordered collections of keys, which are strings, which
+have associated values, which are also strings. A trie can be used to filter
+text, such that keys appearing in the text are replaced by the corresponding
+values. A trie supports this filtering operation by providing an efficient
+prefix-based lookup method which only looks at each input character once, and
+which does not require knowledge of the length of the key in advance.
+
+.SS Function make-trie
+
+.TP
+Syntax:
+
+ (make-trie)
+
+.TP
+Description:
+
+This function creates an empty trie. There is no special data type for
+a trie; a trie is some existing type such as a hash table.
+
+.SS Function trie-add
+
+.TP
+Syntax:
+
+ (trie-add <trie> <key> <value>)
+
+.TP
+Description:
+
+The trie-add function adds the string <key> to the trie, associating
+it with <value>. If <key> already exists in <trie>, then the value
+is updated with <value>.
+
+The <trie> must be a trie that has not been compressed with trie-compress.
+
+A trie can contain keys which are prefixes of other keys. For instance
+it can contain "dog" and "dogma". When a trie is used for matching
+and substitution, the longest match is used. If the input presents
+the text "doggy", then the match is "dog". If the input is "dogmatic",
+then "dogma" matches.
+
+
+.SS Function trie-compress
+
+.TP
+Syntax:
+
+ (trie-compress <trie>)
+
+.TP
+Description:
+
+The trie-compress function changes the representation of <trie> to
+a representation which occupies less space and supports faster lookups.
+The new representation is returned.
+
+The compressed representation of a trie does not support the trie-add function.
+
+This function destructively manipulates <trie>, and may return an object
+that is the same object as <trie>, or it may return a different object,
+while at the same time still modifying the internals of <trie>.
+Consequently, the program should not retain the input object <trie>,
+but use the returned object in its place.
+
+
+.SS Function filter-string-tree
+
+.TP
+Syntax:
+
+ (filter-string-tree <filter> <obj>)
+
+.TP
+The filter-string-tree function returns a tree structure similar to <obj>,
+in which all of the string atoms have been filtered through <filter>.
+
+The <obj> argument is a string tree structure: either the symbol nil, denoting
+an empty structure; a string; or a list of tree structures. If <obj> is
+nil, then filter-string-tree returns nil.
+
+The <filter> argument is a filter: it is either a trie, a function, or nil.
+If <filter> is nil, then filter-string-tree just returns <obj>.
+
+If <filter> is a function, it must be a function that can be called
+with one argument. The strings of the string tree are filtered by passing
+each one into the function and substituting the return value into the
+corresponding place in the returned structure.
+
+Otherwise if <filter> is a trie, then this trie is used for filtering
+the string elements, similarly to a function. For each string, a new
+string is returned in which occurrences of the keys in the trie are
+replaced by the values in the trie.
+
+.SS Function filter-equal
+
+.TP
+Syntax:
+
+ (filter-equal <filter-1> <filter-2> <obj-1> <obj-2>)
+
+.TP
+Description:
+
+The filter-equal function tests whether two string trees are equal
+under the given filters.
+
+The precise semantics can be given by this expression:
+
+ (equal (filter-string-tree <filter-1> <obj-1>)
+ (filter-string-tree <filter-2> <obj-2>))
+
+The string tree <obj-1> is filtered through <filter-1>, as if
+by the filter-string-tree function, and similarly, <obj-2> is
+filtered through <filter-2>. The resulting structures are compared
+using equal, and the result of that is returned.
+
.SH ACCESS TO TXR PATTERN LANGUAGE FROM LISP
.SS Function match-fun
diff --git a/txr.vim b/txr.vim
index 67dd21d4..dc68e64c 100644
--- a/txr.vim
+++ b/txr.vim
@@ -66,19 +66,20 @@ syn keyword txl_keyword contained env-fbind env-hash env-vbind eq
syn keyword txl_keyword contained eql equal errno error
syn keyword txl_keyword contained eval evenp exit exp
syn keyword txl_keyword contained expt exptmod fboundp fifth
-syn keyword txl_keyword contained find find-if find-package first
-syn keyword txl_keyword contained fixnump flatten flatten* flip
-syn keyword txl_keyword contained flo-int flo-str floatp floor
-syn keyword txl_keyword contained flush-stream for for* force
-syn keyword txl_keyword contained format fourth fun func-get-env
-syn keyword txl_keyword contained func-get-form func-set-env functionp gcd
-syn keyword txl_keyword contained gen generate gensym get-byte
-syn keyword txl_keyword contained get-char get-hash-userdata get-line get-list-from-stream
-syn keyword txl_keyword contained get-sig-handler get-string-from-stream gethash group-by
-syn keyword txl_keyword contained gun hash hash-alist hash-construct
-syn keyword txl_keyword contained hash-count hash-diff hash-eql hash-equal
-syn keyword txl_keyword contained hash-isec hash-keys hash-pairs hash-uni
-syn keyword txl_keyword contained hash-update hash-update-1 hash-values hashp
+syn keyword txl_keyword contained filter-equal filter-string-tree find find-if
+syn keyword txl_keyword contained find-package first fixnump flatten
+syn keyword txl_keyword contained flatten* flip flo-int flo-str
+syn keyword txl_keyword contained floatp floor flush-stream for
+syn keyword txl_keyword contained for* force format fourth
+syn keyword txl_keyword contained fun func-get-env func-get-form func-set-env
+syn keyword txl_keyword contained functionp gcd gen generate
+syn keyword txl_keyword contained gensym get-byte get-char get-hash-userdata
+syn keyword txl_keyword contained get-line get-list-from-stream get-sig-handler get-string-from-stream
+syn keyword txl_keyword contained gethash group-by gun hash
+syn keyword txl_keyword contained hash-alist hash-construct hash-count hash-diff
+syn keyword txl_keyword contained hash-eql hash-equal hash-isec hash-keys
+syn keyword txl_keyword contained hash-pairs hash-uni hash-update hash-update-1
+syn keyword txl_keyword contained hash-values hashp html-decode html-encode
syn keyword txl_keyword contained identity if iff iffi
syn keyword txl_keyword contained inc inhash int-flo int-str
syn keyword txl_keyword contained integerp intern interp-fun-p isqrt
@@ -100,71 +101,72 @@ syn keyword txl_keyword contained macro-time macroexpand macroexpand-1 macrolet
syn keyword txl_keyword contained major make-catenated-stream make-env make-hash
syn keyword txl_keyword contained make-lazy-cons make-package make-random-state make-similar-hash
syn keyword txl_keyword contained make-string-byte-input-stream make-string-input-stream make-string-output-stream make-strlist-output-stream
-syn keyword txl_keyword contained make-sym make-time make-time-utc makedev
-syn keyword txl_keyword contained mapcar mapcar* maphash mappend
-syn keyword txl_keyword contained mappend* mask match-fun match-regex
-syn keyword txl_keyword contained match-regex-right match-str match-str-tree max
-syn keyword txl_keyword contained memq memql memqual merge
-syn keyword txl_keyword contained min minor mkdir mknod
-syn keyword txl_keyword contained mkstring mod multi-sort n-choose-k
-syn keyword txl_keyword contained n-perm-k none not nreverse
-syn keyword txl_keyword contained null num-chr num-str numberp
-syn keyword txl_keyword contained oddp op open-command open-directory
-syn keyword txl_keyword contained open-file open-files open-files* open-pipe
-syn keyword txl_keyword contained open-process open-tail openlog or
-syn keyword txl_keyword contained orf packagep perm pop
-syn keyword txl_keyword contained pos pos-if posq posql
-syn keyword txl_keyword contained posqual pprinl pprint prinl
-syn keyword txl_keyword contained print prog1 progn prop
-syn keyword txl_keyword contained proper-listp push pushhash put-byte
-syn keyword txl_keyword contained put-char put-line put-string pwd
-syn keyword txl_keyword contained qquote quasi quote rand
-syn keyword txl_keyword contained random random-fixnum random-state-p range
-syn keyword txl_keyword contained range* rcomb read readlink
-syn keyword txl_keyword contained real-time-stream-p reduce-left reduce-right ref
-syn keyword txl_keyword contained refset regex-compile regex-parse regexp
-syn keyword txl_keyword contained regsub rehome-sym remhash remove-if
-syn keyword txl_keyword contained remove-if* remove-path remq remq*
-syn keyword txl_keyword contained remql remql* remqual remqual*
-syn keyword txl_keyword contained rename-path repeat replace replace-list
-syn keyword txl_keyword contained replace-str replace-vec rest return
-syn keyword txl_keyword contained return-from reverse rlcp rperm
-syn keyword txl_keyword contained rplaca rplacd s-ifblk s-ifchr
-syn keyword txl_keyword contained s-ifdir s-ififo s-iflnk s-ifmt
-syn keyword txl_keyword contained s-ifreg s-ifsock s-irgrp s-iroth
-syn keyword txl_keyword contained s-irusr s-irwxg s-irwxo s-irwxu
-syn keyword txl_keyword contained s-isgid s-isuid s-isvtx s-iwgrp
-syn keyword txl_keyword contained s-iwoth s-iwusr s-ixgrp s-ixoth
-syn keyword txl_keyword contained s-ixusr search-regex search-str search-str-tree
-syn keyword txl_keyword contained second seek-stream set set-diff
-syn keyword txl_keyword contained set-hash-userdata set-sig-handler sethash setlogmask
-syn keyword txl_keyword contained sig-abrt sig-alrm sig-bus sig-check
-syn keyword txl_keyword contained sig-chld sig-cont sig-fpe sig-hup
-syn keyword txl_keyword contained sig-ill sig-int sig-io sig-iot
-syn keyword txl_keyword contained sig-kill sig-lost sig-pipe sig-poll
-syn keyword txl_keyword contained sig-prof sig-pwr sig-quit sig-segv
-syn keyword txl_keyword contained sig-stkflt sig-stop sig-sys sig-term
-syn keyword txl_keyword contained sig-trap sig-tstp sig-ttin sig-ttou
-syn keyword txl_keyword contained sig-urg sig-usr1 sig-usr2 sig-vtalrm
-syn keyword txl_keyword contained sig-winch sig-xcpu sig-xfsz sin
-syn keyword txl_keyword contained sixth size-vec some sort
-syn keyword txl_keyword contained source-loc source-loc-str span-str splice
-syn keyword txl_keyword contained split-str split-str-set sqrt stat
-syn keyword txl_keyword contained stream-get-prop stream-set-prop streamp string-cmp
-syn keyword txl_keyword contained string-extend string-lt stringp sub
-syn keyword txl_keyword contained sub-list sub-str sub-vec symacrolet
-syn keyword txl_keyword contained symbol-function symbol-name symbol-package symbol-value
-syn keyword txl_keyword contained symbolp symlink sys-qquote sys-splice
-syn keyword txl_keyword contained sys-unquote syslog tan third
-syn keyword txl_keyword contained throw throwf time time-fields-local
-syn keyword txl_keyword contained time-fields-utc time-string-local time-string-utc time-usec
-syn keyword txl_keyword contained tok-str tostring tostringp tree-bind
-syn keyword txl_keyword contained tree-case tree-find trim-str trunc
-syn keyword txl_keyword contained typeof unget-byte unget-char unquote
-syn keyword txl_keyword contained upcase-str update url-decode url-encode
-syn keyword txl_keyword contained usleep uw-protect vec vec-push
-syn keyword txl_keyword contained vec-set-length vecref vector vector-list
-syn keyword txl_keyword contained vectorp with-saved-vars zerop
+syn keyword txl_keyword contained make-sym make-time make-time-utc make-trie
+syn keyword txl_keyword contained makedev mapcar mapcar* maphash
+syn keyword txl_keyword contained mappend mappend* mask match-fun
+syn keyword txl_keyword contained match-regex match-regex-right match-str match-str-tree
+syn keyword txl_keyword contained max memq memql memqual
+syn keyword txl_keyword contained merge min minor mkdir
+syn keyword txl_keyword contained mknod mkstring mod multi-sort
+syn keyword txl_keyword contained n-choose-k n-perm-k none not
+syn keyword txl_keyword contained nreverse null num-chr num-str
+syn keyword txl_keyword contained numberp oddp op open-command
+syn keyword txl_keyword contained open-directory open-file open-files open-files*
+syn keyword txl_keyword contained open-pipe open-process open-tail openlog
+syn keyword txl_keyword contained or orf packagep perm
+syn keyword txl_keyword contained pop pos pos-if posq
+syn keyword txl_keyword contained posql posqual pprinl pprint
+syn keyword txl_keyword contained prinl print prog1 progn
+syn keyword txl_keyword contained prop proper-listp push pushhash
+syn keyword txl_keyword contained put-byte put-char put-line put-string
+syn keyword txl_keyword contained pwd qquote quasi quote
+syn keyword txl_keyword contained rand random random-fixnum random-state-p
+syn keyword txl_keyword contained range range* rcomb read
+syn keyword txl_keyword contained readlink real-time-stream-p reduce-left reduce-right
+syn keyword txl_keyword contained ref refset regex-compile regex-parse
+syn keyword txl_keyword contained regexp regsub rehome-sym remhash
+syn keyword txl_keyword contained remove-if remove-if* remove-path remq
+syn keyword txl_keyword contained remq* remql remql* remqual
+syn keyword txl_keyword contained remqual* rename-path repeat replace
+syn keyword txl_keyword contained replace-list replace-str replace-vec rest
+syn keyword txl_keyword contained return return-from reverse rlcp
+syn keyword txl_keyword contained rperm rplaca rplacd s-ifblk
+syn keyword txl_keyword contained s-ifchr s-ifdir s-ififo s-iflnk
+syn keyword txl_keyword contained s-ifmt s-ifreg s-ifsock s-irgrp
+syn keyword txl_keyword contained s-iroth s-irusr s-irwxg s-irwxo
+syn keyword txl_keyword contained s-irwxu s-isgid s-isuid s-isvtx
+syn keyword txl_keyword contained s-iwgrp s-iwoth s-iwusr s-ixgrp
+syn keyword txl_keyword contained s-ixoth s-ixusr search-regex search-str
+syn keyword txl_keyword contained search-str-tree second seek-stream set
+syn keyword txl_keyword contained set-diff set-hash-userdata set-sig-handler sethash
+syn keyword txl_keyword contained setlogmask sig-abrt sig-alrm sig-bus
+syn keyword txl_keyword contained sig-check sig-chld sig-cont sig-fpe
+syn keyword txl_keyword contained sig-hup sig-ill sig-int sig-io
+syn keyword txl_keyword contained sig-iot sig-kill sig-lost sig-pipe
+syn keyword txl_keyword contained sig-poll sig-prof sig-pwr sig-quit
+syn keyword txl_keyword contained sig-segv sig-stkflt sig-stop sig-sys
+syn keyword txl_keyword contained sig-term sig-trap sig-tstp sig-ttin
+syn keyword txl_keyword contained sig-ttou sig-urg sig-usr1 sig-usr2
+syn keyword txl_keyword contained sig-vtalrm sig-winch sig-xcpu sig-xfsz
+syn keyword txl_keyword contained sin sixth size-vec some
+syn keyword txl_keyword contained sort source-loc source-loc-str span-str
+syn keyword txl_keyword contained splice split-str split-str-set sqrt
+syn keyword txl_keyword contained stat stream-get-prop stream-set-prop streamp
+syn keyword txl_keyword contained string-cmp string-extend string-lt stringp
+syn keyword txl_keyword contained sub sub-list sub-str sub-vec
+syn keyword txl_keyword contained symacrolet symbol-function symbol-name symbol-package
+syn keyword txl_keyword contained symbol-value symbolp symlink sys-qquote
+syn keyword txl_keyword contained sys-splice sys-unquote syslog tan
+syn keyword txl_keyword contained third throw throwf time
+syn keyword txl_keyword contained time-fields-local time-fields-utc time-string-local time-string-utc
+syn keyword txl_keyword contained time-usec tok-str tostring tostringp
+syn keyword txl_keyword contained tree-bind tree-case tree-find trie-add
+syn keyword txl_keyword contained trie-compress trim-str trunc typeof
+syn keyword txl_keyword contained unget-byte unget-char unquote upcase-str
+syn keyword txl_keyword contained update url-decode url-encode usleep
+syn keyword txl_keyword contained uw-protect vec vec-push vec-set-length
+syn keyword txl_keyword contained vecref vector vector-list vectorp
+syn keyword txl_keyword contained with-saved-vars zerop
syn match txr_error "@[\t ]*[*]\?[\t ]*."
syn match txr_nested_error "[^\t `]\+" contained