summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog19
-rw-r--r--eval.c4
-rw-r--r--filter.c22
-rw-r--r--filter.h6
-rw-r--r--txr.118
5 files changed, 54 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index eff28bb2..6ea80d12 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2012-03-18 Kaz Kylheku <kaz@kylheku.com>
+
+ * eval.c (eval_init): url_encode and url_decode have two parameters now,
+ so we make the second one optional.
+
+ * filter.c (topercent_k, frompercent_k): New keyword
+ variables.
+ (url_encode, url_decode): Take a second parameter, space_plus.
+ This determines whether or not to apply the rule that
+ a space encodes as a + character.
+ (filter_init): Initialize new keyword variables, and register :topercent
+ and :frompercent filters. Fix the previous registrations of :tourl and
+ :fromurl using currying.
+
+ * filter.h (url_encode, url_decode): Declarations updated.
+ (topercent_k, frompercent_k): Declared.
+
+ * txr.1: Documented.
+
2012-03-17 Kaz Kylheku <kaz@kylheku.com>
Changing type function to not blow up on nil, which makes a lot of code
diff --git a/eval.c b/eval.c
index fcce0b2f..74acb79f 100644
--- a/eval.c
+++ b/eval.c
@@ -2371,8 +2371,8 @@ void eval_init(void)
reg_fun(intern(lit("match-fun"), user_package), func_n4(match_fun));
- reg_fun(intern(lit("url-encode"), user_package), func_n1(url_encode));
- reg_fun(intern(lit("url-decode"), user_package), func_n1(url_decode));
+ reg_fun(intern(lit("url-encode"), user_package), func_n2o(url_encode, 1));
+ reg_fun(intern(lit("url-decode"), user_package), func_n2o(url_decode, 1));
eval_error_s = intern(lit("eval-error"), user_package);
uw_register_subtype(eval_error_s, error_s);
diff --git a/filter.c b/filter.c
index 94423a7b..44e83936 100644
--- a/filter.c
+++ b/filter.c
@@ -44,7 +44,7 @@
val filters;
val filter_k, lfilt_k, rfilt_k, to_html_k, from_html_k;
val upcase_k, downcase_k, fun_k;
-val tourl_k, fromurl_k;
+val topercent_k, frompercent_k, tourl_k, fromurl_k;
static val make_trie(void)
{
@@ -593,7 +593,7 @@ static int is_url_reserved(int ch)
return (ch <= 0x20 || ch >= 0x7F || strchr(":/?#[]@!$&'()*+,;=%", ch) != 0);
}
-val url_encode(val str)
+val url_encode(val str, val space_plus)
{
val in_byte = make_string_byte_input_stream(str);
val out = make_string_output_stream();
@@ -602,7 +602,9 @@ val url_encode(val str)
while ((ch = get_byte(in_byte)) != nil) {
int c = c_num(ch);
- if (is_url_reserved(c))
+ if (space_plus && c == ' ')
+ put_char(chr('+'), out);
+ else if (is_url_reserved(c))
format(out, lit("%~1X~1X"), num_fast(c >> 4), num_fast(c & 0xf), nao);
else
put_char(chr_num(ch), out);
@@ -611,7 +613,7 @@ val url_encode(val str)
return get_string_from_stream(out);
}
-val url_decode(val str)
+val url_decode(val str, val space_plus)
{
val in = make_string_input_stream(str);
val out = make_string_output_stream();
@@ -637,6 +639,10 @@ val url_decode(val str)
}
continue;
}
+ if (space_plus && ch == chr('+')) {
+ put_char(chr(' '), out);
+ continue;
+ }
if (!ch)
break;
@@ -659,6 +665,8 @@ void filter_init(void)
upcase_k = intern(lit("upcase"), keyword_package);
downcase_k = intern(lit("downcase"), keyword_package);
fun_k = intern(lit("fun"), keyword_package);
+ topercent_k = intern(lit("topercent"), keyword_package);
+ frompercent_k = intern(lit("frompercent"), keyword_package);
tourl_k = intern(lit("tourl"), keyword_package);
fromurl_k = intern(lit("fromurl"), keyword_package);
@@ -671,6 +679,8 @@ void filter_init(void)
}
sethash(filters, upcase_k, func_n1(upcase_str));
sethash(filters, downcase_k, func_n1(downcase_str));
- sethash(filters, tourl_k, func_n1(url_encode));
- sethash(filters, fromurl_k, func_n1(url_decode));
+ sethash(filters, topercent_k, curry_12_1(func_n2(url_encode), nil));
+ sethash(filters, frompercent_k, curry_12_1(func_n2(url_decode), nil));
+ sethash(filters, tourl_k, curry_12_1(func_n2(url_encode), t));
+ sethash(filters, fromurl_k, curry_12_1(func_n2(url_decode), t));
}
diff --git a/filter.h b/filter.h
index 1a084e66..806965c6 100644
--- a/filter.h
+++ b/filter.h
@@ -27,7 +27,7 @@
extern val filters;
extern val filter_k, lfilt_k, rfilt_k, to_html_k, from_html_k;
extern val upcase_k, downcase_k, fun_k;
-extern val tourl_k, fromurl_k;
+extern val topercent_k, frompercent_k, tourl_k, fromurl_k;
val trie_lookup_begin(val trie);
val trie_value_at(val node);
@@ -37,8 +37,8 @@ val filter_string(val trie, val str);
val filter_equal(val lfilt, val rfilt, val left, val right);
val register_filter(val sym, val table);
-val url_encode(val str);
-val url_decode(val str);
+val url_encode(val str, val space_plus);
+val url_decode(val str, val space_plus);
void filter_init(void);
diff --git a/txr.1 b/txr.1
index e3ab1535..a95f23b7 100644
--- a/txr.1
+++ b/txr.1
@@ -3697,15 +3697,15 @@ Convert the 26 lower case letters of the English alphabet to upper case.
.IP :downcase
Convert the 26 upper case letters of the English alphabet to lower case.
-.IP :fromurl
-Decode URL-encoded (a.k.a. percent-encoded) text. Character triplets consisting
+.IP :frompercent
+Decode percent-encoded text. Character triplets consisting
of the % character followed by a pair of hexadecimal digits (case insensitive)
are converted to bytes having the value represented by the hexadecimal
digits (most significant nybble first). Sequences of one or more such bytes are
treated as UTF-8 data and decoded to characters.
-.IP :tourl
-Convert to URL encoding according to RFC 3986. The text is first converted
+.IP :topercent
+Convert to percent encoding according to RFC 3986. The text is first converted
to UTF-8 bytes. The bytes are then converted back to text as follows.
Bytes in the range 0 to 32, and 127 to 255 (note: including the ASCII DEL),
bytes whose values correspond to ASCII characters which are listed by RFC 3986
@@ -3716,6 +3716,16 @@ byte value (most significant nybble first, upper case). All other bytes
are converted directly to characters of the same value without any such
encoding.
+.IP :fromurl
+Decode from URL encoding, which is like percent encoding, except that
+if the unencoded + character occurs, it is decoded to a space character.
+Of course %20 still decodes to space, and %2B to the + character.
+
+.IP :tourl
+Encode to URL encoding, which is like percent encoding except that
+a space maps to + rather than %20. The + character, being in the
+reserved set, encodes to %2B.
+
Example: to escape HTML characters in all variable substitutions occurring in an
output clause, specify :filter :to_html in the directive: