4 files changed, 47 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index fbb802ff..998d3dab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
 2015-07-02  Kaz Kylheku  <kaz@kylheku.com>
 
+	Support trailing semicolon after hex/octal characters.
+
+	* parser.l (%option): Remove nounput option since we need
+	yyunput.
+	(grammar): Rule for matching hex and octal escape in SPECIAL
+	state recognizes optional semicolon. With compatibility 109 or
+	lower, it is pushed back into the stream; otherwise consumed.
+
+	* txr.1: Updated documentation, including compat notes.
+
+	* genvim.txr (txr_char): Include optional semicolon in
+	match. Corrected some errors where 8 and 9 were being
+	included as matches for octal digits.
+	(txr_error): Default match for \x or \o not followed
+	by digits.
+
+2015-07-02  Kaz Kylheku  <kaz@kylheku.com>
+
 	Hash-bang support for .tl files.
 
 	* parser.c (read_eval_stream): New boolean argument
diff --git a/genvim.txr b/genvim.txr
index e74ed9c2..5aab351f 100644
--- a/genvim.txr
+++ b/genvim.txr
@@ -78,8 +78,9 @@ syn match txr_atat "@@[ \t]*@@"
 syn match txr_comment "@@[ \t]*[#;].*"
 syn match txr_contin "@@[ \t]*\\$"
 syn match txr_char "@@[ \t]*\\."
-syn match txr_char "@@[ \t]*\\x[0-9A-Fa-f]\+"
-syn match txr_char "@@[ \t]*\\[0-9]\+"
+syn match txr_error "@@[ \t]*\\[xo]"
+syn match txr_char "@@[ \t]*\\x[0-9A-Fa-f]\+;\?"
+syn match txr_char "@@[ \t]*\\[0-7]\+;\?"
 syn match txr_variable "@@[ \t]*[*]\?[ \t]*[A-Za-z_][A-Za-z0-9_]*"
 syn match txr_splicevar "@@[ \t,*]*[A-Za-z_][A-Za-z0-9_]*"
 syn match txr_regdir "@@[ \t]*/\(\\/\|[^/]\|\\\n\)*/"
@@ -97,7 +98,7 @@ syn match txr_regesc "\\[abtnvfre\\ \n/sSdDwW()\|.*?+~&%\[\]\-]" contained
 syn match txr_nested_error "[^\t `]\+" contained
 
 syn match txr_chr "#\\x[A-Fa-f0-9]\+"@(if txr-p " contained")
-syn match txr_chr "#\\o[0-9]\+"@(if txr-p " contained")
+syn match txr_chr "#\\o[0-7]\+"@(if txr-p " contained")
 syn match txr_chr "#\\[^ \t\nA-Za-z0-9_]"@(if txr-p " contained")
 syn match txr_chr "#\\[A-Za-z0-9_]\+"@(if txr-p " contained")
 syn match txr_ncomment ";.*"@(if txr-p " contained")
diff --git a/parser.l b/parser.l
index 37bbdc70..30cb3034 100644
--- a/parser.l
+++ b/parser.l
@@ -166,7 +166,7 @@ static wchar_t num_esc(scanner_t *scn, char *num)
 
 %}
 
-%option stack nounput noinput reentrant bison-bridge extra-type="parser_t *"
+%option stack noinput reentrant bison-bridge extra-type="parser_t *"
 
 SYM     [a-zA-Z0-9_]+
 SGN     [+\-]
@@ -685,11 +685,18 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return TEXT;
 }
 
-<SPECIAL>[\\](x{HEX}+|{OCT}+) {
+<SPECIAL>[\\](x{HEX}+|{OCT}+);? {
   wchar_t lexeme[2];
   lexeme[0] = num_esc(yyg, yytext + 1);
   lexeme[1] = 0;
   yylval->lexeme = chk_strdup(lexeme);
+
+  {
+    char lastchar = yytext[yyleng-1];
+    if (lastchar == ';' && opt_compat && opt_compat <= 109)
+      unput(lastchar);
+  }
+
   yy_pop_state(yyscanner);
   return TEXT;
 }
diff --git a/txr.1 b/txr.1
index 3bbdabef..894143db 100644
--- a/txr.1
+++ b/txr.1
@@ -1155,7 +1155,9 @@ A
 immediately followed by a sequence of hex digits is interpreted as a hexadecimal
 numeric character code. For instance
 .code @\ex41
-is the ASCII character A.
+is the ASCII character A.  If a semicolon character immediately follows the
+hex digits, it is consumed, and characters which follow are not considered
+part of the hex escape even if they are hex digits.
 .meIP @\e < octal-digits
 
 A
@@ -1165,6 +1167,9 @@ as an octal character code. For instance
 .code @\e010
 is character 8, same as
 .codn @\eb .
+If a semicolon character immediately follows the octal digits, it is consumed,
+and subsequent characters are not treated as part of the octal escape,
+even if they are octal digits.
 .PP
 
 Note that if a newline is embedded into a query line with
@@ -30636,6 +30641,16 @@ is given an argument which is equal or lower. For instance
 .code -C 103
 selects the behaviors described below for version 105, but not those for 102.
 
+.IP 109
+The optional trailing semicolon on hex and octal codes in the \*(TX
+pattern language was introduced in 110. The feature is disabled
+with 109 or lower compatibility, so that
+.code @\ex21;a
+encodes
+.code !;a
+rather than the current behavior of encoding
+.codn !a .
+
 .IP 107
 Up through \*(TX 107, by accident, there was a function called
 .code flip