3 files changed, 66 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 166adc68..723a3f2d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2011-09-29  Kaz Kylheku  <kaz@kylheku.com>
 
+	* parser.l: Implemented backslash continuations in SPECIAL
+	state, regexes and string literals.
+
+	* txr.1: Documented.
+
+2011-09-29  Kaz Kylheku  <kaz@kylheku.com>
+
 	* match.c (match_line): Implemented horizontal all, some,
 	none, maybe and cases directives.
 	(match_files): Recognize horizontal version of these directives
diff --git a/parser.l b/parser.l
index 991df970..a57c954b 100644
--- a/parser.l
+++ b/parser.l
@@ -136,6 +136,7 @@ void yybadtoken(int tok, val context)
 static wchar_t char_esc(int letter)
 {
   switch (letter) {
+  case ' ': return L' ';
   case 'a': return L'\a';
   case 'b': return L'\b';
   case 't': return L'\t';
@@ -357,7 +358,7 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                           return yytext[0];
                         }
 
-<SPECIAL,NESTED>[\t ]+  { /* Eat whitespace in directive */ }
+<SPECIAL,NESTED>{WS}    { /* Eat whitespace in directive */ }
 
 <SPECIAL,NESTED>\"      {
                           yy_push_state(STRLIT);
@@ -394,14 +395,19 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                           return '.';
                         }
 
-<SPECIAL>[\\][abtnvfre] {
-                          wchar_t lexeme[2];
-                          lexeme[0] = char_esc(yytext[1]);
-                          lexeme[1] = 0;
-                          yylval.lexeme = chk_strdup(lexeme);
-                          yy_pop_state();
-                          return TEXT;
-                        }
+<SPECIAL,NESTED>[\\]\n{WS}      { /* Backslash-newline continuation; {WS} (defined elsewhere, presumably blanks/tabs) also takes the next line's indentation. */
+                                  yy_pop_state(); /* NOTE(review): pops for NESTED as well as SPECIAL - confirm that is intended. */
+                                  lineno++; /* The match consumed one physical newline; no token is returned. */
+                                }
+
+<SPECIAL>[\\][abtnvfre ] { /* Backslash followed by an escape letter or a space: emit the decoded character as TEXT. */
+                           wchar_t lexeme[2]; /* One decoded character plus the terminating zero. */
+                           lexeme[0] = char_esc(yytext[1]); /* yytext[1] is the character right after the backslash. */
+                           lexeme[1] = 0;
+                           yylval.lexeme = chk_strdup(lexeme); /* Hand the parser a duplicate; the local array goes out of scope. */
+                           yy_pop_state();
+                           return TEXT;
+                         }
 
 <SPECIAL>[\\](x{HEX}+|{OCT}+)   {
                                   wchar_t lexeme[2];
@@ -429,7 +435,7 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                 }
 
 
-<REGEX>[\\][abtnvfre\\]         {
+<REGEX>[\\][abtnvfre\\ ]        {
                                   yylval.chr = char_esc(yytext[1]);
                                   return REGCHAR;
                                 }
@@ -439,6 +445,10 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                                   return REGCHAR;
                                 }
 
+<REGEX>{WS}[\\]\n{WS}   { /* Backslash-newline inside a regex, together with the {WS} on either side of it: returns no token. */
+                          lineno++; /* Count the newline that was matched; this rule does not call yyerror, unlike the bare \n rule. */
+                        }
+
 <REGEX>\n       {
                   lineno++;
                   yyerror("newline in regex");
@@ -521,10 +531,14 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                 }
 
 <STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'\\] {
-                                          yylval.chr = char_esc(yytext[1]);
-                                          return LITCHAR;
-                                        }
+                                            yylval.chr = char_esc(yytext[1]);
+                                            return LITCHAR;
+                                          }
 
+<STRLIT,QSILIT>{WS}[\\]\n{WS}   { /* Backslash-newline inside a string or quasi-string literal, with the {WS} around it: returns no token. CHRLIT is not covered. */
+                                  lineno++; /* Count the newline that was matched. */
+                                }
+                                
 <STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+)     {
                                           yylval.chr = num_esc(yytext + 1);
                                           return LITCHAR;
diff --git a/txr.1 b/txr.1
index 0a5bcad1..3d2f46cb 100644
--- a/txr.1
+++ b/txr.1
@@ -368,6 +368,27 @@ Control characters may be embedded directly in a query (with the exception of
 newline characters). An alternative to embedding is to use escape syntax.
 The following escapes are supported:
 
+.IP @\e<newline>
+A backslash immediately followed by a newline introduces a physical line
+break without breaking up the logical line. Material following this sequence
+continues to be interpreted as a continuation of the previous line, so
+that indentation can be introduced to show the continuation without appearing
+in the data.
+.IP @\e<space>
+A backslash followed by a space encodes a space. This is useful in line
+continuations when it is necessary for leading spaces to be preserved.
+For instance the two line sequence
+
+   abcd@\e
+     @\e  efg
+
+is equivalent to the line
+
+  abcd  efg
+
+The two spaces before the @\e in the second line are consumed. The
+spaces after are preserved.
+
 .IP @\ea
 Alert character (ASCII 7, BEL).
 .IP @\eb
@@ -445,6 +466,17 @@ directive may be used, which has the following syntax:
 where the RE part enclosed in slashes represents regular expression
 syntax (described in the section Regular Expressions below).
 
+Long regular expressions can be broken into multiple lines using a
+backslash-newline sequence.  Whitespace before the sequence or after the
+sequence is not significant, so the following two are equivalent:
+
+  @/reg \e
+    ular/
+
+  @/regular/
+
+There may not be whitespace between the backslash and newline.
+
 Whereas literal text simply represents itself, regular expression denotes a
 (potentially infinite) set of texts.  The regular expression directive
 matches the longest piece of text (possibly empty) which belongs to the set