summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--parser.l40
-rw-r--r--txr.132
3 files changed, 66 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 166adc68..723a3f2d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2011-09-29 Kaz Kylheku <kaz@kylheku.com>
+ * parser.l: Implemented backslash continuations in SPECIAL
+ state, regexes and string literals.
+
+ * txr.1: Documented.
+
+2011-09-29 Kaz Kylheku <kaz@kylheku.com>
+
* match.c (match_line): Implemented horizontal all, some,
none, maybe and cases directives.
(match_files): Recognize horizontal version of these directives
diff --git a/parser.l b/parser.l
index 991df970..a57c954b 100644
--- a/parser.l
+++ b/parser.l
@@ -136,6 +136,7 @@ void yybadtoken(int tok, val context)
static wchar_t char_esc(int letter)
{
switch (letter) {
+ case ' ': return L' ';
case 'a': return L'\a';
case 'b': return L'\b';
case 't': return L'\t';
@@ -357,7 +358,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return yytext[0];
}
-<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ }
+<SPECIAL,NESTED>{WS} { /* Eat whitespace in directive */ }
<SPECIAL,NESTED>\" {
yy_push_state(STRLIT);
@@ -394,14 +395,19 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return '.';
}
-<SPECIAL>[\\][abtnvfre] {
- wchar_t lexeme[2];
- lexeme[0] = char_esc(yytext[1]);
- lexeme[1] = 0;
- yylval.lexeme = chk_strdup(lexeme);
- yy_pop_state();
- return TEXT;
- }
+<SPECIAL,NESTED>[\\]\n{WS} {
+ yy_pop_state();
+ lineno++;
+ }
+
+<SPECIAL>[\\][abtnvfre ] {
+ wchar_t lexeme[2];
+ lexeme[0] = char_esc(yytext[1]);
+ lexeme[1] = 0;
+ yylval.lexeme = chk_strdup(lexeme);
+ yy_pop_state();
+ return TEXT;
+ }
<SPECIAL>[\\](x{HEX}+|{OCT}+) {
wchar_t lexeme[2];
@@ -429,7 +435,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
}
-<REGEX>[\\][abtnvfre\\] {
+<REGEX>[\\][abtnvfre\\ ] {
yylval.chr = char_esc(yytext[1]);
return REGCHAR;
}
@@ -439,6 +445,10 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return REGCHAR;
}
+<REGEX>{WS}[\\]\n{WS} {
+ lineno++;
+ }
+
<REGEX>\n {
lineno++;
yyerror("newline in regex");
@@ -521,10 +531,14 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
}
<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'\\] {
- yylval.chr = char_esc(yytext[1]);
- return LITCHAR;
- }
+ yylval.chr = char_esc(yytext[1]);
+ return LITCHAR;
+ }
+<STRLIT,QSILIT>{WS}[\\]\n{WS} {
+ lineno++;
+ }
+
<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) {
yylval.chr = num_esc(yytext + 1);
return LITCHAR;
diff --git a/txr.1 b/txr.1
index 0a5bcad1..3d2f46cb 100644
--- a/txr.1
+++ b/txr.1
@@ -368,6 +368,27 @@ Control characters may be embedded directly in a query (with the exception of
newline characters). An alternative to embedding is to use escape syntax.
The following escapes are supported:
+.IP @\e<newline>
+A backslash immediately followed by a newline introduces a physical line
+break without breaking up the logical line. Material following this sequence
+continues to be interpreted as a continuation of the previous line, so
+that indentation can be introduced to show the continuation without appearing
+in the data.
+.IP @\e<space>
+A backslash followed by a space encodes a space. This is useful in line
+continuations when it is necessary for leading spaces to be preserved.
+For instance the two line sequence
+
  abcd@\e
  @\e efg
+
+is equivalent to the line
+
+ abcd efg
+
The two spaces before the @\e in the second line are consumed. The
spaces after are preserved.
+
.IP @\ea
Alert character (ASCII 7, BEL).
.IP @\eb
@@ -445,6 +466,17 @@ directive may be used, which has the following syntax:
where the RE part enclosed in slashes represents regular expression
syntax (described in the section Regular Expressions below).
+Long regular expressions can be broken into multiple lines using a
+backslash-newline sequence. Whitespace before the sequence or after the
+sequence is not significant, so the following two are equivalent:
+
+ @/reg \e
+ ular/
+
+ @/regular/
+
+There may not be whitespace between the backslash and newline.
+
Whereas literal text simply represents itself, regular expression denotes a
(potentially infinite) set of texts. The regular expression directive
matches the longest piece of text (possibly empty) which belongs to the set