diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | parser.l | 40 | ||||
-rw-r--r-- | txr.1 | 32 |
3 files changed, 66 insertions, 13 deletions
@@ -1,5 +1,12 @@ 2011-09-29 Kaz Kylheku <kaz@kylheku.com> + * parser.l: Implemented backslash continuations in SPECIAL + state, regexes and string literals. + + * txr.1: Documented. + +2011-09-29 Kaz Kylheku <kaz@kylheku.com> + * match.c (match_line): Implemented horizontal all, some, none, maybe and cases directives. (match_files): Recognize horizontal version of these directives @@ -136,6 +136,7 @@ void yybadtoken(int tok, val context) static wchar_t char_esc(int letter) { switch (letter) { + case ' ': return L' '; case 'a': return L'\a'; case 'b': return L'\b'; case 't': return L'\t'; @@ -357,7 +358,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return yytext[0]; } -<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ } +<SPECIAL,NESTED>{WS} { /* Eat whitespace in directive */ } <SPECIAL,NESTED>\" { yy_push_state(STRLIT); @@ -394,14 +395,19 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return '.'; } -<SPECIAL>[\\][abtnvfre] { - wchar_t lexeme[2]; - lexeme[0] = char_esc(yytext[1]); - lexeme[1] = 0; - yylval.lexeme = chk_strdup(lexeme); - yy_pop_state(); - return TEXT; - } +<SPECIAL,NESTED>[\\]\n{WS} { + yy_pop_state(); + lineno++; + } + +<SPECIAL>[\\][abtnvfre ] { + wchar_t lexeme[2]; + lexeme[0] = char_esc(yytext[1]); + lexeme[1] = 0; + yylval.lexeme = chk_strdup(lexeme); + yy_pop_state(); + return TEXT; + } <SPECIAL>[\\](x{HEX}+|{OCT}+) { wchar_t lexeme[2]; @@ -429,7 +435,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } -<REGEX>[\\][abtnvfre\\] { +<REGEX>[\\][abtnvfre\\ ] { yylval.chr = char_esc(yytext[1]); return REGCHAR; } @@ -439,6 +445,10 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return REGCHAR; } +<REGEX>{WS}[\\]\n{WS} { + lineno++; + } + <REGEX>\n { lineno++; yyerror("newline in regex"); @@ -521,10 +531,14 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'\\] { - yylval.chr = char_esc(yytext[1]); - return LITCHAR; - } + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } +<STRLIT,QSILIT>{WS}[\\]\n{WS} { + lineno++; + } + <STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) { yylval.chr = num_esc(yytext + 1); return LITCHAR; @@ -368,6 +368,27 @@ Control characters may be embedded directly in a query (with the exception of newline characters). An alternative to embedding is to use escape syntax. The following escapes are supported: +.IP @\e<newline> +A backslash immediately followed by a newline introduces a physical line +break without breaking up the logical line. Material following this sequence +continues to be interpreted as a continuation of the previous line, so +that indentation can be introduced to show the continuation without appearing +in the data. +.IP @\e<space> +A backslash followed by a space encodes a space. This is useful in line +continuations when it is necessary for leading spaces to be preserved. +For instance the two line sequence + + abcd@\ + @\ efg + +is equivalent to the line + + abcd efg + +The two spaces before the @\ in the second line are consumed. The +spaces after are preserved. + .IP @\ea Alert character (ASCII 7, BEL). .IP @\eb @@ -445,6 +466,17 @@ directive may be used, which has the following syntax: where the RE part enclosed in slashes represents regular expression syntax (described in the section Regular Expressions below). +Long regular expressions can be broken into multiple lines using a +backslash-newline sequence. Whitespace before the sequence or after the +sequence is not significant, so the following two are equivalent: + + @/reg \e + ular/ + + @/regular/ + +There may not be whitespace between the backslash and newline. + Whereas literal text simply represents itself, regular expression denotes a (potentially infinite) set of texts. The regular expression directive matches the longest piece of text (possibly empty) which belongs to the set |