diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2014-03-17 13:23:33 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2014-03-17 13:23:33 -0700 |
commit | 2bb1c1f082120f2c4e2026a492685d27cb1572e3 (patch) | |
tree | 38bd39fd128ac425dead3d5e4c11b8b5018e49d7 | |
parent | 6ac24f8203cd10d5442a02c220a1229b2b7d5513 (diff) | |
download | unix-cruft-2bb1c1f082120f2c4e2026a492685d27cb1572e3.tar.gz unix-cruft-2bb1c1f082120f2c4e2026a492685d27cb1572e3.tar.bz2 unix-cruft-2bb1c1f082120f2c4e2026a492685d27cb1572e3.zip |
New file: awkreg-grammar.txt.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | awkreg-grammar.txt | 69 |
2 files changed, 74 insertions, 0 deletions
@@ -1,5 +1,10 @@
 2014-03-17 Kaz Kylheku <kaz@kylheku.com>
 
+	* awkreg-grammar.txt: New file. Taken from my original Usenet
+	posting in comp.lang.awk.
+
+2014-03-17 Kaz Kylheku <kaz@kylheku.com>
+
 	Fix in {m,n} syntax.
 
 	The issue is that the parser partially consumes broken {m,n}
diff --git a/awkreg-grammar.txt b/awkreg-grammar.txt
new file mode 100644
index 0000000..af6984b
--- /dev/null
+++ b/awkreg-grammar.txt
@@ -0,0 +1,69 @@
+#######################
+# Original LR grammar #
+#######################
+
+S -> <^> R <$>
+
+R -> R|R
+  -> R+
+  -> R?
+  -> R*
+  -> R R
+  -> R{num<,<num>>}
+  -> R{<,num>}
+  -> (R)
+  -> bracket
+  -> rchar
+
+bracket -> [<^> <bchar / range / class >*]
+
+bchar -> any character but [ or -
+      -> \]
+      -> \-
+      -> \^
+      -> \\
+
+range -> bchar - bchar
+
+class -> [:alpha:] / [:digit:] / ... et cetera
+
+rchar -> any character but ( ) [ ] { } * ? +
+      -> \ char
+
+char -> any character
+
+#################
+# Left-factored #
+#################
+
+R -> T       # regex is a single term
+  -> T R     # a term followed by a regex
+  -> T | R   # a term or regex
+  ->         # empty
+
+T -> F       # a regex term is a factor
+  -> F *
+  -> F ?
+  -> F +
+  -> F {num<,<num>>}
+
+F -> rchar   # a factor is a regex char
+  -> bracket # [...] expression
+  -> (R)     # parenthesized regex
+
+bracket -> [<^> <bchar / range / class >*]
+
+bchar -> any character but [ or -
+      -> \]
+      -> \-
+      -> \^
+      -> \\
+
+range -> bchar - bchar
+
+class -> [:alpha:] / [:digit:] / ... et cetera
+
+rchar -> any character but ( ) [ ] { } * ? +
+      -> \ char
+
+char -> any character |