summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2014-03-17 13:23:33 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2014-03-17 13:23:33 -0700
commit 2bb1c1f082120f2c4e2026a492685d27cb1572e3 (patch)
tree 38bd39fd128ac425dead3d5e4c11b8b5018e49d7
parent 6ac24f8203cd10d5442a02c220a1229b2b7d5513 (diff)
download unix-cruft-2bb1c1f082120f2c4e2026a492685d27cb1572e3.tar.gz
unix-cruft-2bb1c1f082120f2c4e2026a492685d27cb1572e3.tar.bz2
unix-cruft-2bb1c1f082120f2c4e2026a492685d27cb1572e3.zip
New file: awkreg-grammar.txt.
-rw-r--r-- ChangeLog 5
-rw-r--r-- awkreg-grammar.txt 69
2 files changed, 74 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 7f4faff..1261b64 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2014-03-17 Kaz Kylheku <kaz@kylheku.com>
+ * awkreg-grammar.txt: New file. Taken from my original Usenet
+ posting in comp.lang.awk.
+
+2014-03-17 Kaz Kylheku <kaz@kylheku.com>
+
Fix in {m,n} syntax.
The issue is that the parser partially consumes broken {m,n}
diff --git a/awkreg-grammar.txt b/awkreg-grammar.txt
new file mode 100644
index 0000000..af6984b
--- /dev/null
+++ b/awkreg-grammar.txt
@@ -0,0 +1,69 @@
+#######################
+# Original LR grammar #
+#######################
+
+S -> <^> R <$>
+
+R -> R|R
+ -> R+
+ -> R?
+ -> R*
+ -> R R
+ -> R{num<,<num>>}
+ -> R{<,num>}
+ -> (R)
+ -> bracket
+ -> rchar
+
+bracket -> [<^> <bchar / range / class >*]
+
+bchar -> any character but [ or -
+ -> \]
+ -> \-
+ -> \^
+ -> \\
+
+range -> bchar - bchar
+
+class -> [:alpha:] / [:digit:] / ... et cetera
+
+rchar -> any character but ( ) [ ] { } * ? + | \
+ -> \ char
+
+char -> any character
+
+#################
+# Left-factored #
+#################
+
+R -> T # regex is a single term
+ -> T R # a term followed by a regex
+ -> T | R # a term or regex
+ -> # empty
+
+T -> F # a regex term is a factor
+ -> F *
+ -> F ?
+ -> F +
+ -> F {num<,<num>>}
+
+F -> rchar # a factor is a regex char
+ -> bracket # [...] expression
+ -> (R) # parenthesized regex
+
+bracket -> [<^> <bchar / range / class >*]
+
+bchar -> any character but [ or -
+ -> \]
+ -> \-
+ -> \^
+ -> \\
+
+range -> bchar - bchar
+
+class -> [:alpha:] / [:digit:] / ... et cetera
+
+rchar -> any character but ( ) [ ] { } * ? + | \
+ -> \ char
+
+char -> any character