aboutsummaryrefslogtreecommitdiff
path: root/grammars
diff options
context:
space:
mode:
authorEvan Jones <evan.q.jones@gmail.com>2023-07-23 23:58:10 -0400
committerGitHub <noreply@github.com>2023-07-23 23:58:10 -0400
commit84e09a7d8bc4ab6d658b5cd81295ac0add60be78 (patch)
tree934c5480d917325ac8baa29f4edfae99137b56bb /grammars
parent2f9cf974a066ac0e03fbb235d834b01b0164d743 (diff)
llama : add grammar-based sampling (#1773)
* llama, main : constrain sampling to grammar * allow loading grammar from file * fix whitespace errors * handle & print parser errors * add comments to grammar syntax and allow newlines where unambiguous * add missing include * support alternates in root rule * fix bugs with empty token and EOS * adjust JSON grammar * remove swp file * rewrite ternary expressions Co-authored-by: Henri Vasserman <henv@hot.ee> * use struct for grammar elements and add Unicode support * add unicode escapes * add inverse char ranges * only sample full tokens (no peeking or truncation) * llama : minor style changes blindly applied in online editor - hopefully I didn't break something * update help text * add warning message if EOS is disabled --------- Co-authored-by: Henri Vasserman <henv@hot.ee> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'grammars')
-rw-r--r--grammars/arithmetic.gbnf6
-rw-r--r--grammars/chess.gbnf13
-rw-r--r--grammars/japanese.gbnf7
-rw-r--r--grammars/json.gbnf29
-rw-r--r--grammars/list.gbnf4
5 files changed, 59 insertions, 0 deletions
diff --git a/grammars/arithmetic.gbnf b/grammars/arithmetic.gbnf
new file mode 100644
index 0000000..3aa95a9
--- /dev/null
+++ b/grammars/arithmetic.gbnf
@@ -0,0 +1,6 @@
+root ::= (expr "=" ws term "\n")+
+expr ::= term ([-+*/] term)*
+term ::= ident | num | "(" ws expr ")" ws
+ident ::= [a-z] [a-z0-9_]* ws
+num ::= [0-9]+ ws
+ws ::= [ \t\n]*
diff --git a/grammars/chess.gbnf b/grammars/chess.gbnf
new file mode 100644
index 0000000..ef0fc1b
--- /dev/null
+++ b/grammars/chess.gbnf
@@ -0,0 +1,13 @@
+# Specifies chess moves as a list in algebraic notation, using PGN conventions
+
+# Force first move to "1. ", then any 1-2 digit number after, relying on model to follow the pattern
+root ::= "1. " move " " move "\n" ([1-9] [0-9]? ". " move " " move "\n")+
+move ::= (pawn | nonpawn | castle) [+#]?
+
+# piece type, optional file/rank, optional capture, dest file & rank
+nonpawn ::= [NBKQR] [a-h]? [1-8]? "x"? [a-h] [1-8]
+
+# optional file & capture, dest file & rank, optional promotion
+pawn ::= ([a-h] "x")? [a-h] [1-8] ("=" [NBKQR])?
+
+castle ::= "O-O" "-O"?
diff --git a/grammars/japanese.gbnf b/grammars/japanese.gbnf
new file mode 100644
index 0000000..43f25ab
--- /dev/null
+++ b/grammars/japanese.gbnf
@@ -0,0 +1,7 @@
+# A probably incorrect grammar for Japanese
+root ::= jp-char+ ([ \t\n] jp-char+)*
+jp-char ::= hiragana | katakana | punctuation | cjk
+hiragana ::= [ぁ-ゟ]
+katakana ::= [ァ-ヿ]
+punctuation ::= [、-〾]
+cjk ::= [一-鿿]
diff --git a/grammars/json.gbnf b/grammars/json.gbnf
new file mode 100644
index 0000000..40fa2b6
--- /dev/null
+++ b/grammars/json.gbnf
@@ -0,0 +1,29 @@
+# Grammar for subset of JSON - doesn't support full string or number syntax
+
+root ::= object
+value ::= object | array | string | number | boolean | "null"
+
+object ::=
+ "{" ws (
+ string ":" ws value
+ ("," ws string ":" ws value)*
+ )? "}"
+
+array ::=
+ "[" ws (
+ value
+ ("," ws value)*
+ )? "]"
+
+string ::=
+ "\"" (
+ [^"\\] |
+ "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+ )* "\"" ws
+
+# Only plain integers currently
+number ::= "-"? [0-9]+ ws
+boolean ::= ("true" | "false") ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
diff --git a/grammars/list.gbnf b/grammars/list.gbnf
new file mode 100644
index 0000000..51e6c9c
--- /dev/null
+++ b/grammars/list.gbnf
@@ -0,0 +1,4 @@
+root ::= item+
+
+# Excludes various line break characters
+item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"