From 864a99e7a01d9422d2f55618dbe62c8099a2175c Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Mon, 10 Jun 2024 18:32:10 -0400 Subject: [PATCH 1/3] cmake : fix CMake requirement for CUDA (#7821) --- CMakeLists.txt | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b1d6afbbc..8e280f87d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -402,12 +402,26 @@ if (LLAMA_CUBLAS) endif() if (LLAMA_CUDA) - cmake_minimum_required(VERSION 3.17) + cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES find_package(CUDAToolkit) if (CUDAToolkit_FOUND) message(STATUS "CUDA found") + if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + # 52 == lowest CUDA 12 standard + # 60 == f16 CUDA intrinsics + # 61 == integer CUDA intrinsics + # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster + if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) + set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics + else() + set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work + endif() + endif() + message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") + enable_language(CUDA) set(GGML_HEADERS_CUDA ggml-cuda.h) @@ -472,21 +486,6 @@ if (LLAMA_CUDA) else() set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... 
endif() - - if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - # 52 == lowest CUDA 12 standard - # 60 == f16 CUDA intrinsics - # 61 == integer CUDA intrinsics - # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster - if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) - set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics - else() - set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics - #set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work - endif() - endif() - message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") - else() message(WARNING "CUDA not found") endif() From 396b18dfec2c56846e80362db70af09b9e1d70ba Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Tue, 11 Jun 2024 01:00:30 +0100 Subject: [PATCH 2/3] `json`: document schema conversion in GBNF readme, align manual grammar examples & converters (#7841) * json: fix char pattern in grammar converters * json: prevent number precision & whitespace runaways in example grammars * json: add doc to grammar readme --- common/json-schema-to-grammar.cpp | 2 +- examples/json_schema_to_grammar.py | 2 +- .../server/public/json-schema-to-grammar.mjs | 2 +- grammars/README.md | 39 +++++++++++++++++++ grammars/json.gbnf | 6 +-- grammars/json_arr.gbnf | 6 +-- tests/test-json-schema-to-grammar.cpp | 38 +++++++++--------- 7 files changed, 67 insertions(+), 28 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 737bae27c..11221a32f 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -57,7 +57,7 @@ std::unordered_map PRIMITIVE_RULES = { {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}}, {"array", {"\"[\" space ( value (\",\" space value)* )? 
\"]\" space", {"value"}}}, {"uuid", {"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space", {}}}, - {"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}}, + {"char", {"[^\"\\\\\\x7F\\x00-\\x1F] | [\\\\] ([\"\\\\bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}}, {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}}, {"null", {"\"null\" space", {}}}, }; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 7d889c3fe..cd444d010 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -43,7 +43,7 @@ PRIMITIVE_RULES = { 'object' : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']), 'array' : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']), 'uuid' : BuiltinRule(r'"\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\"" space', []), - 'char' : BuiltinRule(r'[^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F]{4})', []), + 'char' : BuiltinRule(r'[^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})', []), 'string' : BuiltinRule(r'"\"" char* "\"" space', ['char']), 'null' : BuiltinRule('"null" space', []), } diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index cef11eab8..dc2468396 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -41,7 +41,7 @@ const PRIMITIVE_RULES = { object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']), array : new BuiltinRule('"[" space ( value ("," space value)* )? 
"]" space', ['value']), uuid : new BuiltinRule('"\\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\\"" space', []), - char : new BuiltinRule(`[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})`, []), + char : new BuiltinRule(`[^"\\\\\\x7F\\x00-\\x1F] | [\\\\] (["\\\\bfnrt] | "u" [0-9a-fA-F]{4})`, []), string : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']), null : new BuiltinRule('"null" space', []), }; diff --git a/grammars/README.md b/grammars/README.md index 3ffc7cec0..2ec21a4c0 100644 --- a/grammars/README.md +++ b/grammars/README.md @@ -94,6 +94,8 @@ This guide provides a brief overview. Check out the GBNF files in this directory ./main -m --grammar-file grammars/some-grammar.gbnf -p 'Some prompt' ``` +`llama.cpp` can also convert JSON schemas to grammars either ahead of time or at each request, see below. + ## Troubleshooting Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218). @@ -103,3 +105,40 @@ Grammars currently have performance gotchas (see https://github.com/ggerganov/ll A common pattern is to allow repetitions of a pattern `x` up to N times. While semantically correct, the syntax `x? x? x?.... x?` (with N repetitions) may result in extremely slow sampling. Instead, you can write `x{0,N}` (or `(x (x (x ... (x)?...)?)?)?` w/ N-deep nesting in earlier llama.cpp versions). + +## Using GBNF grammars + +You can use GBNF grammars: + +- In the [server](../examples/server)'s completion endpoints, passed as the `grammar` body field +- In the [main](../examples/main) CLI, passed as the `--grammar` & `--grammar-file` flags +- With the [gbnf-validator](../examples/gbnf-validator) tool, to test them against strings. 
+ +## JSON Schemas → GBNF + +`llama.cpp` supports converting a subset of https://json-schema.org/ to GBNF grammars: + +- In the [server](../examples/server): + - For any completion endpoints, passed as the `json_schema` body field + - For the `/chat/completions` endpoint, passed inside the `response_format` body field (e.g. `{"type": "json_object", "schema": {"items": {}}}`) +- In the [main](../examples/main) CLI, passed as the `--json` / `-j` flag +- To convert to a grammar ahead of time: + - in CLI, with [json_schema_to_grammar.py](../examples/json_schema_to_grammar.py) + - in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI) + +Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggerganov/llama.cpp/pull/5978, https://github.com/ggerganov/llama.cpp/pull/6659 & https://github.com/ggerganov/llama.cpp/pull/6555). + +Here is also a non-exhaustive list of **unsupported** features: + +- `additionalProperties`: to be fixed in https://github.com/ggerganov/llama.cpp/pull/7840 +- `minimum`, `exclusiveMinimum`, `maximum`, `exclusiveMaximum` + - `integer` constraints to be implemented in https://github.com/ggerganov/llama.cpp/pull/7797 +- Remote `$ref`s in the C++ version (Python & JavaScript versions fetch https refs) +- Mixing `properties` w/ `anyOf` / `oneOf` in the same type (https://github.com/ggerganov/llama.cpp/issues/7703) +- `string` formats `uri`, `email` +- [`contains`](https://json-schema.org/draft/2020-12/json-schema-core#name-contains) / `minContains` +- `uniqueItems` +- `$anchor` (cf. 
[dereferencing](https://json-schema.org/draft/2020-12/json-schema-core#name-dereferencing)) +- [`not`](https://json-schema.org/draft/2020-12/json-schema-core#name-not) +- [Conditionals](https://json-schema.org/draft/2020-12/json-schema-core#name-keywords-for-applying-subsche) `if` / `then` / `else` / `dependentSchemas` +- [`patternProperties`](https://json-schema.org/draft/2020-12/json-schema-core#name-patternproperties) diff --git a/grammars/json.gbnf b/grammars/json.gbnf index a8a80752e..064a53f8a 100644 --- a/grammars/json.gbnf +++ b/grammars/json.gbnf @@ -16,10 +16,10 @@ array ::= string ::= "\"" ( [^"\\\x7F\x00-\x1F] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes )* "\"" ws -number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws +number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws # Optional space: by convention, applied in this grammar after literal chars when allowed -ws ::= ([ \t\n] ws)? +ws ::= [ \t\n]{0,20} diff --git a/grammars/json_arr.gbnf b/grammars/json_arr.gbnf index 31a3202f8..bd1312d96 100644 --- a/grammars/json_arr.gbnf +++ b/grammars/json_arr.gbnf @@ -25,10 +25,10 @@ array ::= string ::= "\"" ( [^"\\\x7F\x00-\x1F] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes )* "\"" ws -number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws +number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws # Optional space: by convention, applied in this grammar after literal chars when allowed -ws ::= ([ \t\n] ws)? 
+ws ::= [ \t\n]{0,20} diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 052c08073..bea876bd1 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -105,7 +105,7 @@ static void test_all(const std::string & lang, std::function Date: Tue, 11 Jun 2024 02:22:57 +0100 Subject: [PATCH 3/3] json: refine constraint for whitespace to avoid runaways yet allow pretty print (#7866) --- common/json-schema-to-grammar.cpp | 2 +- examples/json_schema_to_grammar.py | 5 +- .../server/public/json-schema-to-grammar.mjs | 2 +- grammars/json.gbnf | 2 +- grammars/json_arr.gbnf | 2 +- tests/test-json-schema-to-grammar.cpp | 76 +++++++++---------- 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 11221a32f..10b9b3d1d 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -40,7 +40,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } -const std::string SPACE_RULE = "\" \"?"; +const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; struct BuiltinRule { std::string content; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index cd444d010..ab19e20df 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -29,9 +29,8 @@ class BuiltinRule: self.content = content self.deps = deps or [] -# whitespace is constrained to a single space char to prevent model "running away" in -# whitespace. Also maybe improves generation quality? -SPACE_RULE = '" "?' +# Constraining spaces to prevent model "running away". 
+SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}' PRIMITIVE_RULES = { 'boolean' : BuiltinRule('("true" | "false") space', []), diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index dc2468396..faed6a32c 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -1,5 +1,5 @@ // WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first. -const SPACE_RULE = '" "?'; +const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}'; function _buildRepetition(itemRule, minItems, maxItems, opts={}) { if (minItems === 0 && maxItems === 1) { diff --git a/grammars/json.gbnf b/grammars/json.gbnf index 064a53f8a..b6448c87b 100644 --- a/grammars/json.gbnf +++ b/grammars/json.gbnf @@ -22,4 +22,4 @@ string ::= number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws # Optional space: by convention, applied in this grammar after literal chars when allowed -ws ::= [ \t\n]{0,20} +ws ::= | " " | "\n" [ \t]{0,20} diff --git a/grammars/json_arr.gbnf b/grammars/json_arr.gbnf index bd1312d96..b3dc6f9b1 100644 --- a/grammars/json_arr.gbnf +++ b/grammars/json_arr.gbnf @@ -31,4 +31,4 @@ string ::= number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws # Optional space: by convention, applied in this grammar after literal chars when allowed -ws ::= [ \t\n]{0,20} +ws ::= | " " | "\n" [ \t]{0,20} diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index bea876bd1..a33104dea 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -112,7 +112,7 @@ static void test_all(const std::string & lang, std::function