From c5a8d4b749352645afd4c024f85d6eca2ca72c6d Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Fri, 21 Jun 2024 23:18:36 -0400 Subject: [PATCH] JSON Schema to GBNF integration tests (#7790) * Adding a simple, bare-bones end-to-end integration test for JSON validation against auto-generated JSON-schema grammars. * Adding additional examples as documented in #7789. Also adding the ability to automatically output improperly failing grammars to debug output files so they can more easily be examined in the gbnf-validator program. * Uncommenting formerly commented tests so that they fail for others who are attempting to reproduce the bugs. * Merging improved schema test methods added by @ochafik in #7797. * Adding #define to temporarily remove failing tests so that this PR can pass CI, but still be useful for other PRs that want to leverage the framework. * Fixing nits from ochafik. Removing escape slashes, adding additional failing cases, fixing some other strings. * Fixing grammar indentation to be consistent throughout the file. 
--- Makefile | 2 +- tests/test-grammar-integration.cpp | 599 ++++++++++++++++++++++++++++- 2 files changed, 580 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index dddf647cd..4ea59c0b4 100644 --- a/Makefile +++ b/Makefile @@ -1051,7 +1051,7 @@ tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS) +tests/test-grammar-integration: tests/test-grammar-integration.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 8787fb1ec..96f90c01e 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -7,11 +7,16 @@ #include "ggml.h" #include "llama.h" #include "grammar-parser.h" +#include "json-schema-to-grammar.h" #include "unicode.h" #include #include #include +using json = nlohmann::ordered_json; + +//#define INCLUDE_FAILING_TESTS 1 + static llama_grammar* build_grammar(const std::string & grammar_str) { auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -65,8 +70,8 @@ static bool match_string(const std::string & input, llama_grammar* grammar) { return false; } -static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { - fprintf(stderr, "⚫ Testing %s. 
Grammar: %s\n", test_desc.c_str(), grammar_str.c_str()); +static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { + fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str()); fflush(stderr); auto grammar = build_grammar(grammar_str); @@ -85,6 +90,23 @@ static void test_grammar(const std::string & test_desc, const std::string & gram if (!matched) { fprintf(stderr, "❌ (failed to match)\n"); + + // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed. + // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf + FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w"); + if (grammar_file) { + fprintf(grammar_file, "%s", grammar_str.c_str()); + fclose(grammar_file); + } + + // DEBUG: Write the test string to test-grammar-integration.string.txt + FILE* string_file = fopen("test-grammar-integration.string.txt", "w"); + if (string_file) { + fprintf(string_file, "%s", test_string.c_str()); + fclose(string_file); + } + + fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n"); } else { fprintf(stdout, "✅︎\n"); } @@ -118,6 +140,12 @@ static void test_grammar(const std::string & test_desc, const std::string & gram // Clean up allocated memory llama_grammar_free(grammar); } +static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { + test(test_desc + ". 
Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings); +} +static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector & passing_strings, const std::vector & failing_strings) { + test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings); +} static void test_simple_grammar() { // Test case for a simple grammar @@ -400,10 +428,11 @@ static void test_quantifiers() { static void test_failure_missing_root() { fprintf(stderr, "⚫ Testing missing root node:\n"); // Test case for a grammar that is missing a root rule - const std::string grammar_str = R"""(rot ::= expr -expr ::= term ("+" term)* -term ::= number -number ::= [0-9]+)"""; + const std::string grammar_str = R"""( + rot ::= expr + expr ::= term ("+" term)* + term ::= number + number ::= [0-9]+)"""; grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -420,10 +449,10 @@ static void test_failure_missing_reference() { // Test case for a grammar that is missing a referenced rule const std::string grammar_str = -R"""(root ::= expr -expr ::= term ("+" term)* -term ::= numero -number ::= [0-9]+)"""; + R"""(root ::= expr + expr ::= term ("+" term)* + term ::= numero + number ::= [0-9]+)"""; fprintf(stderr, " Expected error: "); @@ -445,29 +474,558 @@ static void test_failure_left_recursion() { // Test more complicated left recursion detection const std::string medium_str = R"""( -root ::= asdf -asdf ::= "a" | asdf "a" -)"""; + root ::= asdf + asdf ::= "a" | asdf "a" + )"""; assert(test_build_grammar_fails(medium_str)); // Test even more complicated left recursion detection const std::string hard_str = R"""( -root ::= asdf -asdf ::= "a" | foo "b" -foo ::= "c" | asdf "d" | "e")"""; + root ::= asdf + asdf ::= "a" | foo "b" + foo ::= "c" | asdf "d" | "e")"""; assert(test_build_grammar_fails(hard_str)); // Test yet even more complicated left recursion 
detection const std::string hardest_str = R"""( -root ::= asdf -asdf ::= "a" | foo "b" -foo ::= "c" | empty asdf "d" | "e" -empty ::= "blah" | )"""; + root ::= asdf + asdf ::= "a" | foo "b" + foo ::= "c" | empty asdf "d" | "e" + empty ::= "blah" | )"""; assert(test_build_grammar_fails(hardest_str)); fprintf(stderr, " ✅︎ Passed\n"); } +static void test_json_schema() { + // Note that this is similar to the regular grammar tests, + // but we convert each json schema to a grammar before parsing. + // Otherwise, this test structure is the same. + + test_schema( + "empty schema (object)", + // Schema + R"""( + {} + )""", + // Passing strings + { + "{}", + R"""({"foo": "bar"})""", + }, + // Failing strings + { + "", + "[]", + "null", + "\"\"", + "true", + } + ); + + test_schema( + "exotic formats (list)", + // Schema + R"""( + { + "items": [ + { "format": "date" }, + { "format": "uuid" }, + { "format": "time" }, + { "format": "date-time" } + ] + } + )""", + // Passing strings + { + // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""", + //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? 
+ }, + // Failing strings + { + R"""(["foo", "bar"])""", + R"""(["12345678-1234-1234-1234-1234567890ab"])""", + } + ); + + test_schema( + "string", + // Schema + R"""( + { + "type": "string" + } + )""", + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"\"", + }, + // Failing strings + { + "{}", + "\"foo\": \"bar\"", + } + ); + + test_schema( + "string w/ min length 1", + // Schema + R"""( + { + "type": "string", + "minLength": 1 + } + )""", + // Passing strings + { + "\"foo\"", + "\"bar\"", + }, + // Failing strings + { + "\"\"", + "{}", + "\"foo\": \"bar\"", + } + ); + + test_schema( + "string w/ min length 3", + // Schema + R"""( + { + "type": "string", + "minLength": 3 + } + )""", + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"foobar\"", + }, + // Failing strings + { + "\"\"", + "\"f\"", + "\"fo\"", + } + ); + + test_schema( + "string w/ max length", + // Schema + R"""( + { + "type": "string", + "maxLength": 3 + } + )""", + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"\"", + "\"f\"", + "\"fo\"", + }, + // Failing strings + { + "\"foobar\"", + } + ); + + test_schema( + "string w/ min & max length", + // Schema + R"""( + { + "type": "string", + "minLength": 1, + "maxLength": 4 + } + )""", + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"f\"", + "\"barf\"", + }, + // Failing strings + { + "\"\"", + "\"barfo\"", + "\"foobar\"", + } + ); + + test_schema( + "boolean", + // Schema + R"""( + { + "type": "boolean" + } + )""", + // Passing strings + { + "true", + "false", + }, + // Failing strings + { + "\"\"", + "\"true\"", + "True", + "FALSE", + } + ); + + test_schema( + "integer", + // Schema + R"""( + { + "type": "integer" + } + )""", + // Passing strings + { + "0", + "12345", + "1234567890123456" + }, + // Failing strings + { + "", + "01", + "007", + "12345678901234567" + } + ); + + test_schema( + "string const", + // Schema + R"""( + { + "const": "foo" + } + )""", + // Passing strings + { + "\"foo\"", + }, + // Failing strings + { + 
"foo", + "\"bar\"", + } + ); + + test_schema( + "non-string const", + // Schema + R"""( + { + "const": true + } + )""", + // Passing strings + { + "true", + }, + // Failing strings + { + "", + "foo", + "\"true\"", + } + ); + + test_schema( + "non-string const", + // Schema + R"""( + { + "enum": ["red", "amber", "green", null, 42, ["foo"]] + } + )""", + // Passing strings + { + "\"red\"", + "null", + "42", + "[\"foo\"]", + }, + // Failing strings + { + "", + "420", + "true", + "foo", + } + ); + + + test_schema( + "min+max items", + // Schema + R"""( + { + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 + } + )""", + // Passing strings + { + "[1, 2, 3]", + "[1, 2, 3, 4]", + "[1, 2, 3, 4, 5]", + }, + // Failing strings + { + "[1, 2]", + "[1, 2, 3, 4, 5, 6]", + "1" + } + ); + + // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) + test_schema( + "object properties", + // Schema + R"""( + { + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + } + } + )""", + // Passing strings + { + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // "By default, leaving out properties is valid" + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + // "By extension, even an empty object is valid" + R"""({})""", + // "By default, providing additional properties is valid" +#ifdef INCLUDE_FAILING_TESTS + // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", + // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", +#endif + }, + // Failing strings + { + // Change datatype from number to string + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Reorder properties + R"""({ "street_name": "Pennsylvania", "number": 1600 })""", + // Reorder properties + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + } + ); + + + // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) + test_schema( + "object properties, additionalProperties: true", + // Schema + R"""( + { + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + "additionalProperties": true + } + )""", + // Passing strings + { + // "By extension, even an empty object is valid" + R"""({})""", +#ifdef INCLUDE_FAILING_TESTS + // TODO: Following line should pass and doesn't + R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", + // "By default, leaving out properties is valid" + // TODO: Following line should pass and doesn't + R"""({ "street_name": "Pennsylvania" })""", + // TODO: Following line should pass and doesn't + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + // "By default, providing additional properties is valid" + // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", + // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", +#endif + }, + // Failing strings + { + // Change datatype from number to string + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Reorder properties + R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""", + } + ); + + // Additional properties: false + test_schema( + "required + optional props each in original order", + // Schema + R"""( + { + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + "additionalProperties": false + } + )""", + // Passing strings + { + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_type":"Avenue"})""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", +#ifdef INCLUDE_FAILING_TESTS + // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", +#endif + }, + // Failing strings + { + // Reorder properties + R"""({ "street_type": "Avenue", "number": 1600 })""", + // Add "direction" + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""", + } + ); + + test_schema( + "required + optional props each in original order", + // Schema + R"""( + { + "properties": { + "b": {"type": "string"}, + "a": {"type": "string"}, + "d": {"type": "string"}, + "c": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false + } + )""", + // Passing strings + { + R"""({"b": "foo", "a": "bar"})""", + R"""({"b":"foo","a":"bar","d":"qux"})""", + R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""", + }, + // Failing strings + { + R"""({"a": "foo", "b": "bar"})""", + R"""({"b": "bar"})""", + R"""({"a": "foo", "c": "baz"})""", + R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""", + } + ); + + // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties + test_schema( + "required props", + // Schema + R"""( + { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/product.schema.json", + "title": "Product", + "description": "A product from Acme's catalog", + "type": "object", + "properties": { + "productId": { + "description": "The unique identifier for a product", + "type": "integer" + }, + "productName": { + "description": "Name of the product", + "type": "string" + }, + "price": { + "description": "The price of the product", + "type": "number", + "exclusiveMinimum": 0 + }, + "tags": { + "description": "Tags for the product", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": true + }, + "dimensions": { + "type": "object", + "properties": { + "length": { + "type": "number" + }, + "width": { + "type": "number" + }, + "height": { + "type": "number" + } + 
}, + "required": [ "length", "width", "height" ] + } + }, + "required": [ "productId", "productName", "price" ] + } + )""", + // Passing strings + { + R"""({"productId": 1, "productName": "A green door", "price": 12.50})""", + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""", + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""", + }, + // Failing strings + { + R"""({})""", // Missing all required properties + R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties + // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement. + // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex. + // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""", + R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price) + R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId) + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1 + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order + // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement. 
+ // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""", + } + ); +} + int main() { fprintf(stdout, "Running grammar integration tests...\n"); test_simple_grammar(); @@ -477,6 +1035,7 @@ int main() { test_failure_missing_root(); test_failure_missing_reference(); test_failure_left_recursion(); + test_json_schema(); fprintf(stdout, "All tests passed.\n"); return 0; }