mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 21:10:24 +01:00
json: refine constraint for whitespace to avoid runaways yet allow pretty print (#7866)
This commit is contained in:
parent
396b18dfec
commit
b61eb9644d
@ -40,7 +40,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
|
||||
return result;
|
||||
}
|
||||
|
||||
const std::string SPACE_RULE = "\" \"?";
|
||||
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
|
||||
|
||||
struct BuiltinRule {
|
||||
std::string content;
|
||||
|
@ -29,9 +29,8 @@ class BuiltinRule:
|
||||
self.content = content
|
||||
self.deps = deps or []
|
||||
|
||||
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||
# whitespace. Also maybe improves generation quality?
|
||||
SPACE_RULE = '" "?'
|
||||
# Constraining spaces to prevent model "running away".
|
||||
SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}'
|
||||
|
||||
PRIMITIVE_RULES = {
|
||||
'boolean' : BuiltinRule('("true" | "false") space', []),
|
||||
|
@ -1,5 +1,5 @@
|
||||
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
|
||||
const SPACE_RULE = '" "?';
|
||||
const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}';
|
||||
|
||||
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
|
||||
if (minItems === 0 && maxItems === 1) {
|
||||
|
@ -22,4 +22,4 @@ string ::=
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
|
||||
|
||||
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
||||
ws ::= [ \t\n]{0,20}
|
||||
ws ::= | " " | "\n" [ \t]{0,20}
|
||||
|
@ -31,4 +31,4 @@ string ::=
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
|
||||
|
||||
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
||||
ws ::= [ \t\n]{0,20}
|
||||
ws ::= | " " | "\n" [ \t]{0,20}
|
||||
|
@ -112,7 +112,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||
root ::= object
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
value ::= object | array | string | number | boolean | null
|
||||
)"""
|
||||
@ -135,7 +135,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
date-time ::= date "T" time
|
||||
date-time-string ::= "\"" date-time "\"" space
|
||||
root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
||||
time-string ::= "\"" time "\"" space
|
||||
tuple-0 ::= date-string
|
||||
@ -154,7 +154,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "\"" char* "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -168,7 +168,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "\"" char+ "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -182,7 +182,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "\"" char{3,} "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -196,7 +196,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "\"" char{0,3} "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -211,7 +211,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "\"" char{1,4} "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -223,7 +223,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= ("true" | "false") space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -236,7 +236,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
root ::= ("-"? integral-part) space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -248,7 +248,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"foo\""
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -260,7 +260,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "123"
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -272,7 +272,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]"
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -285,7 +285,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "[" space string "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -302,7 +302,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "[" space string "," space number "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -317,7 +317,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
decimal-part ::= [0-9]{1,16}
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -333,7 +333,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space boolean ("," space boolean)+ "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -349,7 +349,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space boolean? "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -365,7 +365,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space (boolean ("," space boolean)?)? "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -386,7 +386,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
item ::= number | integer
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "[" space item ("," space item){2,4} "]" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -399,7 +399,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -412,7 +412,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" "[]{}()|+*?" "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -425,7 +425,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" "\"" "\"" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -440,7 +440,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
dot ::= [^\x0A\x0D]
|
||||
root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot "\"" space
|
||||
root-1 ::= [0-9]
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -468,7 +468,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
c-kv ::= "\"c\"" space ":" space string
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -488,7 +488,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
a-kv ::= "\"a\"" space ":" space string
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "{" space (a-kv )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -512,7 +512,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
c-kv ::= "\"c\"" space ":" space string
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -538,7 +538,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
d-kv ::= "\"d\"" space ":" space string
|
||||
d-rest ::= ( "," space c-kv )?
|
||||
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -559,7 +559,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "{" space (additional-kvs )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -581,7 +581,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||
root ::= object
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
value ::= object | array | string | number | boolean | null
|
||||
)"""
|
||||
@ -603,7 +603,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||
root ::= object
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
value ::= object | array | string | number | boolean | null
|
||||
)"""
|
||||
@ -618,7 +618,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "{" space "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -642,7 +642,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -667,7 +667,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -695,7 +695,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -725,7 +725,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
foo ::= "{" space foo-a-kv "}" space
|
||||
foo-a-kv ::= "\"a\"" space ":" space string
|
||||
root ::= foo
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
string ::= "\"" char* "\"" space
|
||||
)"""
|
||||
});
|
||||
@ -759,7 +759,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= alternative-0 | alternative-1
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -803,7 +803,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
|
||||
@ -851,7 +851,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||
number-number-kv ::= "\"number\"" space ":" space number-number
|
||||
number-number-root-kv ::= "\"root\"" space ":" space number
|
||||
root ::= "{" space number-kv "}" space
|
||||
space ::= " "?
|
||||
space ::= | " " | "\n" [ \t]{0,20}
|
||||
)"""
|
||||
});
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user