tool-call: add tests: tool_call=none, parallel_tool_calls=true

ochafik 2024-10-28 10:04:00 +00:00
parent 168add7ec8
commit ec547e4137
3 changed files with 53 additions and 4 deletions

View File

@@ -746,6 +746,23 @@ async def step_tool_called(context, expected_name, expected_arguments):
        assert_n_tokens_predicted(result, tool_calls_check=check)
    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"


@step('receiving the following tool calls: {expected_tool_calls}')
@async_run_until_complete
async def step_receiving_tool_calls(context, expected_tool_calls):
    expected_tool_calls = json.loads(expected_tool_calls)
    n_completions = await gather_tasks_results(context)
    assert n_completions > 0
    for i in range(n_completions):
        result = context.tasks_result.pop()
        def check(tool_calls):
            assert json.dumps(expected_tool_calls) == json.dumps(tool_calls), f"tool calls: {tool_calls}, expected: {expected_tool_calls}, result = {result}"
        assert_n_tokens_predicted(result, tool_calls_check=check)
    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"


@step('no tool is called')
@async_run_until_complete
async def step_tool_called(context):
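For reference, the new step does a strict comparison: the expected JSON from the feature file must serialize identically to the tool_calls the server returned, including ordering of calls, key order and ids. A minimal standalone sketch of that check (the message payload below is illustrative, not real server output):

import json

# Illustrative OpenAI-style assistant message (assumed shape, mirroring the
# expected JSON used in the feature file -- not verified server output).
message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {"arguments": {"code": "print('hi')"}, "name": "ipython", "id": "123456789"},
    ],
}

expected_tool_calls = [
    {"arguments": {"code": "print('hi')"}, "name": "ipython", "id": "123456789"},
]

# Same strict check as in step_receiving_tool_calls above: the serialized
# forms must match exactly.
assert json.dumps(expected_tool_calls) == json.dumps(message["tool_calls"])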

View File

@@ -92,7 +92,7 @@ Feature: llama.cpp server
| tool_name | tool_arguments | hf_repo | hf_file | template_override |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
| ipython | {"code": "print('Hello, World!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q8_0.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q8_0.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q8_0.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
| ipython | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| ipython | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q6_K.gguf | meta-llama-Llama-3.2-3B-Instruct |
| ipython | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf | |
@@ -113,3 +113,35 @@ Feature: llama.cpp server
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then no tool is called


  @slow
  Scenario: Python hello world w/ none tool_choice yields no tool call
    Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt write a hello world in python
    And a tool choice none
    And python tool
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then no tool is called
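The scenario above exercises tool_choice="none": a tool is advertised in the request, but the model is told not to call it, so the completion should come back as plain text. A rough sketch of the kind of OpenAI-compatible request this maps to (the endpoint URL, port and tool schema below are assumptions for illustration, not taken from the test suite):

import requests  # illustrative client; the test suite drives the server through its own step definitions

payload = {
    "model": "test",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "write a hello world in python"}],
    # A python/ipython tool is offered, but tool_choice "none" forbids calling it.
    "tools": [{
        "type": "function",
        "function": {
            "name": "ipython",
            "description": "Run Python code",  # assumed description, illustration only
            "parameters": {
                "type": "object",
                "properties": {"code": {"type": "string"}},
                "required": ["code"],
            },
        },
    }],
    "tool_choice": "none",
    "parallel_tool_calls": False,
}

# Assumes a llama-server instance listening locally on port 8080.
response = requests.post("http://localhost:8080/v1/chat/completions", json=payload).json()
assert not response["choices"][0]["message"].get("tool_calls")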
  @slow
  Scenario: Parallel tool calls
    Given a model file Mistral-Nemo-Instruct-2407-Q8_0.gguf from HF repo bartowski/Mistral-Nemo-Instruct-2407-GGUF
    And a test chat template file named mistralai-Mistral-Nemo-Instruct-2407
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt get the weather in paris and search for llama.cpp's latest commits
    And python tool
    And parallel tool calls is enabled
    And an OAI compatible chat completions request with no api error
    Then receiving the following tool calls: [{"arguments": {"code": "import requests\nresponse = requests.get('https://api.openweathermap.org/data/2.9/weather?q=Paris&appid=YOUR_API_KEY')\nprint(response.json())"}, "name": "ipython", "id": "123456789"}, {"arguments": {"code": "!git log --oneline --after 2024-01-01 --before 2024-12-31 llama.cpp"}, "name": "ipython", "id": "987654321"}]
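For the parallel case, the request differs from the sketch above mainly in the user prompt and in enabling parallel tool calls; the Then step then expects the assistant message to carry exactly two ipython calls, one per sub-task, in the order listed. Continuing the same illustrative sketch, under the same assumptions:

# Two independent actions in one prompt, with parallel tool calls enabled.
payload["messages"] = [{
    "role": "user",
    "content": "get the weather in paris and search for llama.cpp's latest commits",
}]
payload["tool_choice"] = "auto"
payload["parallel_tool_calls"] = True

response = requests.post("http://localhost:8080/v1/chat/completions", json=payload).json()
tool_calls = response["choices"][0]["message"].get("tool_calls") or []
# The scenario expects two ipython tool calls, matched against the expected JSON above.
assert len(tool_calls) == 2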

View File

@@ -1,10 +1,10 @@
'''
This script fetches all the models used in the server tests.

This is useful for slow tests that use larger models, to avoid them timing out on the model downloads.
It is meant to be run from the root of the repository.

Example:
    python scripts/fetch_server_test_models.py
    ( cd examples/server/tests && ./tests.sh --tags=slow )
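The docstring describes a prefetch of every model referenced by the server test features, so slow runs don't stall on downloads. A rough sketch of one way such a prefetch can work (not the implementation from this commit; the feature-file pattern and huggingface_hub usage are assumptions):

# Rough sketch only; not the implementation from this commit.
import re
from pathlib import Path

from huggingface_hub import hf_hub_download  # assumed dependency for this sketch

# Scenarios reference models as "a model file <hf_file> from HF repo <hf_repo>".
pattern = re.compile(r"model file (\S+) from HF repo (\S+)")

seen = set()
for feature in Path("examples/server/tests/features").glob("*.feature"):
    for hf_file, hf_repo in pattern.findall(feature.read_text()):
        if (hf_repo, hf_file) not in seen:
            seen.add((hf_repo, hf_file))
            print(f"Fetching {hf_repo}/{hf_file}")
            hf_hub_download(repo_id=hf_repo, filename=hf_file)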