Add basic function calling example using a llama-cli python wrapper

2024-10-29 22:20:15 +01:00 · 2024-09-21 23:23:17 -07:00 · 2024-09-21 23:23:17 -07:00 · fe02e8d0fb
commit fe02e8d0fb
parent 70392f1f81
4 changed files with 261 additions and 0 deletions
--- a/examples/function-calling/README.md
+++ b/examples/function-calling/README.md
@ -0,0 +1,46 @@
 # llama.cpp/examples/function-calling
 This example shows how to do basic function calling using llama-cli and a python wrapper to declare and call functions.
 ## Options
 Important options for llama-cli-function-runner.py:
 -   `-m FNAME, --model FNAME`: Specify the path to the function calling model (e.g., `-m "$(huggingface-cli  download meetkai/functionary-small-v3.2-GGUF functionary-small-v3.2.Q4_0.gguf)"`).
 -   `--ctx-size N`: Set the size of the prompt context. The default is 1024.
 -   `--special`: show special tokens and function calling details
 ## Example showing function call details
 ```
 ./examples/function-calling/llama-cli-function-runner.py  -m `huggingface-cli  download meetkai/functionary-small-v3.2-GGUF functionary-small-v3.2.Q4_0.gguf` -i --special
 What is the weather in Phoenix?
 Sure, I'll look that up for you. Let me just check the current weather conditions in Phoenix.>>>get_weather
 {"location": "Phoenix"}<|eot_id|>
 {"temperature": "30C"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 The current weather in Phoenix is 30C.<|eot_id|>
 What is 38484 + 323?
 Sure, let's calculate that.>>>calculate
 {"expression": "38484 + 323"}<|eot_id|>
 {"result": 38807}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 The sum of 38484 and 323 is 38807.<|eot_id|>
 What is 67 feet in meters?
 To convert 67 feet into meters, we use the conversion factor: 1 foot is approximately 0.3048 meters. Let's calculate it.>>>calculate
 {"expression": "67 * 0.3048"}<|eot_id|>
 {"result": 20.4216}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 67 feet is approximately 20.4216 meters.<|eot_id|>
 ```
 ## Function calling example, hiding details
 ```
 ./examples/function-calling/llama-cli-function-runner.py  -m `huggingface-cli  download meetkai/functionary-small-v3.2-GGUF functionary-small-v3.2.Q4_0.gguf` -i
 What is the weather in Phoenix?
 To provide you with the current weather in Phoenix, Arizona, I will need to check the weather data for you. Let me get that information.
 The current weather in Phoenix, Arizona is 30°C. If you have any more questions about weather in other locations, feel free to ask!
 Is it colder in Vegas?
 To determine if the current temperature in Las Vegas is colder than in Phoenix, which is currently 30°C, I will need to check the weather data for Las Vegas. Let's find out.
 The current weather in Las Vegas, Nevada is also 30°C. Therefore, there is no difference in temperature between Phoenix and Las Vegas at the moment. If you have any more questions or need further assistance, please let me know!
 What is 37234 times 39?
 To calculate 37234 times 39, I'll perform the multiplication. Let's do that.
 The result of multiplying 37234 by 39 is 1,452,126. If you have any more calculations or questions, feel free to ask!
 ```
--- a/examples/function-calling/function_tool.py
+++ b/examples/function-calling/function_tool.py
@ -0,0 +1,63 @@
 # Generate function calling definitions and function schemas
 import inspect
 import re
 # Extract OpenAI function calling style definitions from functions
 #
 # Generated with: Create a python function to generate the OpenAI function calling definition from a given function, getting the description, parameter type and parameter description from the function documentation, assuming the function documentation contains Sphinx style parameter descriptions, marked with :param.
 def get_function_tool_json(func):
    """Build an OpenAI function-calling style definition for ``func``.

    The function description and the per-parameter descriptions are taken
    from the docstring, which is expected to use Sphinx-style
    ``:param name:`` fields. Parameter types come from the signature
    annotations. A function without a docstring yields empty descriptions.

    :param func: the function to describe.
    :return: a dict with ``name``, ``description`` and ``parameters`` keys,
        where ``parameters`` holds ``type``, ``properties`` and ``required``.
    """
    # Python type names -> JSON-schema type names; unknown names pass through.
    typemap = {
        'str': 'string',
        'int': 'integer',
        'float': 'number',
        'bool': 'boolean',
        'list': 'array',
        'dict': 'object',
    }

    def get_type(annotation):
        # Annotations may be plain strings (postponed evaluation) or typing
        # constructs that do not expose __name__.
        if isinstance(annotation, str):
            name = annotation
        else:
            name = getattr(annotation, '__name__', None) or str(annotation)
        return typemap.get(name, name)

    function_name = func.__name__
    # cleandoc normalises indentation; a missing docstring becomes ''.
    doc = inspect.cleandoc(func.__doc__ or '')
    doc_parts = re.split(r'(?:^|\n)\s*:param[^:]*\s+', doc)
    function_description = doc_parts[0].strip()

    params_doc = {}
    for param_doc in doc_parts[1:]:
        # partition never fails, even when the entry has no ':' separator.
        name, _, desc = param_doc.partition(':')
        params_doc[name.strip()] = desc.strip()

    function_def = {
        'name': function_name,
        'description': function_description,
        'parameters': {'type': 'object', 'properties': {}, 'required': []}
    }
    for param_name, param in inspect.signature(func).parameters.items():
        function_def['parameters']['properties'][param_name] = {
            'type': get_type(param.annotation) if param.annotation is not param.empty else '',
            'description': params_doc.get(param_name, '')
        }
        # Only parameters without a default value are mandatory for callers.
        if param.default is param.empty:
            function_def['parameters']['required'].append(param_name)
    return function_def
 # Generate function definition schema from function definitions
 #
 # This is from llama-cpp-python, llama_chat_format.py
 def generate_schema_from_functions(functions, namespace="functions") -> str:
    """Render function definitions as a TypeScript-like namespace block.

    :param functions: iterable of dicts with a ``name`` key and optional
        ``description`` and ``parameters`` (``properties`` / ``required``).
    :param namespace: name used for the surrounding ``namespace`` block.
    :return: the schema text; each function becomes a ``type`` declaration
        and parameters not listed as required are suffixed with ``?``.
    """
    pieces = [
        "// Supported function definitions that should be called when necessary.\n",
        f"namespace {namespace} {{\n\n",
    ]
    for spec in functions:
        name = spec["name"]
        summary = spec.get("description", "")
        params = spec.get("parameters", {})
        mandatory = params.get("required", [])
        pieces.append(f"// {summary}\n")
        pieces.append(f"type {name} = (_: {{\n")
        for key, info in params.get("properties", {}).items():
            note = info.get("description", "")
            kind = info.get("type", "any")
            marker = "?" if key not in mandatory else ""
            pieces.append(f"// {note}\n")
            pieces.append(f"{key}{marker}: {kind},\n")
        pieces.append("}) => any;\n\n")
    pieces.append(f"}} // namespace {namespace}")
    return "".join(pieces)
--- a/examples/function-calling/functions.py
+++ b/examples/function-calling/functions.py
@ -0,0 +1,30 @@
 def calculate(expression: str):
    """Evaluate a mathematical expression
    :param expression: The mathematical expression to evaluate
    """
    # NOTE: the docstring above is parsed at runtime into the tool description
    # shown to the model, so implementation notes live in comments instead.
    #
    # SECURITY: the expression is written by the model, i.e. it is untrusted.
    # The previous implementation passed it to eval(), which allowed arbitrary
    # code execution. It is now parsed with ast and only numeric literals,
    # arithmetic operators and a few numeric builtins are evaluated.
    # Returns {"result": value} on success, {"error": "Invalid expression"}
    # for anything that is not a valid arithmetic expression.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Builtins that eval() used to make available and that are pure numerics.
    allowed_calls = {'abs': abs, 'round': round, 'min': min, 'max': max, 'pow': pow}

    def evaluate(node):
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        # type() check on purpose: bool is an int subclass but not a number here.
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if (isinstance(node, ast.Call) and isinstance(node.func, ast.Name)
                and node.func.id in allowed_calls and not node.keywords):
            return allowed_calls[node.func.id](*[evaluate(arg) for arg in node.args])
        raise ValueError("unsupported expression")

    try:
        # strip(): unlike eval(), ast.parse rejects leading whitespace.
        result = evaluate(ast.parse(expression.strip(), mode='eval'))
        if isinstance(result, complex):
            # Complex numbers cannot be serialised to JSON by the caller.
            raise ValueError("complex result")
        return {"result": result}
    except (SyntaxError, ValueError, TypeError, ArithmeticError,
            AttributeError, RecursionError, MemoryError):
        return {"error": "Invalid expression"}
 def get_weather(location: str):
    """get the weather of a location
    :param location: where to get weather.
    """
    # Demo stub: the location is ignored and a fixed reading is reported.
    # The docstring above is left untouched because it is read at runtime.
    reading = dict(temperature="30C")
    return reading
 def _run_python(code):
    """Execute a Python snippet and return the value of its last line.

    The last line is rewritten to ``_ = <last line>`` so its value can be
    read back after execution.

    :param code: source text to execute.
    :return: ``{'result': value}`` on success, or ``None`` when the snippet
        is empty or raises any exception (including syntax errors).
    """
    # SECURITY: exec() on model-written code is not a real sandbox. Removing
    # __builtins__ only blocks the obvious routes, so treat the input as able
    # to run arbitrary code and do not expose this to untrusted models.
    #
    # Trailing blank lines are dropped first; otherwise the rewritten last
    # line would be "_ = " and valid code would fail with a syntax error.
    lines = code.rstrip().splitlines()
    if not lines:
        # Nothing to run; previously this raised an uncaught IndexError.
        return None
    lines[-1] = f"_ = {lines[-1]}"
    allowed_globals = {'__builtins__': None, '_': None}
    allowed_locals = {}
    try:
        exec('\n'.join(lines), allowed_globals, allowed_locals)
    except Exception:
        return None
    return {'result': allowed_locals.get('_', None)}
--- a/examples/function-calling/llama-cli-function-runner.py
+++ b/examples/function-calling/llama-cli-function-runner.py
@ -0,0 +1,122 @@
 #!/usr/bin/env python3
 # function calling using llama-cli
 import subprocess
 import sys
 import select
 import os
 import re
 import json
 import functions
 from function_tool import get_function_tool_json, generate_schema_from_functions
 # Every public callable found in functions.py is exposed to the model as a
 # tool. Non-callable attributes (imported modules, constants) are skipped so
 # they cannot break the schema generation below.
 function_name_list = [
    name for name in dir(functions)
    if not name.startswith('_') and callable(getattr(functions, name))
 ]
 # Tool name -> Python callable, used to dispatch the model's calls.
 function_lookup = { name: getattr(functions, name) for name in function_name_list }
 # OpenAI-style definitions, then the textual schema embedded in the prompt.
 tools = [ get_function_tool_json(f) for f in function_lookup.values() ]
 function_schema = generate_schema_from_functions(tools)
 # System prompt: describes the call format and lists the available functions.
 prompt = """<|start_header_id|>system<|end_header_id|>
 You are capable of executing available function(s) if required.
 Execute function(s) as needed.
 The function calls are not shown in the conversation and should be called covertly to answer questions.
 Ask for the required input to:recipient==all
 Use JSON for function arguments.
 Respond in this format:
 >>>${recipient}
 ${content}
 Available functions:
 """ + function_schema + """<|eot_id|><|start_header_id|>system<|end_header_id|>
 When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. The drive at '/mnt/data' can be used to save and persist user files.<|eot_id|><|start_header_id|>user<|end_header_id|>
 """
 def main():
    """Parse the wrapper options, start llama-cli and run the chat loop.

    Options not recognised here are passed through to llama-cli unchanged.
    The child process is always terminated and reaped on exit, including
    after Ctrl-C.
    """
    import argparse

    parser = argparse.ArgumentParser(epilog='For more options: llama-cli --help')
    for flag in ('--display-prompt', '--special'):
        parser.add_argument(flag, action=argparse.BooleanOptionalAction, default=False)
    parser.add_argument('--reverse-prompt', type=str, default='<|start_header_id|>user<|end_header_id|>\n')
    parser.add_argument('--ctx-size', type=int, default=1024)
    args, passthrough = parser.parse_known_args()

    if args.display_prompt:
        print(prompt)

    # llama-cli is always started with --special, whatever the wrapper's own
    # --special flag says.
    command = [
        './llama-cli', '-i',
        '-p', prompt,
        '--reverse-prompt', args.reverse_prompt,
        '--escape', '--special', '--no-display-prompt', '--log-disable', '--simple-io',
        '--ctx-size', str(args.ctx_size),
    ]
    command.extend(passthrough)

    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    child_stdout = process.stdout
    if child_stdout is not None:
        # Non-blocking so the loop can read whatever is available right now.
        os.set_blocking(child_stdout.fileno(), False)

    try:
        run_loop(process, args)
    except KeyboardInterrupt:
        print("\nInterrupted by user.")
    finally:
        process.terminate()
        process.wait()
 def _execute_tool_call(tool_name, tool_args):
    """Run the tool requested by the model and return its result as JSON text."""
    try:
        if tool_name == 'python':
            result = functions._run_python(tool_args)
        else:
            result = function_lookup[tool_name](**json.loads(tool_args))
        return json.dumps(result)
    except Exception:
        # Boundary between model output and local code: an unknown tool name,
        # malformed or mismatching arguments, a failing tool or a result that
        # is not JSON-serialisable is reported back instead of ending the chat.
        return json.dumps({'error': 'unknown'})

 def run_loop(process, args):
    """Relay text between the user and llama-cli, answering function calls.

    Model output is scanned for ``>>>name`` followed by the arguments and
    ``<|eot_id|>``. When such a call is complete, the tool is run and its
    JSON result is written to the child's stdin. Unless ``args.special`` is
    set, the call details and special tokens are hidden from the user.

    :param process: the llama-cli ``subprocess.Popen`` object; its stdout is
        expected to be non-blocking.
    :param args: parsed options; only ``args.special`` is read.
    :return: ``None`` once the child process has exited.
    """
    pbuffer = ''
    skip_output_until_result = False
    stderr_tail = ''
    # Streams are removed from this list once they reach end-of-file;
    # otherwise select() would report them as readable forever and spin.
    watched = [process.stdout, process.stderr, sys.stdin]
    while True:
        readable, _, _ = select.select(watched, [], [])
        for stream in readable:
            if stream == process.stdout:
                pdata = process.stdout.read()
                if not pdata:
                    if process.poll() is not None:
                        # The child is gone. If it failed, show the last
                        # part of what it wrote to stderr.
                        if process.stderr in watched:
                            stderr_tail += process.stderr.read() or ''
                        if process.returncode != 0 and stderr_tail:
                            sys.stderr.write(stderr_tail[-4096:])
                            sys.stderr.flush()
                        return
                    continue
                pbuffer += pdata
                if(match := re.search(r'>>>([^\n]*)\n(.*)<\|eot_id\|>', pbuffer, re.S)):
                    if not args.special:
                        # Keep only the part of this chunk that precedes the
                        # call marker. match.start() is relative to pbuffer,
                        # so translate it to an offset inside pdata.
                        chunk_start = len(pbuffer) - len(pdata)
                        pdata = pdata[:max(0, match.start() - chunk_start)]
                    pbuffer = ''
                    skip_output_until_result = False
                    result = _execute_tool_call(match.group(1), match.group(2))
                    result += '<|eot_id|><|start_header_id|>assistant<|end_header_id|>'
                    process.stdin.write(result + '\n')
                    process.stdin.flush()
                    if(args.special): pdata += '\n' + result
                elif (n := pdata.find('>>>')) >= 0:
                    if not args.special:
                        # A call is starting: show the text before the marker
                        # and suppress output until the call has completed.
                        pdata = pdata[:n]
                        skip_output_until_result = True
                elif skip_output_until_result:
                    pdata = ''
                if not args.special:
                    # Strip special tokens such as <|eot_id|>.
                    pdata = re.sub(r'<\|[^\|>]*\|>', '', pdata)
                sys.stdout.write(pdata)
                sys.stdout.flush()
            elif stream == process.stderr:
                # Drain stderr so the child never blocks on a full pipe.
                # os.read returns what is available without waiting for EOF.
                chunk = os.read(process.stderr.fileno(), 4096)
                if chunk:
                    stderr_tail = (stderr_tail + chunk.decode('utf-8', errors='replace'))[-4096:]
                else:
                    watched.remove(process.stderr)
            elif stream == sys.stdin:
                user_input = sys.stdin.readline()
                if user_input:
                    user_input = user_input.rstrip()
                    process.stdin.write(user_input + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + '\n')
                    process.stdin.flush()
                else:
                    # End of user input: stop polling stdin but keep relaying
                    # the model's output.
                    watched.remove(sys.stdin)
 # Start the wrapper only when this file is executed as a script.
 if __name__ == '__main__':
    main()