mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 13:27:21 +01:00
2002bc96bf
* server : refactoring (wip) * server : remove llava/clip objects from build * server : fix empty prompt handling + all slots idle logic * server : normalize id vars * server : code style * server : simplify model chat template validation * server : code style * server : minor * llama : llama_chat_apply_template support null buf * server : do not process embedding requests when disabled * server : reorganize structs and enums + naming fixes * server : merge oai.hpp in utils.hpp * server : refactor system prompt update at start * server : disable cached prompts with self-extend * server : do not process more than n_batch tokens per iter * server: tests: embeddings use a real embeddings model (#5908) * server, tests : bump batch to fit 1 embedding prompt * server: tests: embeddings fix build type Debug is randomly failing (#5911) * server: tests: embeddings, use different KV Cache size * server: tests: embeddings, fixed prompt do not exceed n_batch, increase embedding timeout, reduce number of concurrent embeddings * server: tests: embeddings, no need to wait for server idle as it can timout * server: refactor: clean up http code (#5912) * server : avoid n_available var ggml-ci * server: refactor: better http codes * server : simplify json parsing + add comment about t_last * server : rename server structs * server : allow to override FQDN in tests ggml-ci * server : add comments --------- Co-authored-by: Pierrick Hymbert <pierrick.hymbert@gmail.com>
93 lines
2.6 KiB
YAML
93 lines
2.6 KiB
YAML
# Server build and tests
name: Server

# Events that start this workflow.
on:
  # Manual run from the Actions UI; the caller must choose whether the
  # slow test suite is executed as well.
  workflow_dispatch:
    inputs:
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean

  # Pushes to master, limited to changes in this workflow file, build
  # scripts, C/C++/CUDA/Swift/Objective-C sources, or the server tests.
  push:
    branches:
      - master
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/tests/**.*'

  # Pull requests use the same path filter as pushes.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/tests/**.*'

  # Daily run at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'

jobs:
  # Configures and builds the `server` CMake target for each matrix entry,
  # then runs examples/server/tests/tests.sh against it on port 8888.
  server:
    runs-on: ubuntu-latest

    strategy:
      # Let every matrix entry run to completion: with the default
      # fail-fast behaviour one failing (or flaky) sanitizer job would
      # cancel the others and hide their results.
      fail-fast: false
      # Net result: Debug x {ADDRESS, THREAD, UNDEFINED} plus a single
      # Release build with no sanitizer. `exclude` drops the three
      # Release+sanitizer combinations; `include` (processed after
      # `exclude`) adds the Release entry with an empty sanitizer.
      matrix:
        sanitizer: [ADDRESS, THREAD, UNDEFINED]
        build_type: [Debug, Release]
        include:
          - build_type: Release
            sanitizer: ""
        exclude:
          - build_type: Release
            sanitizer: ADDRESS
          - build_type: Release
            sanitizer: THREAD
          - build_type: Release
            sanitizer: UNDEFINED

    # All steps run inside this container; 8888 is the port the test
    # steps below pass to tests.sh via PORT.
    container:
      image: ubuntu:latest
      ports:
        - 8888
      options: --cpus 4

    steps:
      - name: Clone
        id: checkout
        # v4 runs on the Node 20 action runtime; v3 targets the
        # deprecated Node 16 runtime.
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          apt-get update
          apt-get -y install \
            build-essential \
            git \
            cmake \
            python3-pip \
            wget \
            psmisc \
            language-pack-en

      - name: Build
        id: cmake_build
        # NOTE(review): for the Release entry the sanitizer is empty, so
        # the last configure flag expands to `-DLLAMA_SANITIZE_=ON` --
        # presumably an unused variable that the project ignores; confirm.
        run: |
          mkdir build
          cd build
          cmake .. \
              -DLLAMA_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Tests
        id: server_integration_tests
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh

      - name: Slow tests
        id: server_integration_tests_slow
        # Runs on scheduled triggers for the Release entry only, or on any
        # matrix entry when a manual dispatch set slow_tests to true.
        # Parentheses only make the existing `&&`-before-`||` precedence
        # explicit.
        if: ${{ (github.event.schedule != '' && matrix.build_type == 'Release') || github.event.inputs.slow_tests == 'true' }}
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow