diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index a044d6044..79cd7d643 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -41,24 +41,16 @@ jobs: sanitizer: "" fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken - container: - image: ubuntu:latest - ports: - - 8888 - options: --cpus 4 - steps: - name: Dependencies id: depends run: | - apt-get update - apt-get -y install \ + sudo apt-get update + sudo apt-get -y install \ build-essential \ xxd \ git \ cmake \ - python3-pip \ - python3-venv \ curl \ wget \ language-pack-en \ @@ -71,6 +63,17 @@ jobs: fetch-depth: 0 ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + - name: Verify server deps id: verify_server_deps run: | @@ -101,13 +104,6 @@ jobs: -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server - - name: Setup python env - id: pipenv - run: | - cd examples/server/tests - python3 -m venv venv - . venv/bin/activate - pip install -r requirements.txt - name: Tests id: server_integration_tests @@ -133,6 +129,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - name: libCURL id: get_libcurl diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 6f8ba3fc6..2760aea8f 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1208,7 +1208,7 @@ struct server_context { } auto n_ctx_train = llama_n_ctx_train(model); - if (slot.params.n_predict < 1 && slot.ga_n == 1 + if (slot.params.n_predict < 1 && slot.n_predict < 1 && slot.ga_n == 1 && slot.n_prompt_tokens + slot.n_decoded >= n_ctx_train) { LOG_WARNING("n_predict is not set and self-context extend is disabled." " Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", {