{
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  clblast,
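
  # Backend toggles: `useBlas` defaults to true only when every other backend
  # below is disabled, i.e. plain BLAS is the CPU-only fallback.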
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
    useVulkan
  ],
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:
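
# Usage sketch (hypothetical caller, not part of this file): each flag above
# can be overridden at import time, e.g.
#   callPackage ./package.nix { useCuda = true; useMpi = true; }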

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;

  # It's necessary to consistently use backendStdenv when building with CUDA support;
  # otherwise we get libstdc++ errors downstream.
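  # `stdenv` is shadowed on purpose: referencing it directly below throws at
  # evaluation time, while the caller's stdenv stays reachable as `inputs.stdenv`.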
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  suffices =
    optionals useBlas [ "BLAS" ]
    ++ optionals useCuda [ "CUDA" ]
    ++ optionals useMetalKit [ "MetalKit" ]
    ++ optionals useMpi [ "MPI" ]
    ++ optionals useOpenCL [ "OpenCL" ]
    ++ optionals useRocm [ "ROCm" ]
    ++ optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt into the heavy ML Python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  # apple_sdk is supposed to choose sane defaults; no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size; remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

      # TODO: Package up each Python script or service appropriately.
      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
      # we could make those *.py into setuptools entry points
      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
    '';

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useVulkan vulkanBuildInputs;
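
    # `cmakeBool` and `cmakeFeature` (inherited from lib) render to
    # "-DNAME:BOOL=TRUE|FALSE" and "-DNAME:STRING=value" respectively.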
    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" false)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" true)
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUBLAS" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
        (cmakeBool "LLAMA_VULKAN" useVulkan)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
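      # A worked example, assuming cudaCapabilities = [ "7.0" "8.6" ]: dropDot
      # maps them to "70" and "86", producing CMAKE_CUDA_ARCHITECTURES=70;86.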
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

        # Build all targets supported by rocBLAS. When updating, search for TARGET_LIST_ROCM
        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
        # and select the line that matches the current nixpkgs version of rocBLAS.
        # Should likely use `rocmPackages.clr.gpuTargets`.
        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      ]
      ++ optionals useMetalKit [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
      ++ optionals useBlas [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mv $out/bin/main $out/bin/llama
      mv $out/bin/server $out/bin/llama-server
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';
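
    # The renames above keep `meta.mainProgram = "llama"` (below) valid for
    # `nix run` and `lib.getExe`.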

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        useVulkan
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };
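
      # Hypothetical usage, assuming the flake exposes these as devShells:
      #   nix develop .#llama-cpp          # the shell above
      #   nix develop .#llama-cpp-extra    # shell-extra below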
      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL || useVulkan) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin) || (useVulkan && effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on a best-effort basis, if you ping them
      # about Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)