2023-12-29 15:42:26 +01:00
|
|
|
{
  # Nixpkgs plumbing.
  lib,
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,

  # Build-time tools.
  cmake,
  ninja,
  pkg-config,
  git,
  python3,

  # Optional backend dependencies.
  mpi,
  blas,
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  clblast,

  # Backend toggles.
  # BLAS is the default only when none of the GPU backends below is enabled
  # and the supplied `blas` package reports `meta.available`.
  useBlas ? !(useCuda || useMetalKit || useOpenCL || useRocm || useVulkan) && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  useVulkan ? false,

  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # CUDA builds must use cudaPackages.backendStdenv consistently; mixing it
  # with the regular stdenv leads to libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}@inputs:
|
|
|
|
|
|
|
|
let
|
|
|
|
inherit (lib) cmakeBool cmakeFeature optionals strings versionOlder;

# Shadow the `stdenv` argument so that any accidental use inside this
# expression fails loudly; `effectiveStdenv` is the one to use.
stdenv = throw "Use effectiveStdenv instead";

# One tag per enabled backend, always in this order.
suffices = builtins.concatLists [
  (optionals useBlas [ "BLAS" ])
  (optionals useCuda [ "CUDA" ])
  (optionals useMetalKit [ "MetalKit" ])
  (optionals useMpi [ "MPI" ])
  (optionals useOpenCL [ "OpenCL" ])
  (optionals useRocm [ "ROCm" ])
  (optionals useVulkan [ "Vulkan" ])
];

# "-blas-cuda"-style suffix for pname; empty when no backend is enabled.
pnameSuffix =
  if suffices == [ ] then
    ""
  else
    "-" + builtins.concatStringsSep "-" (map strings.toLower suffices);

# Human-readable tail for meta.description; empty when no backend is enabled.
descriptionSuffix =
  if suffices == [ ] then
    ""
  else
    ", accelerated with " + builtins.concatStringsSep ", " suffices;

# Executable file extension as reported by the host platform.
executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
|
|
|
|
|
2023-12-29 15:42:26 +01:00
|
|
|
# TODO: package the Python in this repository in a Nix-like way.
# Ideally this would migrate to buildPythonPackage, with the repo made
# PEP 517-compatible so that the correct .dist-info is generated.
# https://peps.python.org/pep-0517/
#
# TODO: Package up each Python script or service appropriately, by making
# them into "entrypoints"

# Packages shared by both Python environments below.
llamaPythonBasePackages = ps: [
  ps.numpy
  ps.sentencepiece
];

# Lightweight interpreter: numpy and sentencepiece only.
llama-python = python3.withPackages llamaPythonBasePackages;

# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
llama-python-extra = python3.withPackages (
  ps:
  llamaPythonBasePackages ps
  ++ [
    ps.tiktoken
    ps.torchWithoutCuda
    ps.transformers
  ]
);
|
|
|
|
|
2024-03-26 01:51:46 +01:00
|
|
|
# Derivation whose only content is bin/xcrun, a symlink to the host's
# /usr/bin/xcrun.
xcrunHost = runCommand "xcrunHost" { } ''
  mkdir -p $out/bin
  ln -s /usr/bin/xcrun $out/bin
'';
|
|
|
|
|
2023-12-29 15:42:26 +01:00
|
|
|
# apple_sdk is expected to pick sane defaults on its own, so aarch64 does not
# need separate handling here.
darwinBuildInputs =
  let
    frameworks = darwin.apple_sdk.frameworks;
  in
  [
    frameworks.Accelerate
    frameworks.CoreVideo
    frameworks.CoreGraphics
  ]
  ++ optionals useMetalKit [ frameworks.MetalKit ];

cudaBuildInputs =
  let
    inherit (cudaPackages) cuda_cccl cuda_cudart libcublas;
  in
  [
    cuda_cccl.dev # <nv/target>

    # Individual outputs are listed as a temporary hack to reduce the closure
    # size; remove once cudaPackages have stopped using lndir:
    # https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

rocmBuildInputs = [
  rocmPackages.clr
  rocmPackages.hipblas
  rocmPackages.rocblas
];

vulkanBuildInputs = [ vulkan-headers vulkan-loader ];
|
2023-12-29 15:42:26 +01:00
|
|
|
in
|
|
|
|
|
|
|
|
effectiveStdenv.mkDerivation (
|
|
|
|
finalAttrs: {
|
|
|
|
pname = "llama-cpp" + pnameSuffix;
version = llamaVersion;

# Files rejected by the filter below are neither visible in the sandbox nor
# part of the output hash, so editing them does not trigger a rebuild.
src = lib.cleanSourceWith {
  src = lib.cleanSource ../../.;
  filter =
    path: type:
    let
      fileName = baseNameOf path;
      excluded =
        lib.hasSuffix ".nix" path # Ignore *.nix files when computing outPaths
        || lib.hasSuffix ".md" path # Ignore *.md changes when computing outPaths
        || lib.hasPrefix "." fileName # Skip hidden files and directories
        || fileName == "flake.lock";
    in
    !excluded;
};
|
|
|
|
|
|
|
|
# Replace the bundle resource lookups in ggml-metal.m with fixed paths under
# $out/bin, for both the Metal source and the compiled default.metallib.
postPatch = ''
  substituteInPlace ./ggml-metal.m \
    --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
  substituteInPlace ./ggml-metal.m \
    --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
'';

# Since PR#6015 (https://github.com/ggerganov/llama.cpp/pull/6015),
# `default.metallib` may be compiled with the Metal compiler from Xcode,
# which requires escaping the sandbox on macOS. `xcrun` is used to find the
# path of the Metal compiler, which is variable and not on $PATH.
# See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
__noChroot = (effectiveStdenv.isDarwin && useMetalKit) && precompileMetalShaders;
|
|
|
|
|
2023-12-29 15:42:26 +01:00
|
|
|
nativeBuildInputs = builtins.concatLists [
  # Always needed.
  [
    cmake
    ninja
    pkg-config
    git
  ]

  (optionals useCuda [
    cudaPackages.cuda_nvcc

    # TODO: Replace with autoAddDriverRunpath
    # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
    cudaPackages.autoAddOpenGLRunpathHook
  ])

  # Static builds on GNU host platforms additionally get glibc.static.
  (optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ])

  # Same condition as __noChroot: precompiling Metal shaders on Darwin.
  (optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ])
];
|
|
|
|
|
|
|
|
# Each enabled backend (and Darwin itself) contributes its own inputs.
buildInputs = builtins.concatLists [
  (optionals effectiveStdenv.isDarwin darwinBuildInputs)
  (optionals useCuda cudaBuildInputs)
  (optionals useMpi [ mpi ])
  (optionals useOpenCL [ clblast ])
  (optionals useRocm rocmBuildInputs)
  (optionals useBlas [ blas ])
  (optionals useVulkan vulkanBuildInputs)
];
|
2023-12-29 15:42:26 +01:00
|
|
|
|
|
|
|
cmakeFlags =
  let
    # Flags passed in every configuration; backend switches mirror the use* arguments.
    commonFlags = [
      (cmakeBool "LLAMA_NATIVE" false)
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "LLAMA_BLAS" useBlas)
      (cmakeBool "LLAMA_CLBLAST" useOpenCL)
      (cmakeBool "LLAMA_CUDA" useCuda)
      (cmakeBool "LLAMA_HIPBLAS" useRocm)
      (cmakeBool "LLAMA_METAL" useMetalKit)
      (cmakeBool "LLAMA_VULKAN" useVulkan)
      (cmakeBool "LLAMA_STATIC" enableStatic)
    ];

    # Semicolon-separated CUDA capabilities with the dots dropped.
    cudaFlags = [
      (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
        builtins.concatStringsSep ";" (map cudaPackages.flags.dropDot cudaPackages.flags.cudaCapabilities)
      ))
    ];

    # Native CMake HIP support (llama.cpp #5966): the HIP compiler is ROCm's
    # clang rather than hipcc and is set separately from the C/C++ compiler;
    # the GPU architectures come from the targets supported by nixpkgs' clr.
    rocmFlags = [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
    ];

    # The Metal library is embedded unless the shaders are precompiled.
    metalFlags = [
      (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];
  in
  commonFlags
  ++ optionals useCuda cudaFlags
  ++ optionals useRocm rocmFlags
  ++ optionals useMetalKit metalFlags;
|
2023-12-29 15:42:26 +01:00
|
|
|
|
ROCm: use native CMake HIP support (#5966)
Supersedes #4024 and #4813.
CMake's native HIP support has become the
recommended way to add HIP code into a project (see
[here](https://rocm.docs.amd.com/en/docs-6.0.0/conceptual/cmake-packages.html#using-hip-in-cmake)).
This PR makes the following changes:
1. The environment variable `HIPCXX` or CMake option
`CMAKE_HIP_COMPILER` should be used to specify the HIP
compiler. Notably this shouldn't be `hipcc`, but ROCm's clang,
which usually resides in `$ROCM_PATH/llvm/bin/clang`. Previously
this was controlled by `CMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`.
Note that since native CMake HIP support is not yet available on
Windows, on Windows we fall back to the old behavior.
2. CMake option `CMAKE_HIP_ARCHITECTURES` is used to control the
GPU architectures to build for. Previously this was controlled by
`GPU_TARGETS`.
3. Updated the Nix recipe to account for these new changes.
4. The GPU targets to build against in the Nix recipe is now
consistent with the supported GPU targets in nixpkgs.
5. Added CI checks for HIP on both Linux and Windows. On Linux, we test
both the new and old behavior.
The most important part about this PR is the separation of the
HIP compiler and the C/C++ compiler. This allows users to choose
a different C/C++ compiler if desired, compared to the current
situation where when building for ROCm support, everything must be
compiled with ROCm's clang.
~~Makefile is unchanged. Please let me know if we want to be
consistent on variables' naming because Makefile still uses
`GPU_TARGETS` to control architectures to build for, but I feel
like setting `CMAKE_HIP_ARCHITECTURES` is a bit awkward when you're
calling `make`.~~ Makefile used `GPU_TARGETS` but the README says
to use `AMDGPU_TARGETS`. For consistency with CMake, all usage of
`GPU_TARGETS` in Makefile has been updated to `AMDGPU_TARGETS`.
Thanks to the suggestion of @jin-eld, to maintain backwards
compatibility (and not break too many downstream users' builds), if
`CMAKE_CXX_COMPILER` ends with `hipcc`, then we still compile using
the original behavior and emit a warning that recommends switching
to the new HIP support. Similarly, if `AMDGPU_TARGETS` is set but
`CMAKE_HIP_ARCHITECTURES` is not, then we forward `AMDGPU_TARGETS`
to `CMAKE_HIP_ARCHITECTURES` to ease the transition to the new
HIP support.
Signed-off-by: Gavin Zhao <git@gzgz.dev>
2024-05-17 17:03:03 +02:00
|
|
|
# Environment variables needed for ROCm.
#
# `env` must be an attribute set. `lib.optionals` is the list helper and
# yields `[ ]` when the condition is false, so non-ROCm builds used to pass a
# list here. `lib.optionalAttrs` yields `{ }` instead, keeping the type
# correct in every configuration.
env = lib.optionalAttrs useRocm {
  ROCM_PATH = "${rocmPackages.clr}";
  HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
};
|
|
|
|
|
2023-12-29 15:42:26 +01:00
|
|
|
# Copies the public header llama.h from the source tree into $out/include.
# TODO(SomeoneSerge): proper install targets at the CMake level would be
# better, if they haven't been added yet.
postInstall = ''
  mkdir -p $out/include
  cp $src/llama.h $out/include/
'';
|
|
|
|
|
|
|
|
# Expose the backend toggles and the development shells. Both shells take the
# package's own inputs through `finalAttrs.finalPackage`.
passthru = {
  inherit useBlas useCuda useMetalKit useMpi useOpenCL useRocm useVulkan;

  # Shell with the lightweight Python environment; the hook adds the
  # compiler's lib directory to LD_LIBRARY_PATH.
  shell = mkShell {
    name = "shell-${finalAttrs.finalPackage.name}";
    description = "contains numpy and sentencepiece";
    buildInputs = [ llama-python ];
    inputsFrom = [ finalAttrs.finalPackage ];
    shellHook = ''
      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
    '';
  };

  # Shell with the heavier Python environment (llama-python-extra).
  shell-extra = mkShell {
    name = "shell-extra-${finalAttrs.finalPackage.name}";
    description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
    buildInputs = [ llama-python-extra ];
    inputsFrom = [ finalAttrs.finalPackage ];
  };
};
|
|
|
|
|
|
|
|
meta = {
  description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
  homepage = "https://github.com/ggerganov/llama.cpp/";
  license = lib.licenses.mit;

  # Accommodates `nix run` and `lib.getExe`
  mainProgram = "llama-cli";

  # Extend `badPlatforms` instead
  platforms = lib.platforms.all;

  # Configurations that not even the CI should evaluate; they produce the
  # "unsupported platform" messages. Mostly a no-op, because cudaPackages
  # would have refused to evaluate anyway.
  badPlatforms = if useCuda || useOpenCL then lib.platforms.darwin else [ ];

  # Configurations known to fail to build. Can be overridden by importing
  # Nixpkgs with `allowBroken = true`.
  broken = useMetalKit && !effectiveStdenv.isDarwin;

  # These people might respond, on a best-effort basis, if you ping them
  # about Nix-specific regressions or for reviewing Nix-specific PRs.
  # Consider adding yourself if you want this flake to stay maintained and
  # are willing to invest your time. Do not add other people without their
  # consent. Consider removing people after they've been unreachable for
  # long periods of time.
  #
  # lib.maintainers is defined in Nixpkgs, but you may also add an attrset
  # following the same format as in
  # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
  maintainers = [
    lib.maintainers.philiptaron
    lib.maintainers.SomeoneSerge
  ];
};
|
|
|
|
}
|
|
|
|
)
|