From d462837a550929358879834371edc46c59900569 Mon Sep 17 00:00:00 2001
From: Doloro1978
Date: Mon, 20 Apr 2026 20:36:56 +0100
Subject: [PATCH] llama-cpp-hexagon: init at 6981

---
 pkgs/by-name/ll/llama-cpp-hexagon/package.nix | 198 ++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 pkgs/by-name/ll/llama-cpp-hexagon/package.nix

diff --git a/pkgs/by-name/ll/llama-cpp-hexagon/package.nix b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
new file mode 100644
index 0000000..b883acc
--- /dev/null
+++ b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
@@ -0,0 +1,198 @@
+{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  rocmGpuTargets ? rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets,

  openclSupport ? false,
  clblast,

  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  hexagonSupport ? false,
  curl,
  llama-cpp,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  hexagon-sdk,
  ninja,
}:

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # cuda imposes an upper bound on the gcc version
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "6981";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-0WtiHDlMeb+m2XcMwkPFY1mtwVTwRJUoxQSwzpiRbts=";
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ [ curl ];

  preConfigure = ''
    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
  '';

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_BUILD_TESTS" (finalAttrs.finalPackage.doCheck or false))
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
    (cmakeBool "GGML_HEXAGON" hexagonSupport)
    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport [
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
  ]
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # This is done so we can move rpc-server out of bin because llama.cpp doesn't
    # install rpc-server in their install target.
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ]
  ++ optionals hexagonSupport [
    (cmakeFeature "GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE" "128")
    (cmakeFeature "HEXAGON_SDK_ROOT" "${hexagon-sdk}/opt/hexagon")
    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${hexagon-sdk}/opt/hexagon/tools/HEXAGON_Tools/19.0.04")
  ];

  # upstream plans on adding targets at the cmake level; remove these
  # additional steps once that lands
  postInstall = ''
    # Match previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  # the tests are failing as of 2025-08
  doCheck = false;

  passthru = {
    tests = lib.optionalAttrs stdenv.hostPlatform.isDarwin {
      metal = llama-cpp.override { metalSupport = true; };
    };
    updateScript = nix-update-script {
      attrPath = "llama-cpp-hexagon";
      extraArgs = [
        "--version-regex"
        "b(.*)"
      ];
    };
  };

  meta = {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = lib.licenses.mit;
    mainProgram = "llama";
    maintainers = with lib.maintainers; [
      booxter
      dit7ya
      philiptaron
      xddxdd
    ];
    platforms = lib.platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken =
      (metalSupport && !effectiveStdenv.hostPlatform.isDarwin)
      # the Hexagon backend is only usable on aarch64-linux
      || (hexagonSupport && effectiveStdenv.hostPlatform.system != "aarch64-linux");
  };
})
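
Testing note: the backend toggles above are ordinary override arguments, so the
Hexagon path can be exercised without editing the file. Below is a minimal sketch,
assuming the file is exposed as the llama-cpp-hexagon attribute once merged and
that a hexagon-sdk package providing opt/hexagon exists in the same package set
(both are assumptions about the surrounding tree, not part of this patch):

    # overlay.nix (hypothetical): enable the Hexagon backend for this package
    final: prev: {
      llama-cpp-hexagon = prev.llama-cpp-hexagon.override {
        hexagonSupport = true;
      };
    }

The same pattern applies to the other flags (cudaSupport, vulkanSupport, rpcSupport,
and so on); without any override, nix-build -A llama-cpp-hexagon in a checkout
containing this file builds the default CPU/BLAS variant.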