From d462837a550929358879834371edc46c59900569 Mon Sep 17 00:00:00 2001
From: Doloro1978
Date: Mon, 20 Apr 2026 20:36:56 +0100
Subject: [PATCH 1/4] added: llama-cpp

---
Review notes:
- Adds the llama-cpp expression under llama-cpp-hexagon/ with a new
  hexagonSupport flag and the GGML_HEXAGON / HEXAGON_* cmake flags.
- NOTE(review): meta.badPlatforms applies the list returned by `optionals`
  to a further parenthesised argument and reads
  hostPlatform.aarch64-linux; PATCH 3/4 rewrites this expression.

 pkgs/by-name/ll/llama-cpp-hexagon/package.nix | 198 ++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 pkgs/by-name/ll/llama-cpp-hexagon/package.nix

diff --git a/pkgs/by-name/ll/llama-cpp-hexagon/package.nix b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
new file mode 100644
index 0000000..b883acc
--- /dev/null
+++ b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
@@ -0,0 +1,198 @@
+{
+  lib,
+  autoAddDriverRunpath,
+  cmake,
+  fetchFromGitHub,
+  nix-update-script,
+  stdenv,
+
+  config,
+  cudaSupport ? config.cudaSupport,
+  cudaPackages ? { },
+
+  rocmSupport ? config.rocmSupport,
+  rocmPackages ? { },
+  rocmGpuTargets ? rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets,
+
+  openclSupport ? false,
+  clblast,
+
+  blasSupport ? builtins.all (x: !x) [
+    cudaSupport
+    metalSupport
+    openclSupport
+    rocmSupport
+    vulkanSupport
+  ],
+  blas,
+
+  pkg-config,
+  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
+  vulkanSupport ? false,
+  rpcSupport ? false,
+  hexagonSupport ? false,
+  curl,
+  llama-cpp,
+  shaderc,
+  vulkan-headers,
+  vulkan-loader,
+  hexagon-sdk,
+  ninja,
+}:
+
+let
+  # It's necessary to consistently use backendStdenv when building with CUDA support,
+  # otherwise we get libstdc++ errors downstream.
+  # cuda imposes an upper bound on the gcc version
+  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
+  inherit (lib)
+    cmakeBool
+    cmakeFeature
+    optionals
+    optionalString
+    ;
+
+  cudaBuildInputs = with cudaPackages; [
+    cuda_cccl #
+
+    # A temporary hack for reducing the closure size, remove once cudaPackages
+    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
+    cuda_cudart
+    libcublas
+  ];
+
+  rocmBuildInputs = with rocmPackages; [
+    clr
+    hipblas
+    rocblas
+  ];
+
+  vulkanBuildInputs = [
+    shaderc
+    vulkan-headers
+    vulkan-loader
+  ];
+in
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp";
+  version = "6981";
+
+  src = fetchFromGitHub {
+    owner = "ggml-org";
+    repo = "llama.cpp";
+    tag = "b${finalAttrs.version}";
+    hash = "sha256-0WtiHDlMeb+m2XcMwkPFY1mtwVTwRJUoxQSwzpiRbts=";
+    leaveDotGit = true;
+    postFetch = ''
+      git -C "$out" rev-parse --short HEAD > $out/COMMIT
+      find "$out" -name .git -print0 | xargs -0 rm -rf
+    '';
+  };
+
+  nativeBuildInputs = [
+    cmake
+    ninja
+    pkg-config
+  ]
+  ++ optionals cudaSupport [
+    cudaPackages.cuda_nvcc
+    autoAddDriverRunpath
+  ];
+
+  buildInputs =
+    optionals cudaSupport cudaBuildInputs
+    ++ optionals openclSupport [ clblast ]
+    ++ optionals rocmSupport rocmBuildInputs
+    ++ optionals blasSupport [ blas ]
+    ++ optionals vulkanSupport vulkanBuildInputs
+    ++ [ curl ];
+
+  preConfigure = ''
+    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
+  '';
+
+  cmakeFlags = [
+    # -march=native is non-deterministic; override with platform-specific flags if needed
+    (cmakeBool "GGML_NATIVE" false)
+    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
+    (cmakeBool "LLAMA_BUILD_SERVER" true)
+    (cmakeBool "LLAMA_BUILD_TESTS" (finalAttrs.finalPackage.doCheck or false))
+    (cmakeBool "LLAMA_CURL" true)
+    (cmakeBool "BUILD_SHARED_LIBS" true)
+    (cmakeBool "GGML_BLAS" blasSupport)
+    (cmakeBool "GGML_CLBLAST" openclSupport)
+    (cmakeBool "GGML_CUDA" cudaSupport)
+    (cmakeBool "GGML_HIP" rocmSupport)
+    (cmakeBool "GGML_METAL" metalSupport)
+    (cmakeBool "GGML_RPC" rpcSupport)
+    (cmakeBool "GGML_VULKAN" vulkanSupport)
+    (cmakeBool "GGML_HEXAGON" hexagonSupport)
+    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
+  ]
+  ++ optionals cudaSupport [
+    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
+  ]
+  ++ optionals rocmSupport [
+    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
+    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
+  ]
+  ++ optionals metalSupport [
+    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
+  ]
+  ++ optionals rpcSupport [
+    # This is done so we can move rpc-server out of bin because llama.cpp doesn't
+    # install rpc-server in their install target.
+    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+  ]
+  ++ optionals hexagonSupport [
+    (cmakeFeature "GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE" "128")
+    (cmakeFeature "HEXAGON_SDK_ROOT" "${hexagon-sdk}/opt/hexagon")
+    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${hexagon-sdk}/opt/hexagon/tools/HEXAGON_Tools/19.0.04")
+  ];
+
+  # upstream plans on adding targets at the cmakelevel, remove those
+  # additional steps after that
+  postInstall = ''
+    # Match previous binary name for this package
+    ln -sf $out/bin/llama-cli $out/bin/llama
+
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
+  ''
+  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";
+
+  # the tests are failing as of 2025-08
+  doCheck = false;
+
+  passthru = {
+    tests = lib.optionalAttrs stdenv.hostPlatform.isDarwin {
+      metal = llama-cpp.override { metalSupport = true; };
+    };
+    updateScript = nix-update-script {
+      attrPath = "llama-cpp";
+      extraArgs = [
+        "--version-regex"
+        "b(.*)"
+      ];
+    };
+  };
+
+  meta = {
+    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
+    homepage = "https://github.com/ggml-org/llama.cpp";
+    license = lib.licenses.mit;
+    mainProgram = "llama";
+    maintainers = with lib.maintainers; [
+      booxter
+      dit7ya
+      philiptaron
+      xddxdd
+    ];
+    platforms = lib.platforms.unix;
+    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin (
+      hexagonSupport && !effectiveStdenv.hostPlatform.aarch64-linux
+    );
+    broken = metalSupport && !effectiveStdenv.hostPlatform.isDarwin;
+  };
+})
-- 
2.53.0

From 1228969d8061cf8ead0785ea79e43f422585f52e Mon Sep 17 00:00:00 2001
From: Doloro1978
Date: Mon, 20 Apr 2026 20:37:53 +0100
Subject: [PATCH 2/4] renamed: llama-cpp-hexagon -> llama-cpp

---
Review notes:
- Pure rename (similarity index 100%); no content change.

 pkgs/by-name/ll/{llama-cpp-hexagon => llama-cpp}/package.nix | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pkgs/by-name/ll/{llama-cpp-hexagon => llama-cpp}/package.nix (100%)

diff --git a/pkgs/by-name/ll/llama-cpp-hexagon/package.nix b/pkgs/by-name/ll/llama-cpp/package.nix
similarity index 100%
rename from pkgs/by-name/ll/llama-cpp-hexagon/package.nix
rename to pkgs/by-name/ll/llama-cpp/package.nix
-- 
2.53.0

From 34974204db9e82fb2c7c64a03d29c87c36a3c189 Mon Sep 17 00:00:00 2001
From: Doloro1978
Date: Tue, 21 Apr 2026 17:38:09 +0100
Subject: [PATCH 3/4] wip: llama-cpp with hexagon

---
Review notes:
- llama-cpp-hexagon becomes a thin `llama-cpp.override { hexagonSupport = true; }`.
- effectiveStdenv checks hexagonSupport first, then still selects
  cudaPackages.backendStdenv when cudaSupport is set, and only otherwise
  falls back to stdenv. The CUDA comments kept above it describe why the
  backendStdenv branch must not be dropped.
- meta.broken now also covers hexagonSupport on a non-aarch64 host platform.
- NOTE(review): buildPkgs re-imports nixpkgs from pkgs.path with a fixed
  x86_64-linux system; overlays/config of the calling package set are
  presumably not carried over -- confirm this is intended.

 pkgs/by-name/ll/llama-cpp-hexagon/package.nix |  3 +++
 pkgs/by-name/ll/llama-cpp/package.nix         | 23 ++++++++++++-------
 2 files changed, 18 insertions(+), 8 deletions(-)
 create mode 100644 pkgs/by-name/ll/llama-cpp-hexagon/package.nix

diff --git a/pkgs/by-name/ll/llama-cpp-hexagon/package.nix b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
new file mode 100644
index 0000000..d6cff05
--- /dev/null
+++ b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
@@ -0,0 +1,3 @@
+{ llama-cpp }:
+
+llama-cpp.override { hexagonSupport = true; }
diff --git a/pkgs/by-name/ll/llama-cpp/package.nix b/pkgs/by-name/ll/llama-cpp/package.nix
index b883acc..41a1886 100644
--- a/pkgs/by-name/ll/llama-cpp/package.nix
+++ b/pkgs/by-name/ll/llama-cpp/package.nix
@@ -1,5 +1,6 @@
 {
   lib,
+  pkgs,
   autoAddDriverRunpath,
   cmake,
   fetchFromGitHub,
@@ -36,7 +37,6 @@
   shaderc,
   vulkan-headers,
   vulkan-loader,
-  hexagon-sdk,
   ninja,
 }:
 
@@ -44,7 +44,14 @@ let
   # It's necessary to consistently use backendStdenv when building with CUDA support,
   # otherwise we get libstdc++ errors downstream.
   # cuda imposes an upper bound on the gcc version
-  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
+  buildPkgs = import pkgs.path {
+    system = "x86_64-linux"; # builder uses x86_64
+  };
+
+  # hexagon needs a x86 build env
+  crossPkgs = buildPkgs.pkgsCross.aarch64-multiplatform;
+
+  effectiveStdenv = if hexagonSupport then crossPkgs.stdenv else if cudaSupport then cudaPackages.backendStdenv else stdenv;
   inherit (lib)
     cmakeBool
     cmakeFeature
@@ -147,8 +154,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   ]
   ++ optionals hexagonSupport [
     (cmakeFeature "GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE" "128")
-    (cmakeFeature "HEXAGON_SDK_ROOT" "${hexagon-sdk}/opt/hexagon")
-    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${hexagon-sdk}/opt/hexagon/tools/HEXAGON_Tools/19.0.04")
+    (cmakeFeature "HEXAGON_SDK_ROOT" "${pkgs.hexagon-sdk}/opt/hexagon")
+    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${pkgs.hexagon-sdk}/opt/hexagon/tools/HEXAGON_Tools/19.0.04")
   ];
 
   # upstream plans on adding targets at the cmakelevel, remove those
@@ -190,9 +197,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
       xddxdd
     ];
     platforms = lib.platforms.unix;
-    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin (
-      hexagonSupport && !effectiveStdenv.hostPlatform.aarch64-linux
-    );
-    broken = metalSupport && !effectiveStdenv.hostPlatform.isDarwin;
+    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
+    broken =
+      (metalSupport && !effectiveStdenv.hostPlatform.isDarwin)
+      || (hexagonSupport && !effectiveStdenv.hostPlatform.isAarch64);
   };
 })
-- 
2.53.0

From 0188aff399f5c80a28df657c8c4b5495ed7b15e0 Mon Sep 17 00:00:00 2001
From: Doloro1978
Date: Tue, 21 Apr 2026 23:10:14 +0100
Subject: [PATCH 4/4] fix: it builds now

---
Review notes:
- Bumps version 6981 -> 8871 (with new source hash), takes hexagon-sdk as a
  regular argument again and adds it to buildInputs when hexagonSupport is
  set, and extends the hexagon cmake flags.
- llama-cpp-hexagon additionally overrides blasSupport = false.
- NOTE(review): pkgs.clang and blas are added to nativeBuildInputs for every
  variant, not only when hexagonSupport is set -- confirm that is intended.

 pkgs/by-name/ll/llama-cpp-hexagon/package.nix |  5 +++-
 pkgs/by-name/ll/llama-cpp/package.nix         | 28 +++++++++++++++----
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/pkgs/by-name/ll/llama-cpp-hexagon/package.nix b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
index d6cff05..8418479 100644
--- a/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
+++ b/pkgs/by-name/ll/llama-cpp-hexagon/package.nix
@@ -1,3 +1,6 @@
 { llama-cpp }:
 
-llama-cpp.override { hexagonSupport = true; }
+llama-cpp.override {
+  hexagonSupport = true;
+  blasSupport = false;
+}
diff --git a/pkgs/by-name/ll/llama-cpp/package.nix b/pkgs/by-name/ll/llama-cpp/package.nix
index 41a1886..77769f7 100644
--- a/pkgs/by-name/ll/llama-cpp/package.nix
+++ b/pkgs/by-name/ll/llama-cpp/package.nix
@@ -36,6 +36,7 @@
   llama-cpp,
   shaderc,
   vulkan-headers,
+  hexagon-sdk,
   vulkan-loader,
   ninja,
 }:
@@ -79,16 +80,20 @@ let
     vulkan-headers
     vulkan-loader
   ];
+
+  hexagonBuildInputs = [
+    hexagon-sdk
+  ];
 in
 effectiveStdenv.mkDerivation (finalAttrs: {
   pname = "llama-cpp";
-  version = "6981";
+  version = "8871";
 
   src = fetchFromGitHub {
     owner = "ggml-org";
     repo = "llama.cpp";
     tag = "b${finalAttrs.version}";
-    hash = "sha256-0WtiHDlMeb+m2XcMwkPFY1mtwVTwRJUoxQSwzpiRbts=";
+    hash = "sha256-dSMomkkG3YFwXAcYTym6Z03u8ZAWFFio8jdQJPMJ/yg=";
     leaveDotGit = true;
     postFetch = ''
       git -C "$out" rev-parse --short HEAD > $out/COMMIT
@@ -98,8 +103,10 @@ effectiveStdenv.mkDerivation (finalAttrs: {
 
   nativeBuildInputs = [
     cmake
+    pkgs.clang
     ninja
     pkg-config
+    blas
   ]
   ++ optionals cudaSupport [
     cudaPackages.cuda_nvcc
@@ -112,7 +119,10 @@ effectiveStdenv.mkDerivation (finalAttrs: {
     ++ optionals rocmSupport rocmBuildInputs
     ++ optionals blasSupport [ blas ]
     ++ optionals vulkanSupport vulkanBuildInputs
-    ++ [ curl ];
+    ++ optionals hexagonSupport hexagonBuildInputs
+    ++ [
+      curl
+    ];
 
   preConfigure = ''
     prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
@@ -153,9 +163,17 @@ effectiveStdenv.mkDerivation (finalAttrs: {
     (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
   ]
   ++ optionals hexagonSupport [
+    # (cmakeFeature "CMAKE_TOOLCHAIN_FILE" "${finalAttrs.src}/cmake/arm64-linux-clang.cmake")
+    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+    (cmakeFeature "CMAKE_CXX_FLAGS" "")
+    (cmakeBool "GGML_OPENMP" false)
+    (cmakeBool "GGML_LLAMAFILE" false)
+    (cmakeFeature "GGML_OPENCL" "OFF")
+    (cmakeFeature "PREBUILT_LIB_DIR" "linux_aarch64")
     (cmakeFeature "GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE" "128")
-    (cmakeFeature "HEXAGON_SDK_ROOT" "${pkgs.hexagon-sdk}/opt/hexagon")
-    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${pkgs.hexagon-sdk}/opt/hexagon/tools/HEXAGON_Tools/19.0.04")
+    (cmakeFeature "HEXAGON_SDK_ROOT" "${hexagon-sdk}/opt")
+    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${hexagon-sdk}/opt/tools/HEXAGON_Tools/19.0.04")
+    (cmakeFeature "LLAMA_OPENSSL" "OFF")
   ];
 
   # upstream plans on adding targets at the cmakelevel, remove those
-- 
2.53.0