renamed: llama-cpp-hexagon -> llama-cpp

2026-04-20 20:37:53 +01:00
parent d462837a55
commit 1228969d80
1 changed files with 0 additions and 0 deletions
@@ -0,0 +1,198 @@
+{
+  lib,
+  autoAddDriverRunpath,
+  cmake,
+  fetchFromGitHub,
+  nix-update-script,
+  stdenv,
+
+  config,
+  cudaSupport ? config.cudaSupport,
+  cudaPackages ? { },
+
+  rocmSupport ? config.rocmSupport,
+  rocmPackages ? { },
+  rocmGpuTargets ? rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets,
+
+  openclSupport ? false,
+  clblast,
+
+  blasSupport ? builtins.all (x: !x) [
+    cudaSupport
+    metalSupport
+    openclSupport
+    rocmSupport
+    vulkanSupport
+  ],
+  blas,
+
+  pkg-config,
+  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
+  vulkanSupport ? false,
+  rpcSupport ? false,
+  hexagonSupport ? false,
+  curl,
+  llama-cpp,
+  shaderc,
+  vulkan-headers,
+  vulkan-loader,
+  hexagon-sdk,
+  ninja,
+}:
+
+let
+  # It's necessary to consistently use backendStdenv when building with CUDA support,
+  # otherwise we get libstdc++ errors downstream.
+  # cuda imposes an upper bound on the gcc version
+  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
+  inherit (lib)
+    cmakeBool
+    cmakeFeature
+    optionals
+    optionalString
+    ;
+
+  cudaBuildInputs = with cudaPackages; [
+    cuda_cccl # <nv/target>
+
+    # A temporary hack for reducing the closure size, remove once cudaPackages
+    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
+    cuda_cudart
+    libcublas
+  ];
+
+  rocmBuildInputs = with rocmPackages; [
+    clr
+    hipblas
+    rocblas
+  ];
+
+  vulkanBuildInputs = [
+    shaderc
+    vulkan-headers
+    vulkan-loader
+  ];
+in
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp";
+  version = "6981";
+
+  src = fetchFromGitHub {
+    owner = "ggml-org";
+    repo = "llama.cpp";
+    tag = "b${finalAttrs.version}";
+    hash = "sha256-0WtiHDlMeb+m2XcMwkPFY1mtwVTwRJUoxQSwzpiRbts=";
+    leaveDotGit = true;
+    postFetch = ''
+      git -C "$out" rev-parse --short HEAD > $out/COMMIT
+      find "$out" -name .git -print0 | xargs -0 rm -rf
+    '';
+  };
+
+  nativeBuildInputs = [
+    cmake
+    ninja
+    pkg-config
+  ]
+  ++ optionals cudaSupport [
+    cudaPackages.cuda_nvcc
+    autoAddDriverRunpath
+  ];
+
+  buildInputs =
+    optionals cudaSupport cudaBuildInputs
+    ++ optionals openclSupport [ clblast ]
+    ++ optionals rocmSupport rocmBuildInputs
+    ++ optionals blasSupport [ blas ]
+    ++ optionals vulkanSupport vulkanBuildInputs
+    ++ [ curl ];
+
+  preConfigure = ''
+    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
+  '';
+
+  cmakeFlags = [
+    # -march=native is non-deterministic; override with platform-specific flags if needed
+    (cmakeBool "GGML_NATIVE" false)
+    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
+    (cmakeBool "LLAMA_BUILD_SERVER" true)
+    (cmakeBool "LLAMA_BUILD_TESTS" (finalAttrs.finalPackage.doCheck or false))
+    (cmakeBool "LLAMA_CURL" true)
+    (cmakeBool "BUILD_SHARED_LIBS" true)
+    (cmakeBool "GGML_BLAS" blasSupport)
+    (cmakeBool "GGML_CLBLAST" openclSupport)
+    (cmakeBool "GGML_CUDA" cudaSupport)
+    (cmakeBool "GGML_HIP" rocmSupport)
+    (cmakeBool "GGML_METAL" metalSupport)
+    (cmakeBool "GGML_RPC" rpcSupport)
+    (cmakeBool "GGML_VULKAN" vulkanSupport)
+    (cmakeBool "GGML_HEXAGON" hexagonSupport)
+    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
+  ]
+  ++ optionals cudaSupport [
+    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
+  ]
+  ++ optionals rocmSupport [
+    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
+    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
+  ]
+  ++ optionals metalSupport [
+    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
+  ]
+  ++ optionals rpcSupport [
+    # This is done so we can move rpc-server out of bin because llama.cpp doesn't
+    # install rpc-server in their install target.
+    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+  ]
+  ++ optionals hexagonSupport [
+    (cmakeFeature "GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE" "128")
+    (cmakeFeature "HEXAGON_SDK_ROOT" "${hexagon-sdk}/opt/hexagon")
+    (cmakeFeature "HEXAGON_TOOLS_ROOT" "${hexagon-sdk}/opt/hexagon/tools/HEXAGON_Tools/19.0.04")
+  ];
+
+  # upstream plans on adding targets at the cmakelevel, remove those
+  # additional steps after that
+  postInstall = ''
+    # Match previous binary name for this package
+    ln -sf $out/bin/llama-cli $out/bin/llama
+
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
+  ''
+  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";
+
+  # the tests are failing as of 2025-08
+  doCheck = false;
+
+  passthru = {
+    tests = lib.optionalAttrs stdenv.hostPlatform.isDarwin {
+      metal = llama-cpp.override { metalSupport = true; };
+    };
+    updateScript = nix-update-script {
+      attrPath = "llama-cpp";
+      extraArgs = [
+        "--version-regex"
+        "b(.*)"
+      ];
+    };
+  };
+
+  meta = {
+    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
+    homepage = "https://github.com/ggml-org/llama.cpp";
+    license = lib.licenses.mit;
+    mainProgram = "llama";
+    maintainers = with lib.maintainers; [
+      booxter
+      dit7ya
+      philiptaron
+      xddxdd
+    ];
+    platforms = lib.platforms.unix;
+    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin (
+      hexagonSupport && !effectiveStdenv.hostPlatform.aarch64-linux
+    );
+    broken = metalSupport && !effectiveStdenv.hostPlatform.isDarwin;
+  };
+})