commit 14f20562275e48eae66fb76b61a05f462e1e2d13 Author: Nikkuss Date: Fri May 30 21:52:25 2025 +0400 Initial commit diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..aafe6f4 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1748370509, + "narHash": "sha256-QlL8slIgc16W5UaI3w7xHQEP+Qmv/6vSNTpoZrrSlbk=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "4faa5f5321320e49a78ae7848582f684d64783e9", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..1cfe76f --- /dev/null +++ b/flake.nix @@ -0,0 +1,45 @@ +{ + description = "A very basic flake"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = + { + self, + nixpkgs, + flake-utils, + ... 
+ }: + flake-utils.lib.eachDefaultSystem ( + system: + let + pkgs = import nixpkgs { + inherit system; + # overlays = [ + # (final: prev: { + # rocmPackages_6 = final.callPackage ./pkgs/rocm-modules { }; + # }) + # ]; + }; + in + { + packages = pkgs.callPackage ./pkgs/rocm-modules { }; + overlays.default = ( + final: prev: { + rocmPackages = final.callPackage ./pkgs/rocm-modules { }; + rocmPackages_6 = final.rocmPackages; + } + ); + # build all pkgs as check + # defaultPackage = self.packages.${system}.rocm-modules; + # + # devShells.default = pkgs.mkShell { + # buildInputs = [ self.packages.${system}.rocm-modules ]; + # }; + + } + ); +} diff --git a/pkgs/rocm-modules/0001-Compile-transupp.c-as-part-of-the-library.patch b/pkgs/rocm-modules/0001-Compile-transupp.c-as-part-of-the-library.patch new file mode 100644 index 0000000..0a09a88 --- /dev/null +++ b/pkgs/rocm-modules/0001-Compile-transupp.c-as-part-of-the-library.patch @@ -0,0 +1,104 @@ +From 4a0584f7c05641143151ebdc1be1163bebf9d35d Mon Sep 17 00:00:00 2001 +From: Las +Date: Sun, 3 Jan 2021 18:35:37 +0000 +Subject: [PATCH] Compile transupp.c as part of the library + +The exported symbols are made weak to not conflict with users +of the library that already vendor this functionality. 
+--- + CMakeLists.txt | 4 ++-- + transupp.c | 14 +++++++------- + 2 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 0ca6f98..a9a0fae 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -533,7 +533,7 @@ set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c + jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c + jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c + jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c +- jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c) ++ jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c transupp.c) + + if(WITH_ARITH_ENC OR WITH_ARITH_DEC) + set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c) +@@ -1489,7 +1489,7 @@ install(EXPORT ${CMAKE_PROJECT_NAME}Targets + + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h + ${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h +- ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h ++ ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h ${CMAKE_CURRENT_SOURCE_DIR}/transupp.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + + include(cmakescripts/BuildPackages.cmake) +diff --git a/transupp.c b/transupp.c +index 6e86077..2da49a7 100644 +--- a/transupp.c ++++ b/transupp.c +@@ -1386,7 +1386,7 @@ jt_read_integer(const char **strptr, JDIMENSION *result) + * This code is loosely based on XParseGeometry from the X11 distribution. + */ + +-GLOBAL(boolean) ++GLOBAL(boolean) __attribute__((weak)) + jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec) + { + info->crop = FALSE; +@@ -1486,7 +1486,7 @@ trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height) + * and transformation is not perfect. Otherwise returns TRUE. 
+ */ + +-GLOBAL(boolean) ++GLOBAL(boolean) __attribute__((weak)) + jtransform_request_workspace(j_decompress_ptr srcinfo, + jpeg_transform_info *info) + { +@@ -2033,7 +2033,7 @@ adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width, + * to jpeg_write_coefficients(). + */ + +-GLOBAL(jvirt_barray_ptr *) ++GLOBAL(jvirt_barray_ptr *) __attribute__((weak)) + jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) +@@ -2152,7 +2152,7 @@ jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + * Note that some transformations will modify the source data arrays! + */ + +-GLOBAL(void) ++GLOBAL(void) __attribute__((weak)) + jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) +@@ -2264,7 +2264,7 @@ jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + * (may use custom action then) + */ + +-GLOBAL(boolean) ++GLOBAL(boolean) __attribute__((weak)) + jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height, + int MCU_width, int MCU_height, + JXFORM_CODE transform) +@@ -2303,7 +2303,7 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height, + * This must be called before jpeg_read_header() to have the desired effect. + */ + +-GLOBAL(void) ++GLOBAL(void) __attribute__((weak)) + jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option) + { + #ifdef SAVE_MARKERS_SUPPORTED +@@ -2331,7 +2331,7 @@ jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option) + * JFIF APP0 or Adobe APP14 markers if selected. 
+ */ + +-GLOBAL(void) ++GLOBAL(void) __attribute__((weak)) + jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JCOPY_OPTION option) + { +-- +2.29.2 + diff --git a/pkgs/rocm-modules/amdsmi/default.nix b/pkgs/rocm-modules/amdsmi/default.nix new file mode 100644 index 0000000..bd1c345 --- /dev/null +++ b/pkgs/rocm-modules/amdsmi/default.nix @@ -0,0 +1,90 @@ +{ + lib, + stdenv, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + cmake, + pkg-config, + libdrm, + wrapPython, + autoPatchelfHook, +}: + +let + esmi_ib_src = fetchFromGitHub { + owner = "amd"; + repo = "esmi_ib_library"; + rev = "esmi_pkg_ver-4.1.2"; + hash = "sha256-wj3krY/6AdmnoNOSqN9EE/Yxbx++0AW2vu7dovQrQ9I="; + }; +in +stdenv.mkDerivation (finalAttrs: { + pname = "amdsmi"; + version = "6.4.1"; + src = fetchFromGitHub { + owner = "rocm"; + repo = "amdsmi"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-K6FVvieJnOCfbyNNwOWs3t836ihtvELJvcCjXQoHuRY="; + }; + + postPatch = '' + substituteInPlace goamdsmi_shim/CMakeLists.txt \ + --replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \ + --replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#' + + # Manually unpack esmi_ib_src and add amd_hsmp.h so execute-process git clone doesn't run + cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library + mkdir -p ./esmi_ib_library/include/asm + cp ./include/amd_smi/impl/amd_hsmp.h ./esmi_ib_library/include/asm/amd_hsmp.h + ''; + + patches = [ + # Fix ld.lld undefined reference: drmGetVersion + # (fetchpatch { + # url = "https://github.com/ROCm/amdsmi/commit/c3864bf6171970d86dc50fd23f06377736823997.patch"; + # hash = "sha256-zRG1tBD8sIQCWdKfCbXC/Z/6d6NTrRYvRpddPWdM4j8="; + # }) + ]; + + nativeBuildInputs = [ + cmake + pkg-config + wrapPython + autoPatchelfHook + ]; + + buildInputs = [ + libdrm + ]; + + cmakeFlags = [ + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + 
"-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + postInstall = '' + wrapPythonProgramsIn $out + rm $out/bin/amd-smi + ln -sf $out/libexec/amdsmi_cli/amdsmi_cli.py $out/bin/amd-smi + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "System management interface for AMD GPUs supported by ROCm"; + homepage = "https://github.com/ROCm/rocm_smi_lib"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = [ "x86_64-linux" ]; + }; +}) diff --git a/pkgs/rocm-modules/aotriton/default.nix b/pkgs/rocm-modules/aotriton/default.nix new file mode 100644 index 0000000..a7e58f2 --- /dev/null +++ b/pkgs/rocm-modules/aotriton/default.nix @@ -0,0 +1,230 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + rocm-cmake, + clr, + rocblas, + rocsolver, + gtest, + msgpack, + libxml2, + python3, + python3Packages, + openmp, + hipblas-common, + hipblas, + nlohmann_json, + triton-llvm, + rocmlir, + lapack-reference, + ninja, + ncurses, + libffi, + zlib, + zstd, + xz, + pkg-config, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ? 
[ + # aotriton GPU support list: + # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py + "gfx90a" + "gfx942" + "gfx1100" + "gfx1101" + ], +}: + +stdenv.mkDerivation ( + finalAttrs: + let + py = python3.withPackages (ps: [ + ps.pyyaml + ps.distutils + ps.setuptools + ps.packaging + ps.numpy + ps.wheel + ps.filelock + ps.iniconfig + ps.pluggy + ps.pybind11 + ]); + gpuTargets' = lib.concatStringsSep ";" gpuTargets; + compiler = "amdclang++"; + cFlags = "-O3 -DNDEBUG"; + cxxFlags = "${cFlags} -Wno-c++11-narrowing"; + triton-llvm' = triton-llvm; + in + { + pname = "aotriton"; + version = "0.9.2b"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "aotriton"; + rev = "${finalAttrs.version}"; + hash = "sha256-1Cf0olD3zRg9JESD6s/WaGifm3kfD12VUvjTZHpmGAE="; + fetchSubmodules = true; + }; + env.CXX = compiler; + env.ROCM_PATH = "${clr}"; + requiredSystemFeatures = [ "big-parallel" ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + # Need an empty cuda.h for this to compile + # Better than pulling in unfree cuda headers + postPatch = '' + touch third_party/triton/third_party/nvidia/include/cuda.h + ''; + + doCheck = false; + doInstallCheck = false; + + nativeBuildInputs = [ + cmake + rocm-cmake + pkg-config + py + clr + ninja + ]; + + buildInputs = + [ + rocblas + rocsolver + hipblas-common + hipblas + openmp + libffi + ncurses + xz + nlohmann_json + rocmlir + + msgpack + libxml2 + python3Packages.msgpack + zlib + zstd + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + env.TRITON_OFFLINE_BUILD = 1; + env.LLVM_SYSPATH = "${triton-llvm'}"; + env.JSON_SYSPATH = nlohmann_json; + env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir"; + env.CXXFLAGS = "-I/build/source/third_party/triton/third_party/nvidia/backend/include"; + + # Fix up header issues in 
triton: https://github.com/triton-lang/triton/pull/3985/files + preConfigure = '' + mkdir third_party/triton/third_party/nvidia/backend/include/ + touch third_party/triton/third_party/nvidia/backend/include/cuda.h + find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + + find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + + + sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt + sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt + sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt + sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt + substituteInPlace third_party/triton/python/setup.py \ + --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \ + --replace-fail 'system == "Linux"' 'False' + # Fix 'ld: error: unable to insert .comment after .comment' + substituteInPlace v2python/ld_script.py \ + --replace-fail 'INSERT AFTER .comment;' "" + + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${cFlags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${cxxFlags}' + ) + prependToVar cmakeFlags "-GNinja" + mkdir -p /build/tmp-home + export HOME=/build/tmp-home + ''; + + # Excerpt from README: + # Note: do not run ninja separately, due to the limit of the current build system, + # ninja install will run the whole build process unconditionally. 
+ dontBuild = true; + + installPhase = '' + runHook preInstall + ninja -v install + runHook postInstall + ''; + + cmakeFlags = + [ + "-Wno-dev" + "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}" + "-DCMAKE_CXX_COMPILER=${compiler}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblas-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblas-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + meta = with lib; { + description = "ROCm Ahead of Time (AOT) Triton Math Library "; + homepage = "https://github.com/ROCm/aotriton"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; + } +) diff --git a/pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch b/pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch new file mode 100644 index 0000000..5a36a2c --- /dev/null +++ b/pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch @@ -0,0 +1,40 @@ +diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake +index 3f233b72f..67bdc62ee 100644 +--- a/rocclr/cmake/ROCclr.cmake ++++ b/rocclr/cmake/ROCclr.cmake +@@ -44,6 +44,19 @@ 
find_package(Threads REQUIRED) + + find_package(AMD_OPENCL) + ++# Find X11 package ++find_package(X11 REQUIRED) ++if(NOT X11_FOUND) ++ message(FATAL_ERROR "X11 libraries not found") ++endif() ++ ++# Find OpenGL package ++find_package(OpenGL REQUIRED) ++if(NOT OpenGL_FOUND) ++ message(FATAL_ERROR "OpenGL not found") ++endif() ++ ++ + add_library(rocclr STATIC) + + include(ROCclrCompilerOptions) +@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC + ${ROCCLR_SRC_DIR}/device + ${ROCCLR_SRC_DIR}/elf + ${ROCCLR_SRC_DIR}/include ++ ${X11_INCLUDE_DIR} ++ ${OPENGL_INCLUDE_DIR} + ${AMD_OPENCL_INCLUDE_DIRS}) + +-target_link_libraries(rocclr PUBLIC Threads::Threads) ++target_link_libraries(rocclr PUBLIC ++ Threads::Threads ++ ${X11_LIBRARIES} ++ ${OPENGL_LIBRARIES}) + # IPC on Windows is not supported + if(UNIX) + target_link_libraries(rocclr PUBLIC rt) diff --git a/pkgs/rocm-modules/clr/default.nix b/pkgs/rocm-modules/clr/default.nix new file mode 100644 index 0000000..3500c4b --- /dev/null +++ b/pkgs/rocm-modules/clr/default.nix @@ -0,0 +1,287 @@ +{ + lib, + stdenv, + callPackage, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + makeWrapper, + cmake, + perl, + hip-common, + hipcc, + rocm-device-libs, + rocm-comgr, + rocm-runtime, + rocm-core, + roctracer, + rocminfo, + rocm-smi, + numactl, + libffi, + zstd, + zlib, + libGL, + libxml2, + libX11, + python3Packages, + rocm-merged-llvm, + khronos-ocl-icd-loader, + gcc-unwrapped, + writeShellScriptBin, + localGpuTargets ? 
null, +}: + +let + inherit (rocm-core) ROCM_LIBPATCH_VERSION; + hipClang = rocm-merged-llvm; + hipClangPath = "${hipClang}/bin"; + wrapperArgs = [ + "--prefix PATH : $out/bin" + "--prefix LD_LIBRARY_PATH : ${rocm-runtime}" + "--set HIP_PLATFORM amd" + "--set HIP_PATH $out" + "--set HIP_CLANG_PATH ${hipClangPath}" + "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode" + "--set HSA_PATH ${rocm-runtime}" + "--set ROCM_PATH $out" + ]; + amdclang = writeShellScriptBin "amdclang" '' + exec clang "$@" + ''; + amdclangxx = writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + ''; +in +stdenv.mkDerivation (finalAttrs: { + pname = "clr"; + version = "6.4.1"; + + outputs = [ + "out" + "icd" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "clr"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-MA9MS/HF3j5iqpFuatHQJZ+nNkcGgzCvpkiNO6CjoPg="; + }; + + nativeBuildInputs = [ + makeWrapper + cmake + perl + python3Packages.python + python3Packages.cppheaderparser + amdclang + amdclangxx + ]; + + buildInputs = [ + numactl + libGL + libxml2 + libX11 + khronos-ocl-icd-loader + hipClang + libffi + zstd + zlib + ]; + + propagatedBuildInputs = [ + rocm-core + rocm-device-libs + rocm-comgr + rocm-runtime + rocminfo + ]; + + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries + "-DCLR_BUILD_HIP=ON" + "-DCLR_BUILD_OCL=ON" + "-DHIP_COMMON_DIR=${hip-common}" + "-DHIPCC_BIN_DIR=${hipcc}/bin" + "-DHIP_PLATFORM=amd" + "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" + "-DROCM_PATH=${rocminfo}" + "-DBUILD_ICD=ON" + "-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds? 
+ "-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}" + + # Temporarily set variables to work around upstream CMakeLists issue + # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DCMAKE_INSTALL_LIBDIR=lib" + ]; + + env.LLVM_DIR = ""; + + patches = [ + ./cmake-find-x11-libgl.patch + + # (fetchpatch { + # # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99 + # sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA="; + # url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch"; + # }) + # (fetchpatch { + # # improve rocclr isa compatibility check + # sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs="; + # url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch"; + # }) + # (fetchpatch { + # # [PATCH] Improve hipamd compat check + # sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8="; + # url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch"; + # }) + # (fetchpatch { + # # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition + # # Fixes crash due to UB in KernelBlitManager::setArgument + # sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I="; + # url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch"; + # }) + # (fetchpatch { + # # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues + # sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4="; + # url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch"; + # }) + ]; + + postPatch = '' + patchShebangs hipamd/*.sh + patchShebangs hipamd/src + + # We're not on Windows so these are never installed to hipcc... 
+ substituteInPlace hipamd/CMakeLists.txt \ + --replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \ + --replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" "" + + substituteInPlace hipamd/src/hip_embed_pch.sh \ + --replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang" + + substituteInPlace opencl/khronos/icd/loader/icd_platform.h \ + --replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \ + '#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";' + + # new unbundler has better error messages, defaulting it on + substituteInPlace rocclr/utils/flags.hpp \ + --replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true" + ''; + + postInstall = '' + chmod +x $out/bin/* + patchShebangs $out/bin + + cp ${amdclang}/bin/* $out/bin/ + cp ${amdclangxx}/bin/* $out/bin/ + + for prog in hip{cc,config}{,.pl}; do + wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs} + done + + mkdir -p $out/nix-support/ + echo ' + export HIP_PATH="${placeholder "out"}" + export HIP_PLATFORM=amd + export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode" + export NIX_CC_USE_RESPONSE_FILE=0 + export HIP_CLANG_PATH="${hipClangPath}" + export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}" + export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook + + # Just link rocminfo, it's easier + ln -s ${rocminfo}/bin/* $out/bin + ln -s ${rocm-core}/include/* $out/include/ + + # Replace rocm-opencl-icd functionality + mkdir -p $icd/etc/OpenCL/vendors + echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd + + # add version info to output (downstream rocmPackages look for this) + ln -s ${rocm-core}/.info/ $out/.info + + ln -s ${hipClang} $out/llvm + ''; + + disallowedRequisites = [ + gcc-unwrapped + ]; + + passthru = + { + # All known and valid general GPU targets + # We cannot use this for each ROCm library, as each 
defines their own supported targets + # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix + # Generic targets are not yet available in rocm-6.3.1 llvm + gpuTargets = lib.forEach [ + # "9-generic" + "900" # MI25, Vega 56/64 + "906" # MI50/60, Radeon VII + "908" # MI100 + "90a" # MI210 / MI250 + # "9-4-generic" + # 940/1 - never released publicly, maybe HPE cray specific MI3xx? + "942" # MI300 + # "10-1-generic" + "1010" + "1012" + # "10-3-generic" + "1030" # W6800, various Radeon cards + # "11-generic" + "1100" + "1101" + "1102" + "1201" + ] (target: "gfx${target}"); + + inherit hipClangPath; + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; + }; + + impureTests = { + rocm-smi = callPackage ./test-rocm-smi.nix { + inherit rocm-smi; + clr = finalAttrs.finalPackage; + }; + opencl-example = callPackage ./test-opencl-example.nix { + clr = finalAttrs.finalPackage; + }; + }; + + selectGpuTargets = + { + supported ? [ ], + }: + supported; + gpuArchSuffix = ""; + } + // lib.optionalAttrs (localGpuTargets != null) { + inherit localGpuTargets; + gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets); + selectGpuTargets = + { + supported ? 
[ ], + }: + if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported; + }; + + meta = with lib; { + description = "AMD Common Language Runtime for hipamd, opencl, and rocclr"; + homepage = "https://github.com/ROCm/clr"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/clr/test-opencl-example.nix b/pkgs/rocm-modules/clr/test-opencl-example.nix new file mode 100644 index 0000000..90da7f0 --- /dev/null +++ b/pkgs/rocm-modules/clr/test-opencl-example.nix @@ -0,0 +1,88 @@ +{ + lib, + stdenv, + makeImpureTest, + fetchFromGitHub, + clr, + cmake, + pkg-config, + glew, + libglut, + opencl-headers, + ocl-icd, +}: + +let + + examples = stdenv.mkDerivation { + pname = "amd-app-samples"; + version = "2018-06-10"; + + src = fetchFromGitHub { + owner = "OpenCL"; + repo = "AMD_APP_samples"; + rev = "54da6ca465634e78fc51fc25edf5840467ee2411"; + hash = "sha256-qARQpUiYsamHbko/I1gPZE9pUGJ+3396Vk2n7ERSftA="; + }; + + nativeBuildInputs = [ + cmake + pkg-config + ]; + + buildInputs = [ + glew + libglut + opencl-headers + ocl-icd + ]; + + installPhase = '' + runHook preInstall + + mkdir -p $out/bin + # Example path is bin/x86_64/Release/cl/Reduction/Reduction + cp -r bin/*/*/*/*/* $out/bin/ + + runHook postInstall + ''; + + cmakeFlags = [ "-DBUILD_CPP_CL=OFF" ]; + + meta = with lib; { + description = "Samples from the AMD APP SDK (with OpenCRun support)"; + homepage = "https://github.com/OpenCL/AMD_APP_samples"; + license = licenses.bsd2; + platforms = platforms.linux; + teams = [ lib.teams.rocm ]; + }; + }; + +in +makeImpureTest { + name = "opencl-example"; + testedPackage = "rocmPackages_6.clr"; + + sandboxPaths = [ + "/sys" + "/dev/dri" + "/dev/kfd" + ]; + + nativeBuildInputs = [ examples ]; + + OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors"; + + testScript = '' + # Examples load resources from current 
directory + cd ${examples}/bin + echo OCL_ICD_VENDORS=$OCL_ICD_VENDORS + pwd + + HelloWorld | grep HelloWorld + ''; + + meta = with lib; { + teams = [ teams.rocm ]; + }; +} diff --git a/pkgs/rocm-modules/clr/test-rocm-smi.nix b/pkgs/rocm-modules/clr/test-rocm-smi.nix new file mode 100644 index 0000000..904c4ae --- /dev/null +++ b/pkgs/rocm-modules/clr/test-rocm-smi.nix @@ -0,0 +1,27 @@ +{ + lib, + makeImpureTest, + clinfo, + clr, + rocm-smi, +}: + +makeImpureTest { + name = "rocm-smi"; + testedPackage = "rocmPackages_6.clr"; + nativeBuildInputs = [ + clinfo + rocm-smi + ]; + OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors"; + + testScript = '' + # Test fails if the number of platforms is 0 + clinfo | grep -E 'Number of platforms * [1-9]' + rocm-smi | grep -A1 GPU + ''; + + meta = with lib; { + teams = [ teams.rocm ]; + }; +} diff --git a/pkgs/rocm-modules/composable_kernel/base.nix b/pkgs/rocm-modules/composable_kernel/base.nix new file mode 100644 index 0000000..d6f90c7 --- /dev/null +++ b/pkgs/rocm-modules/composable_kernel/base.nix @@ -0,0 +1,167 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocm-merged-llvm, + clr, + rocminfo, + hipify, + gitMinimal, + gtest, + zstd, + buildTests ? false, + buildExamples ? false, + gpuTargets ? 
( + clr.localGpuTargets or [ + "gfx1010" + "gfx1012" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + "gfx1201" + ] + ), +}: + +stdenv.mkDerivation (finalAttrs: { + preBuild = '' + echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks"; + exit 1 + ''; + + pname = "composable_kernel_base"; + # Picked this version over 6.3 because much easier to get to build + # and it matches the version torch 2.6 wants + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildExamples [ + "example" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "composable_kernel"; + rev = "07339c738396ebeae57374771ded4dcf11bddf1e"; + hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs="; + }; + + nativeBuildInputs = [ + # Deliberately not using ninja + # because we're jankily composing build outputs from multiple drvs + # ninja won't believe they're up to date + gitMinimal + cmake + rocminfo + clr + hipify + zstd + ]; + + buildInputs = [ + rocm-cmake + clr + zstd + ]; + + strictDeps = true; + enableParallelBuilding = true; + env.ROCM_PATH = clr; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; + + cmakeFlags = + [ + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW" + # "-DDL_KERNELS=ON" # Not needed, slow to build + # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp + # but migraphx requires an incompatible fork of CK and fails anyway + # "-DCK_USE_CODEGEN=ON" + # It might be worth skipping fp64 in future with this: + # "-DDTYPES=fp32;fp16;fp8;bf16;int8" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_DEV=OFF" + "-DROCM_PATH=${clr}" + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" + + # FP8 can build for 908/90a but very slow build 
+ # and produces unusably slow kernels that are huge + "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS + # per readme this is required if archs are dissimilar + # In rocm-6.3.x not setting any arch flag worked + # but setting dissimilar arches always failed + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names + ]; + + # No flags to build selectively it seems... + postPatch = + # Reduce configure time by preventing thousands of clang-tidy targets being added + # We will never call them + # Never build profiler + '' + substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \ + --replace-fail clang_tidy_check '#clang_tidy_check' + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(profiler)" "" + '' + # Optionally remove tests + + lib.optionalString (!buildTests) '' + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(test)" "" + substituteInPlace codegen/CMakeLists.txt \ + --replace-fail "include(ROCMTest)" "" + '' + # Optionally remove examples + + lib.optionalString (!buildExamples) '' + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(example)" "" + ''; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildExamples '' + mkdir -p $example/bin + mv $out/bin/example_* $example/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets; + + meta = with lib; { + description = "Performance portable programming model for machine learning tensor operators"; + homepage = "https://github.com/ROCm/composable_kernel"; 
+ license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + broken = true; + }; +}) diff --git a/pkgs/rocm-modules/composable_kernel/base.nix.cmp b/pkgs/rocm-modules/composable_kernel/base.nix.cmp new file mode 100644 index 0000000..402216c --- /dev/null +++ b/pkgs/rocm-modules/composable_kernel/base.nix.cmp @@ -0,0 +1,161 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocm-merged-llvm, + clr, + rocminfo, + hipify, + gitMinimal, + gtest, + zstd, + buildTests ? false, + buildExamples ? false, + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx1201" + ] + ), +}: + +stdenv.mkDerivation (finalAttrs: { + preBuild = '' + echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks"; + exit 1 + ''; + + pname = "composable_kernel_base"; + # Picked this version over 6.3 because much easier to get to build + # and it matches the version torch 2.6 wants + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildExamples [ + "example" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "composable_kernel"; + rev = "07339c738396ebeae57374771ded4dcf11bddf1e"; + hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs="; + }; + + nativeBuildInputs = [ + # Deliberately not using ninja + # because we're jankily composing build outputs from multiple drvs + # ninja won't believe they're up to date + gitMinimal + cmake + rocminfo + clr + hipify + zstd + ]; + + buildInputs = [ + rocm-cmake + clr + zstd + ]; + + strictDeps = true; + enableParallelBuilding = true; + env.ROCM_PATH = clr; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; + + cmakeFlags = + [ + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW" + # "-DDL_KERNELS=ON" # Not needed, slow to build + # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp + # 
but migraphx requires an incompatible fork of CK and fails anyway + # "-DCK_USE_CODEGEN=ON" + # It might be worth skipping fp64 in future with this: + # "-DDTYPES=fp32;fp16;fp8;bf16;int8" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_DEV=OFF" + "-DROCM_PATH=${clr}" + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" + + # FP8 can build for 908/90a but very slow build + # and produces unusably slow kernels that are huge + "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS + # per readme this is required if archs are dissimilar + # In rocm-6.3.x not setting any arch flag worked + # but setting dissimilar arches always failed + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names + ]; + + # No flags to build selectively it seems... 
+ postPatch = + # Reduce configure time by preventing thousands of clang-tidy targets being added + # We will never call them + # Never build profiler + '' + substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \ + --replace-fail clang_tidy_check '#clang_tidy_check' + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(profiler)" "" + '' + # Optionally remove tests + + lib.optionalString (!buildTests) '' + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(test)" "" + substituteInPlace codegen/CMakeLists.txt \ + --replace-fail "include(ROCMTest)" "" + '' + # Optionally remove examples + + lib.optionalString (!buildExamples) '' + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(example)" "" + ''; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildExamples '' + mkdir -p $example/bin + mv $out/bin/example_* $example/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets; + + meta = with lib; { + description = "Performance portable programming model for machine learning tensor operators"; + homepage = "https://github.com/ROCm/composable_kernel"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + broken = true; + }; +}) diff --git a/pkgs/rocm-modules/composable_kernel/ck4inductor.nix b/pkgs/rocm-modules/composable_kernel/ck4inductor.nix new file mode 100644 index 0000000..d09268c --- /dev/null +++ b/pkgs/rocm-modules/composable_kernel/ck4inductor.nix @@ -0,0 +1,43 @@ +{ + buildPythonPackage, + python, + composable_kernel, + lib, + setuptools, + setuptools-scm, + rocm-merged-llvm, +}: +buildPythonPackage { + pyproject = true; + pname = 
"ck4inductor"; + build-system = [ + setuptools + setuptools-scm + ]; + version = "6.4.0"; + inherit (composable_kernel) src; + pythonImportsCheck = [ + "ck4inductor" + "ck4inductor.universal_gemm.gen_instances" + "ck4inductor.universal_gemm.gen_instances" + "ck4inductor.universal_gemm.op" + ]; + propagatedBuildInputs = [ + # At runtime will fail to compile anything with ck4inductor without this + # can't easily use in checks phase because most of the compiler machinery is in torch + rocm-merged-llvm + ]; + checkPhase = '' + if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then + echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor" + exit 1 + fi + ''; + meta = with lib; { + description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations"; + homepage = "https://github.com/ROCm/composable_kernel"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +} diff --git a/pkgs/rocm-modules/composable_kernel/default.nix b/pkgs/rocm-modules/composable_kernel/default.nix new file mode 100644 index 0000000..dc8d6c3 --- /dev/null +++ b/pkgs/rocm-modules/composable_kernel/default.nix @@ -0,0 +1,249 @@ +{ + lib, + clr, + composable_kernel_base, +}: + +let + parts = { + _mha = { + # mha takes ~3hrs on 64 cores on an EPYC milan system at ~2.5GHz + # big-parallel builders are one gen newer and clocked ~30% higher but only 24 cores + # Should be <10h timeout but might be cutting it close + # TODO: work out how to split this into smaller chunks instead of all 3k mha instances together + # mha_0,1,2, search ninja target file for the individual instances, split by the index? + # TODO: can we prune the generated instances down to only what in practice are used with popular models + # when using flash-attention + MHA kernels? 
+ targets = [ + "device_mha_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ]; + }; + gemm_multiply_multiply = { + targets = [ + "device_gemm_multiply_multiply_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ]; + }; + grouped_conv = { + targets = [ + "device_grouped_conv1d_bwd_weight_instance" + "device_grouped_conv2d_bwd_data_instance" + "device_grouped_conv2d_bwd_weight_instance" + "device_grouped_conv1d_fwd_instance" + "device_grouped_conv2d_fwd_instance" + "device_grouped_conv2d_fwd_dynamic_op_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + grouped_conv_bwd_3d = { + targets = [ + "device_grouped_conv3d_bwd_data_instance" + "device_grouped_conv3d_bwd_data_bilinear_instance" + "device_grouped_conv3d_bwd_data_scale_instance" + "device_grouped_conv3d_bwd_weight_instance" + "device_grouped_conv3d_bwd_weight_bilinear_instance" + "device_grouped_conv3d_bwd_weight_scale_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + grouped_conv_fwd_3d = { + targets = [ + "device_grouped_conv3d_fwd_instance" + "device_grouped_conv3d_fwd_bilinear_instance" + "device_grouped_conv3d_fwd_convinvscale_instance" + "device_grouped_conv3d_fwd_convscale_instance" + "device_grouped_conv3d_fwd_convscale_add_instance" + "device_grouped_conv3d_fwd_convscale_relu_instance" + "device_grouped_conv3d_fwd_dynamic_op_instance" + "device_grouped_conv3d_fwd_scale_instance" + "device_grouped_conv3d_fwd_scaleadd_ab_instance" + "device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + batched_gemm = { + targets = [ + "device_batched_gemm_instance" + "device_batched_gemm_add_relu_gemm_add_instance" + "device_batched_gemm_bias_permute_instance" + "device_batched_gemm_gemm_instance" + "device_batched_gemm_reduce_instance" + "device_batched_gemm_softmax_gemm_instance" + 
"device_batched_gemm_softmax_gemm_permute_instance" + "device_grouped_gemm_instance" + "device_grouped_gemm_bias_instance" + "device_grouped_gemm_fastgelu_instance" + "device_grouped_gemm_fixed_nk_instance" + "device_grouped_gemm_fixed_nk_multi_abd_instance" + "device_grouped_gemm_tile_loop_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + gemm_universal = { + targets = [ + "device_gemm_universal_instance" + "device_gemm_universal_batched_instance" + "device_gemm_universal_reduce_instance" + "device_gemm_universal_streamk_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ]; + }; + gemm_other = { + targets = [ + "device_gemm_instance" + "device_gemm_ab_scale_instance" + "device_gemm_add_instance" + "device_gemm_add_add_fastgelu_instance" + "device_gemm_add_fastgelu_instance" + "device_gemm_add_multiply_instance" + "device_gemm_add_relu_instance" + "device_gemm_add_relu_add_layernorm_instance" + "device_gemm_add_silu_instance" + "device_gemm_bias_add_reduce_instance" + "device_gemm_bilinear_instance" + "device_gemm_fastgelu_instance" + "device_gemm_multi_abd_instance" + "device_gemm_multiply_add_instance" + "device_gemm_reduce_instance" + "device_gemm_splitk_instance" + "device_gemm_streamk_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + conv = { + targets = [ + "device_conv1d_bwd_data_instance" + "device_conv2d_bwd_data_instance" + "device_conv2d_fwd_instance" + "device_conv2d_fwd_bias_relu_instance" + "device_conv2d_fwd_bias_relu_add_instance" + "device_conv3d_bwd_data_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + pool = { + targets = [ + "device_avg_pool2d_bwd_instance" + "device_avg_pool3d_bwd_instance" + "device_pool2d_fwd_instance" + "device_pool3d_fwd_instance" + "device_max_pool_bwd_instance" + ]; + }; + other1 = { + targets = [ + "device_batchnorm_instance" + "device_contraction_bilinear_instance" + 
"device_contraction_scale_instance" + "device_elementwise_instance" + "device_elementwise_normalization_instance" + "device_normalization_bwd_data_instance" + "device_normalization_bwd_gamma_beta_instance" + "device_normalization_fwd_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + other2 = { + targets = [ + "device_column_to_image_instance" + "device_image_to_column_instance" + "device_permute_scale_instance" + "device_quantization_instance" + "device_reduce_instance" + "device_softmax_instance" + "device_transpose_instance" + ]; + requiredSystemFeatures = [ "big-parallel" ]; + }; + }; + tensorOpBuilder = + { + part, + targets, + extraCmakeFlags ? [ ], + requiredSystemFeatures ? [ ], + }: + composable_kernel_base.overrideAttrs (old: { + inherit requiredSystemFeatures; + pname = "composable_kernel${clr.gpuArchSuffix}-${part}"; + makeTargets = targets; + preBuild = '' + echo "Building ${part}" + makeFlagsArray+=($makeTargets) + substituteInPlace Makefile \ + --replace-fail '.NOTPARALLEL:' "" + ''; + + # Compile parallelism adjusted based on available RAM + # Never uses less than NIX_BUILD_CORES/4, never uses more than NIX_BUILD_CORES + # CK uses an unusually high amount of memory per core in the build step + # Nix/nixpkgs doesn't really have any infra to tell it that this build is unusually memory hungry + # So, bodge. Otherwise you end up having to build all of ROCm with a low core limit when + # it's only this package that has trouble. + preConfigure = + old.preConfigure or "" + + '' + MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo) + MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo) + APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE)) + MAX_CORES=$((1 + APPX_GB/3)) + MAX_CORES=$((MAX_CORES < NIX_BUILD_CORES/3 ? NIX_BUILD_CORES/3 : MAX_CORES)) + export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? 
MAX_CORES : NIX_BUILD_CORES))" + echo "Picked new core limit NIX_BUILD_CORES=$NIX_BUILD_CORES based on available mem: $APPX_GB GB" + cmakeFlagsArray+=( + "-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES" + ) + ''; + cmakeFlags = old.cmakeFlags ++ extraCmakeFlags; + # Early exit after build phase with success, skips fixups etc + # Will get copied back into /build of the final CK + postBuild = '' + find . -name "*.o" -type f | while read -r file; do + mkdir -p "$out/$(dirname "$file")" + cp --reflink=auto "$file" "$out/$file" + done + exit 0 + ''; + meta = old.meta // { + broken = false; + }; + }); + composable_kernel_parts = builtins.mapAttrs ( + part: targets: tensorOpBuilder (targets // { inherit part; }) + ) parts; +in + +composable_kernel_base.overrideAttrs ( + finalAttrs: old: { + pname = "composable_kernel${clr.gpuArchSuffix}"; + parts_dirs = builtins.attrValues composable_kernel_parts; + disallowedReferences = builtins.attrValues composable_kernel_parts; + preBuild = '' + for dir in $parts_dirs; do + find "$dir" -type f -name "*.o" | while read -r file; do + # Extract the relative path by removing the output directory prefix + rel_path="''${file#"$dir/"}" + + # Create parent directory if it doesn't exist + mkdir -p "$(dirname "$rel_path")" + + # Copy the file back to its original location, give it a future timestamp + # so make treats it as up to date + cp --reflink=auto --no-preserve=all "$file" "$rel_path" + touch -d "now +10 hours" "$rel_path" + done + done + ''; + passthru = old.passthru // { + parts = composable_kernel_parts; + }; + meta = old.meta // { + # Builds which don't target any gfx9 cause cmake errors in dependent projects + broken = !finalAttrs.passthru.anyGfx9Target; + }; + } +) diff --git a/pkgs/rocm-modules/default.nix b/pkgs/rocm-modules/default.nix new file mode 100644 index 0000000..cddffb2 --- /dev/null +++ b/pkgs/rocm-modules/default.nix @@ -0,0 +1,513 @@ +{ + lib, + config, + callPackage, + newScope, + recurseIntoAttrs, + 
symlinkJoin, + fetchFromGitHub, + ffmpeg_4, + boost179, + opencv, + libjpeg_turbo, + python3Packages, + triton-llvm, + openmpi, + rocmGpuArches ? [ ], +}: + +let + outer = lib.makeScope newScope ( + self: + let + inherit (self) llvm; + pyPackages = python3Packages; + openmpi-orig = openmpi; + in + { + inherit rocmGpuArches; + buildTests = false; + buildBenchmarks = false; + stdenv = llvm.rocmClangStdenv; + + rocmPath = self.callPackage ./rocm-path { }; + rocmUpdateScript = self.callPackage ./update.nix { }; + + ## ROCm ## + llvm = recurseIntoAttrs ( + callPackage ./llvm/default.nix { + inherit (self) rocm-device-libs rocm-runtime; + } + ); + inherit (self.llvm) rocm-merged-llvm clang openmp; + + rocm-core = self.callPackage ./rocm-core { }; + amdsmi = pyPackages.callPackage ./amdsmi { + inherit (self) rocmUpdateScript; + }; + + rocm-cmake = self.callPackage ./rocm-cmake { }; + + rocm-smi = pyPackages.callPackage ./rocm-smi { + inherit (self) rocmUpdateScript; + }; + + rocm-device-libs = self.callPackage ./rocm-device-libs { + inherit (llvm) rocm-merged-llvm; + }; + + rocm-runtime = self.callPackage ./rocm-runtime { + inherit (llvm) rocm-merged-llvm; + }; + + rocm-comgr = self.callPackage ./rocm-comgr { + inherit (llvm) rocm-merged-llvm; + }; + + rocminfo = self.callPackage ./rocminfo { }; + + # Unfree + hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { }; + + rdc = self.callPackage ./rdc { }; + + rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { }; + + hip-common = self.callPackage ./hip-common { }; + + # Eventually will be in the LLVM repo + hipcc = self.callPackage ./hipcc { + inherit (llvm) rocm-merged-llvm; + }; + + # Replaces hip, opencl-runtime, and rocclr + clr = self.callPackage ./clr { }; + + aotriton = self.callPackage ./aotriton { }; + + hipify = self.callPackage ./hipify { + inherit (llvm) + clang + rocm-merged-llvm + ; + }; + + # hsakmt was merged into rocm-runtime + hsakmt = self.rocm-runtime; + + rocprofiler = 
self.callPackage ./rocprofiler { + inherit (llvm) clang; + }; + rocprofiler-register = self.callPackage ./rocprofiler-register { + inherit (llvm) clang; + }; + + # Needs GCC + roctracer = self.callPackage ./roctracer { }; + + rocgdb = self.callPackage ./rocgdb { }; + + rocdbgapi = self.callPackage ./rocdbgapi { }; + + rocr-debug-agent = self.callPackage ./rocr-debug-agent { }; + + rocprim = self.callPackage ./rocprim { }; + + rocsparse = self.callPackage ./rocsparse { }; + + rocthrust = self.callPackage ./rocthrust { }; + + rocrand = self.callPackage ./rocrand { }; + + hiprand = self.callPackage ./hiprand { }; + + rocfft = self.callPackage ./rocfft { }; + + mscclpp = self.callPackage ./mscclpp { }; + + rccl = self.callPackage ./rccl { }; + + # RCCL with sanitizers and tests + # Can't have with sanitizer build as dep of other packages without + # runtime crashes due to ASAN not loading first + rccl-tests = self.callPackage ./rccl { + buildTests = true; + }; + + hipcub = self.callPackage ./hipcub { }; + + hipsparse = self.callPackage ./hipsparse { }; + + hipfort = self.callPackage ./hipfort { }; + + hipfft = self.callPackage ./hipfft { }; + + hiprt = self.callPackage ./hiprt { }; + + tensile = pyPackages.callPackage ./tensile { + inherit (self) + rocmUpdateScript + clr + ; + }; + + rocblas = self.callPackage ./rocblas { + buildTests = true; + buildBenchmarks = true; + inherit (self) roctracer; + }; + + rocsolver = self.callPackage ./rocsolver { }; + + rocwmma = self.callPackage ./rocwmma { }; + + rocalution = self.callPackage ./rocalution { }; + + rocmlir-rock = self.callPackage ./rocmlir { + buildRockCompiler = true; + }; + rocmlir = self.rocmlir-rock; + + hipsolver = self.callPackage ./hipsolver { }; + + hipblas-common = self.callPackage ./hipblas-common { }; + + hipblas = self.callPackage ./hipblas { }; + + hipblaslt = self.callPackage ./hipblaslt { }; + + # hipTensor - Only supports GFX9 + + composable_kernel_base = self.callPackage ./composable_kernel/base.nix { 
}; + composable_kernel = self.callPackage ./composable_kernel { }; + + ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix { + inherit (self) composable_kernel; + inherit (llvm) rocm-merged-llvm; + }; + + half = self.callPackage ./half { }; + + miopen = self.callPackage ./miopen { + boost = boost179.override { enableStatic = true; }; + }; + + miopen-hip = self.miopen; + + migraphx = self.callPackage ./migraphx { }; + + rpp = self.callPackage ./rpp { }; + + rpp-hip = self.rpp.override { + useOpenCL = false; + useCPU = false; + }; + + rpp-opencl = self.rpp.override { + useOpenCL = true; + useCPU = false; + }; + + rpp-cpu = self.rpp.override { + useOpenCL = false; + useCPU = true; + }; + + mivisionx = self.callPackage ./mivisionx { + opencv = opencv.override { enablePython = true; }; + # TODO: Remove this pin in ROCm 6.4+ + # FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460 + ffmpeg = ffmpeg_4; + # Unfortunately, rocAL needs a custom libjpeg-turbo until further notice + # See: https://github.com/ROCm/MIVisionX/issues/1051 + libjpeg_turbo = libjpeg_turbo.overrideAttrs { + version = "2.0.6.1"; + src = fetchFromGitHub { + owner = "rrawther"; + repo = "libjpeg-turbo"; + rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb"; + sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY="; + }; + # overwrite all patches, since patches for newer version do not apply + patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ]; + }; + }; + + mivisionx-hip = self.mivisionx.override { + rpp = self.rpp-hip; + useOpenCL = false; + useCPU = false; + }; + + mivisionx-cpu = self.mivisionx.override { + rpp = self.rpp-cpu; + useOpenCL = false; + useCPU = true; + }; + + # Even if config.rocmSupport is false we need rocmSupport true + # version of ucc/ucx in openmpi in this package set + openmpi = openmpi-orig.override ( + prev: + let + ucx = prev.ucx.override { + enableCuda = false; + enableRocm = true; + }; + in + { + inherit ucx; + 
ucc = prev.ucc.override { + enableCuda = false; + inherit ucx; + }; + } + ); + mpi = self.openmpi; + + triton-llvm = triton-llvm.overrideAttrs { + src = fetchFromGitHub { + owner = "llvm"; + repo = "llvm-project"; + # make sure this matches triton llvm rel branch hash for now + # https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt + rev = "86b69c31642e98f8357df62c09d118ad1da4e16a"; + hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE="; + }; + pname = "triton-llvm-rocm"; + patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase + }; + + triton = pyPackages.callPackage ./triton { rocmPackages = self; }; + + ## Meta ## + # Emulate common ROCm meta layout + # These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations + # Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues! + # See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png + # See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html + meta = with self; rec { + rocm-developer-tools = symlinkJoin { + name = "rocm-developer-tools-meta"; + paths = [ + hsa-amd-aqlprofile-bin + rocm-core + rocr-debug-agent + roctracer + rocdbgapi + rocprofiler + rocgdb + rocm-language-runtime + ]; + }; + rocm-ml-sdk = symlinkJoin { + name = "rocm-ml-sdk-meta"; + paths = [ + rocm-core + miopen-hip + rocm-hip-sdk + rocm-ml-libraries + ]; + }; + rocm-ml-libraries = symlinkJoin { + name = "rocm-ml-libraries-meta"; + paths = [ + llvm.clang + llvm.mlir + llvm.openmp + rocm-core + miopen-hip + rocm-hip-libraries + ]; + }; + rocm-hip-sdk = symlinkJoin { + name = "rocm-hip-sdk-meta"; + paths = [ + rocprim + rocalution + hipfft + rocm-core + hipcub + hipblas + hipblaslt + rocrand + rocfft + hiprt + rocsparse + rccl + rocthrust + rocblas + hipsparse + hipfort + rocwmma + hipsolver + rocsolver + 
rocm-hip-libraries + rocm-hip-runtime-devel + ]; + }; + rocm-hip-libraries = symlinkJoin { + name = "rocm-hip-libraries-meta"; + paths = [ + rocblas + hipfort + rocm-core + rocsolver + rocalution + rocrand + hipblas + hipblaslt + rocfft + hipfft + hiprt + rccl + rocsparse + hipsparse + hipsolver + rocm-hip-runtime + ]; + }; + rocm-openmp-sdk = symlinkJoin { + name = "rocm-openmp-sdk-meta"; + paths = [ + rocm-core + llvm.clang + llvm.mlir + llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp) + rocm-language-runtime + ]; + }; + rocm-opencl-sdk = symlinkJoin { + name = "rocm-opencl-sdk-meta"; + paths = [ + rocm-core + rocm-runtime + clr + clr.icd + rocm-opencl-runtime + ]; + }; + rocm-opencl-runtime = symlinkJoin { + name = "rocm-opencl-runtime-meta"; + paths = [ + rocm-core + clr + clr.icd + rocm-language-runtime + ]; + }; + rocm-hip-runtime-devel = symlinkJoin { + name = "rocm-hip-runtime-devel-meta"; + paths = [ + clr + rocm-core + hipify + rocm-cmake + llvm.clang + llvm.mlir + llvm.openmp + rocm-runtime + rocm-hip-runtime + ]; + }; + rocm-hip-runtime = symlinkJoin { + name = "rocm-hip-runtime-meta"; + paths = [ + rocm-core + rocminfo + clr + rocm-language-runtime + ]; + }; + rocm-language-runtime = symlinkJoin { + name = "rocm-language-runtime-meta"; + paths = [ + rocm-runtime + rocm-core + rocm-comgr + llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp) + ]; + }; + rocm-all = symlinkJoin { + name = "rocm-all-meta"; + paths = [ + rocm-developer-tools + rocm-ml-sdk + rocm-ml-libraries + rocm-hip-sdk + rocm-hip-libraries + rocm-openmp-sdk + rocm-opencl-sdk + rocm-opencl-runtime + rocm-hip-runtime-devel + rocm-hip-runtime + rocm-language-runtime + ]; + }; + }; + + rocm-tests = self.callPackage ./rocm-tests { + rocmPackages = self; + }; + } + // lib.optionalAttrs config.allowAliases { + rocm-thunk = throw '' + 'rocm-thunk' has been removed. It's now part of the ROCm runtime. 
+ ''; # Added 2025-3-16 + + clang-ocl = throw '' + 'clang-ocl' has been deprecated upstream. Use ROCm's clang directly. + ''; # Added 2025-3-16 + + miopengemm = throw '' + 'miopengemm' has been deprecated. + ''; # Added 2024-3-3 + + miopen-opencl = throw '' + 'miopen-opencl' has been deprecated. + ''; # Added 2024-3-3 + + mivisionx-opencl = throw '' + 'mivisionx-opencl' has been deprecated. + Other versions of mivisionx are still available. + ''; # Added 2024-3-24 + } + ); + scopeForArches = + arches: + outer.overrideScope ( + _final: prev: { + clr = prev.clr.override { + localGpuTargets = arches; + }; + } + ); +in +outer +// builtins.listToAttrs ( + builtins.map (arch: { + name = arch; + value = scopeForArches [ arch ]; + }) outer.clr.gpuTargets +) +// { + gfx9 = scopeForArches [ + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + ]; + gfx10 = scopeForArches [ + "gfx1010" + "gfx1030" + ]; + gfx11 = scopeForArches [ + "gfx1100" + "gfx1101" + "gfx1102" + ]; + + gfx12 = scopeForArches [ + "gfx1201" + ]; +} diff --git a/pkgs/rocm-modules/half/default.nix b/pkgs/rocm-modules/half/default.nix new file mode 100644 index 0000000..7c77d2b --- /dev/null +++ b/pkgs/rocm-modules/half/default.nix @@ -0,0 +1,39 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "half"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "half"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "C++ library for half precision floating point arithmetics"; + homepage = "https://github.com/ROCm/half"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.unix; + }; +}) diff 
--git a/pkgs/rocm-modules/hip-common/default.nix b/pkgs/rocm-modules/hip-common/default.nix new file mode 100644 index 0000000..fc152b8 --- /dev/null +++ b/pkgs/rocm-modules/hip-common/default.nix @@ -0,0 +1,45 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hip-common"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "HIP"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-2Iekju0t12c6iiqb104j+Lh53FvZwyqYtST12RkkuKc="; + }; + + dontConfigure = true; + dontBuild = true; + + installPhase = '' + runHook preInstall + + mkdir -p $out + mv * $out + + runHook postInstall + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "C++ Heterogeneous-Compute Interface for Portability"; + homepage = "https://github.com/ROCm/HIP"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipblas-common/default.nix b/pkgs/rocm-modules/hipblas-common/default.nix new file mode 100644 index 0000000..a5296e8 --- /dev/null +++ b/pkgs/rocm-modules/hipblas-common/default.nix @@ -0,0 +1,42 @@ +{ + lib, + stdenv, + cmake, + fetchFromGitHub, + rocm-cmake, + rocmUpdateScript, +}: +stdenv.mkDerivation (finalAttrs: { + pname = "hipblas-common"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipBLAS-common"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-eTwoAXH2HGdSAOLTZHJUFHF+c2wWHixqeMqr60KxJrc="; + }; + + nativeBuildInputs = [ + cmake + ]; + + buildInputs = [ + rocm-cmake + ]; + + strictDeps = true; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + meta = with lib; { + description = "Common files 
shared by hipBLAS and hipBLASLt"; + homepage = "https://github.com/ROCm/hipBLASlt"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipblas/default.nix b/pkgs/rocm-modules/hipblas/default.nix new file mode 100644 index 0000000..95cddfa --- /dev/null +++ b/pkgs/rocm-modules/hipblas/default.nix @@ -0,0 +1,140 @@ +{ + lib, + stdenv, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gfortran, + hipblas-common, + rocblas, + rocsolver, + rocsparse, + rocprim, + gtest, + lapack-reference, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, +}: + +# Can also use cuBLAS +stdenv.mkDerivation (finalAttrs: { + pname = "hipblas"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipBLAS"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-lQv8Ik6+0ldqyeJ05CSGB0309nIpzlRL3CRYeQxVfd0="; + }; + + patches = [ + # https://github.com/ROCm/hipBLAS/pull/952 + # (fetchpatch { + # name = "transitively-depend-hipblas-common.patch"; + # url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch"; + # hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ="; + # }) + ]; + + postPatch = '' + substituteInPlace library/CMakeLists.txt \ + --replace-fail "find_package(Git REQUIRED)" "" + ''; + + nativeBuildInputs = [ + + cmake + rocm-cmake + clr + gfortran + ]; + + propagatedBuildInputs = [ hipblas-common ]; + + buildInputs = + [ + rocblas + rocprim + rocsparse + rocsolver + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + cmakeFlags = + [ + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}" + # Upstream is 
migrating to amdclang++, it is likely this will be correct in next version bump + #"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblas-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblas-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm BLAS marshalling library"; + homepage = "https://github.com/ROCm/hipBLAS"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipblaslt/default.nix b/pkgs/rocm-modules/hipblaslt/default.nix new file mode 100644 index 0000000..7b589e2 --- /dev/null +++ b/pkgs/rocm-modules/hipblaslt/default.nix @@ -0,0 +1,235 @@ +{ + lib, + stdenv, + fetchpatch, + fetchFromGitHub, + cmake, + rocm-cmake, + clr, + gfortran, + gtest, + msgpack, + libxml2, + python3, + python3Packages, + openmp, + hipblas-common, + tensile, + lapack-reference, + ncurses, + libffi, + zlib, + zstd, + rocmUpdateScript, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? 
false, + # hipblaslt supports only devices with MFMA or WMMA + # WMMA on gfx1100 may be broken + # MFMA on MI100 may be broken + # MI200/MI300 known to work + gpuTargets ? ( + clr.localGpuTargets or [ + # "gfx908" FIXME: confirm MFMA on MI100 works + "gfx90a" + "gfx942" + # "gfx1100" FIXME: confirm WMMA targets work + ] + ), +}: + +stdenv.mkDerivation ( + finalAttrs: + let + supportsTargetArches = + (builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets) + || (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets); + tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs { + inherit (finalAttrs) src; + sourceRoot = "${finalAttrs.src.name}/tensilelite"; + }; + py = python3.withPackages (ps: [ + ps.pyyaml + ps.setuptools + ps.packaging + ]); + gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets); + compiler = "amdclang++"; + cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need to be patched to include this properly + in + { + pname = "hipblaslt${clr.gpuArchSuffix}"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipBLASLt"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-ojNa3jt5285gsPwo4icATJD9JdxmbJBjfCF4A1ttCQ4="; + }; + env.CXX = compiler; + env.CFLAGS = cFlags; + env.CXXFLAGS = cFlags; + env.ROCM_PATH = "${clr}"; + env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++"; + env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++"; + # Some tensile scripts look for this as an env var rather than a cmake flag + env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++"; + requiredSystemFeatures = [ "big-parallel" ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + postPatch = '' + mkdir -p build/Tensile/library + # git isn't needed and we have no .git + substituteInPlace cmake/Dependencies.cmake \ + --replace-fail 
"find_package(Git REQUIRED)" "" + substituteInPlace CMakeLists.txt \ + --replace-fail "include(virtualenv)" "" \ + --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \ + --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \ + --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \ + --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' + # FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand + # Working around for now by disabling the ExtOp libs + substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \ + --replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries' + substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \ + --replace-fail '${"\${rocm_path}"}/bin/' "" + ''; + + # Apply patches to allow building without a target arch if we need to do that + patches = lib.optionals (!supportsTargetArches) [ + # Add ability to build without specifying any arch. 
+ (fetchpatch { + sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk="; + url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch"; + }) + # Followup to above patch for 6.3.x + (fetchpatch { + sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10="; + url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch"; + }) + ]; + + doCheck = false; + doInstallCheck = false; + + nativeBuildInputs = [ + cmake + rocm-cmake + py + clr + gfortran + # need make to get streaming console output so nix knows build is still running + # so deliberately not using ninja + ]; + + buildInputs = + [ + hipblas-common + tensile' + openmp + libffi + ncurses + + # Tensile deps - not optional, building without tensile isn't actually supported + msgpack # FIXME: not included in cmake! + libxml2 + python3Packages.msgpack + python3Packages.joblib + zlib + zstd + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + cmakeFlags = + [ + "-Wno-dev" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}" + "-DTENSILE_USE_HIP=ON" + "-DTENSILE_BUILD_CLIENT=OFF" + "-DTENSILE_USE_FLOAT16_BUILTIN=ON" + "-DCMAKE_CXX_COMPILER=${compiler}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DHIPBLASLT_ENABLE_MARKER=Off" + # FIXME what are the implications of hardcoding this? 
+ "-DTensile_CODE_OBJECT_VERSION=V5" + "-DTensile_COMPILER=${compiler}" + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" + "-DTensile_LIBRARY_FORMAT=msgpack" + ] + ++ lib.optionals (!supportsTargetArches) [ + "-DBUILD_WITH_TENSILE=OFF" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblas-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblas-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + # If this is false there are no kernels in the output lib + # and it's useless at runtime + # so if it's an optional dep it's best to not depend on it + # Some packages like torch need hipblaslt to compile + # and are fine ignoring it at runtime if it's not supported + # so we have to support building an empty hipblaslt + passthru.supportsTargetArches = supportsTargetArches; + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner repo; + }; + passthru.tensilelite = tensile'; + meta = with lib; { + description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API"; + homepage = "https://github.com/ROCm/hipBLASlt"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; + } +) diff --git a/pkgs/rocm-modules/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch b/pkgs/rocm-modules/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch new file mode 100644 index 0000000..bfc386c --- /dev/null +++ 
b/pkgs/rocm-modules/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch @@ -0,0 +1,39 @@ +From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001 +From: Luna Nova +Date: Fri, 11 Oct 2024 02:56:22 -0700 +Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived + platforms + +Fixes UB when hipBinUtilPtr_ is used. +--- + amd/hipcc/src/hipBin_amd.h | 1 - + amd/hipcc/src/hipBin_nvidia.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h +index 0a782d1beab9..36cd625ae8bc 100644 +--- a/src/hipBin_amd.h ++++ b/src/hipBin_amd.h +@@ -42,7 +42,6 @@ THE SOFTWARE. + + class HipBinAmd : public HipBinBase { + private: +- HipBinUtil* hipBinUtilPtr_; + string hipClangPath_ = ""; + string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_; + PlatformInfo platformInfoAMD_; +diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h +index ff142cc1cea2..09b7b80979c7 100644 +--- a/src/hipBin_nvidia.h ++++ b/src/hipBin_nvidia.h +@@ -31,7 +31,6 @@ THE SOFTWARE. 
+ + class HipBinNvidia : public HipBinBase { + private: +- HipBinUtil* hipBinUtilPtr_; + string cudaPath_ = ""; + PlatformInfo platformInfoNV_; + string hipCFlags_, hipCXXFlags_, hipLdFlags_; +-- +2.46.0 + diff --git a/pkgs/rocm-modules/hipcc/default.nix b/pkgs/rocm-modules/hipcc/default.nix new file mode 100644 index 0000000..4dc1c60 --- /dev/null +++ b/pkgs/rocm-modules/hipcc/default.nix @@ -0,0 +1,47 @@ +{ + lib, + stdenv, + rocm-merged-llvm, + cmake, + lsb-release, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hipcc"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; + sourceRoot = "${finalAttrs.src.name}/amd/hipcc"; + + nativeBuildInputs = [ cmake ]; + + buildInputs = [ rocm-merged-llvm ]; + + patches = [ + # https://github.com/ROCm/llvm-project/pull/183 + # Fixes always-invoked UB in hipcc + ./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch + ]; + + postPatch = '' + substituteInPlace src/hipBin_amd.h \ + --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" + ''; + + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + ]; + postInstall = '' + rm -r $out/hip/bin + ln -s $out/bin $out/hip/bin + ''; + + meta = with lib; { + description = "Compiler driver utility that calls clang or nvcc"; + homepage = "https://github.com/ROCm/HIPCC"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipcub/default.nix b/pkgs/rocm-modules/hipcub/default.nix new file mode 100644 index 0000000..ca6f388 --- /dev/null +++ b/pkgs/rocm-modules/hipcub/default.nix @@ -0,0 +1,102 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocprim, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], +}: + +# CUB can also be used as a backend instead of rocPRIM. 
+stdenv.mkDerivation (finalAttrs: { + pname = "hipcub"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipCUB"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-pwCAsRx5XyuCvppTmZ4VG83iYl9ilAQCZds4oKINhSI="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + ]; + + buildInputs = + [ + rocprim + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; + + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Thin wrapper library on top of rocPRIM or CUB"; + homepage = "https://github.com/ROCm/hipCUB"; + license = with licenses; [ bsd3 ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipfft/default.nix b/pkgs/rocm-modules/hipfft/default.nix new file mode 100644 index 0000000..f6bfaf7 --- /dev/null +++ b/pkgs/rocm-modules/hipfft/default.nix @@ -0,0 +1,125 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + 
cmake, + rocm-cmake, + clr, + git, + rocfft, + gtest, + boost, + fftw, + fftwFloat, + openmp, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ? [ ], +}: + +# Can also use cuFFT +stdenv.mkDerivation (finalAttrs: { + pname = "hipfft"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipFFT"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-VA9OC/TvvQyFCVVox/9ihuE2W0Ia87O0R3YsLM4Jzuk="; + fetchSubmodules = true; + }; + + nativeBuildInputs = [ + clr + git + cmake + rocm-cmake + ]; + + buildInputs = + [ + rocfft + ] + ++ lib.optionals (buildTests || buildBenchmarks || buildSamples) [ + gtest + boost + fftw + fftwFloat + openmp + ]; + + cmakeFlags = + [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" + "-DHIP_ROOT_DIR=${clr}" + "-DHIP_PATH=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_RIDER=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipfft-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipfft-rider $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv clients/staging/hipfft_* $sample/bin + patchelf $sample/bin/hipfft_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" + '' + + 
lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "FFT marshalling library"; + homepage = "https://github.com/ROCm/hipFFT"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipfort/default.nix b/pkgs/rocm-modules/hipfort/default.nix new file mode 100644 index 0000000..bad42cc --- /dev/null +++ b/pkgs/rocm-modules/hipfort/default.nix @@ -0,0 +1,66 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + gfortran, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hipfort"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipfort"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-a2YPGAWP+gF2EykpKmkG/fEIW6blm2ChOybmLAHQQdw="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + gfortran + ]; + + cmakeFlags = [ + "-DHIPFORT_COMPILER=${gfortran}/bin/gfortran" + "-DHIPFORT_AR=${gfortran.cc}/bin/gcc-ar" + "-DHIPFORT_RANLIB=${gfortran.cc}/bin/gcc-ranlib" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + postPatch = '' + patchShebangs bin + + substituteInPlace bin/hipfc bin/mymcpu \ + --replace "/bin/cat" "cat" + + substituteInPlace bin/CMakeLists.txt \ + --replace "/bin/mkdir" "mkdir" \ + --replace "/bin/cp" "cp" \ + --replace "/bin/sed" "sed" \ + --replace "/bin/chmod" "chmod" \ + --replace "/bin/ln" "ln" + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Fortran interfaces for ROCm libraries"; + homepage = 
"https://github.com/ROCm/hipfort"; + license = with licenses; [ mit ]; # mitx11 + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipify/default.nix b/pkgs/rocm-modules/hipify/default.nix new file mode 100644 index 0000000..4b14f0a --- /dev/null +++ b/pkgs/rocm-modules/hipify/default.nix @@ -0,0 +1,64 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clang, + libxml2, + rocm-merged-llvm, + zlib, + zstd, + perl, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hipify"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "HIPIFY"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-uj25WmGCpwouS1yzW9Oil5Vyrbyj5yRITvWF9WaGozM="; + }; + + nativeBuildInputs = [ + cmake + ]; + + buildInputs = [ + libxml2 + rocm-merged-llvm + zlib + zstd + perl + ]; + + postPatch = '' + substituteInPlace CMakeLists.txt \ + --replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang" + chmod +x bin/* + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + postInstall = '' + chmod +x $out/bin/* + chmod +x $out/libexec/* + patchShebangs $out/bin/ + patchShebangs $out/libexec/ + ''; + + meta = with lib; { + description = "Convert CUDA to Portable C++ Code"; + homepage = "https://github.com/ROCm/HIPIFY"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hiprand/default.nix b/pkgs/rocm-modules/hiprand/default.nix new file mode 100644 index 0000000..c65fe3d --- /dev/null +++ b/pkgs/rocm-modules/hiprand/default.nix @@ -0,0 +1,79 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + rocrand, + gtest, + buildTests ? false, + gpuTargets ? 
[ ], +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hiprand"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipRAND"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-ISl4bVW/JvT81gJ/10JlKliv7ds5WtP2f/Dnc9qvh9Q="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + ]; + + buildInputs = [ rocrand ] ++ (lib.optionals buildTests [ gtest ]); + + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ]; + + postInstall = lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + rm -r $out/bin/hipRAND + # Fail if bin/ isn't actually empty + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "HIP wrapper for rocRAND and cuRAND"; + homepage = "https://github.com/ROCm/hipRAND"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hiprt/default.nix b/pkgs/rocm-modules/hiprt/default.nix new file mode 100644 index 0000000..9abf6f7 --- /dev/null +++ b/pkgs/rocm-modules/hiprt/default.nix @@ -0,0 +1,60 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + clr, + gcc, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hiprt"; + version = "2.5.a21e075.3"; + + src = fetchFromGitHub { + owner = "GPUOpen-LibrariesAndSDKs"; + repo = "HIPRT"; + tag = finalAttrs.version; + sha256 = "sha256-3yGhwIsFHlFMCEzuYnXuXNzs99m7f2LTkYaTGs0GEcI="; + }; + + postPatch = '' + g++ 
contrib/easy-encryption/cl.cpp -o contrib/easy-encryption/bin/linux/ee64 #replacing prebuilt binary + ''; + + nativeBuildInputs = [ + gcc # required for replacing easy-encryption binary + cmake + clr + ]; + buildInputs = [ + # TODO: do we need anything here? + ]; + + cmakeFlags = [ + #TODO: mostly copied from the Arch package, verify these: + "-D CMAKE_BUILD_TYPE=Release" + "-D HIP_PATH=${clr}" + "-D BAKE_KERNEL=OFF" + "-D BAKE_COMPILED_KERNEL=OFF" + "-D BITCODE=ON" + "-D PRECOMPILE=ON" + "-D NO_UNITTEST=ON" + "-D FORCE_DISABLE_CUDA=ON" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-D CMAKE_INSTALL_BINDIR=bin" + "-D CMAKE_INSTALL_LIBDIR=lib" + "-D CMAKE_INSTALL_INCLUDEDIR=include" + ]; + + meta = { + homepage = "https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT"; + description = ""; + license = lib.licenses.mit; + maintainers = with lib.maintainers; [ + mksafavi + ]; + platforms = lib.platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipsolver/default.nix b/pkgs/rocm-modules/hipsolver/default.nix new file mode 100644 index 0000000..981f591 --- /dev/null +++ b/pkgs/rocm-modules/hipsolver/default.nix @@ -0,0 +1,119 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gfortran, + rocblas, + rocsolver, + rocsparse, + suitesparse, + gtest, + lapack-reference, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? 
false, +}: + +# Can also use cuSOLVER +stdenv.mkDerivation (finalAttrs: { + pname = "hipsolver"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipSOLVER"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-4ig8/P7JQCx3WB+PRHlhSlRhzdbnDo8QrFnWxsxJdwk="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + gfortran + ]; + + buildInputs = + [ + rocblas + rocsolver + rocsparse + suitesparse + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + cmakeFlags = + [ + "-DCMAKE_CXX_COMPILER=hipcc" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipsolver-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipsolver-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv clients/staging/example-* $sample/bin + patchelf $sample/bin/example-* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm 
SOLVER marshalling library"; + homepage = "https://github.com/ROCm/hipSOLVER"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hipsparse/default.nix b/pkgs/rocm-modules/hipsparse/default.nix new file mode 100644 index 0000000..c89bed1 --- /dev/null +++ b/pkgs/rocm-modules/hipsparse/default.nix @@ -0,0 +1,152 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocsparse, + clr, + gfortran, + git, + gtest, + openmp, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ? [ ], +}: + +# This can also use cuSPARSE as a backend instead of rocSPARSE +stdenv.mkDerivation (finalAttrs: { + pname = "hipsparse"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipSPARSE"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-pRR/3t7YXgtPQwGFb5lA6DI2OTF6AnDcfkydRIEod2Q="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + gfortran + ]; + + buildInputs = + [ + rocsparse + git + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + gtest + ] + ++ lib.optionals (buildTests || buildSamples) [ + openmp + ]; + + cmakeFlags = + [ + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) + (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples) + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ]; + + # We have to manually generate the matrices + # CMAKE_MATRICES_DIR seems to be reset in clients/tests/CMakeLists.txt + postPatch = lib.optionalString buildTests '' + mkdir -p matrices 
+ + ln -s ${rocsparse.passthru.matrices.matrix-01}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-02}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-03}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-04}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-05}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-06}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-07}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-08}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-09}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-10}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-11}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-12}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-13}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-14}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-15}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-16}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-17}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-18}/*.mtx matrices + ln -s ${rocsparse.passthru.matrices.matrix-19}/*.mtx matrices + + # Not used by the original cmake, causes an error + rm matrices/*_b.mtx + + echo "deps/convert.cpp -> deps/mtx2csr" + hipcc deps/convert.cpp -O3 -o deps/mtx2csr + + for mat in $(ls -1 matrices | cut -d "." 
-f 1); do + echo "mtx2csr: $mat.mtx -> $mat.bin" + deps/mtx2csr matrices/$mat.mtx matrices/$mat.bin + unlink matrices/$mat.mtx + done + + substituteInPlace clients/tests/CMakeLists.txt \ + --replace "\''${PROJECT_BINARY_DIR}/matrices" "/build/source/matrices" + ''; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipsparse-test $test/bin + mv /build/source/matrices $test + rmdir $out/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv clients/staging/example_* $sample/bin + patchelf --set-rpath $out/lib:${ + lib.makeLibraryPath ( + finalAttrs.buildInputs + ++ [ + clr + gfortran.cc + ] + ) + } $sample/bin/example_* + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm SPARSE marshalling library"; + homepage = "https://github.com/ROCm/hipSPARSE"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix b/pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix new file mode 100644 index 0000000..68d7a33 --- /dev/null +++ b/pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix @@ -0,0 +1,53 @@ +{ + lib, + stdenv, + fetchurl, + callPackage, + dpkg, + rocm-core, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "hsa-amd-aqlprofile-bin"; + version = "6.3.3"; + + src = + let + inherit (finalAttrs) version; + patch = rocm-core.ROCM_LIBPATCH_VERSION; + majorMinor = lib.versions.majorMinor version; + poolVersion = if majorMinor + ".0" == version then majorMinor else version; + incremental = "74"; + osRelease = "22.04"; + in + fetchurl { + url = "https://repo.radeon.com/rocm/apt/${poolVersion}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb"; + hash = "sha256-Lo6gU9ywkujtsKvnOAwL3L8qQNPwjjm0Pm4OyzoUYao="; + }; + + 
nativeBuildInputs = [ dpkg ]; + dontPatch = true; + dontConfigure = true; + dontBuild = true; + + installPhase = '' + runHook preInstall + + mkdir -p $out + cp -a opt/rocm-${finalAttrs.version}*/* $out + chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.* + chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so + + runHook postInstall + ''; + + passthru.updateScript = (callPackage ./update.nix { }) { inherit (finalAttrs) version; }; + + meta = with lib; { + description = "AQLPROFILE library for AMD HSA runtime API extension support"; + homepage = "https://rocm.docs.amd.com/en/latest/"; + license = with licenses; [ unfree ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix b/pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix new file mode 100644 index 0000000..ed834c8 --- /dev/null +++ b/pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix @@ -0,0 +1,61 @@ +{ + lib, + writeScript, +}: + +{ version }: + +let + prefix = "hsa-amd-aqlprofile"; + extVersion = lib.strings.concatStrings ( + lib.strings.intersperse "0" (lib.versions.splitVersion version) + ); + major = lib.versions.major version; + minor = lib.versions.minor version; + patch = lib.versions.patch version; + + updateScript = writeScript "update.sh" '' + #!/usr/bin/env nix-shell + #!nix-shell -i bash -p curl common-updater-scripts + apt="https://repo.radeon.com/rocm/apt" + pool="pool/main/h/${prefix}/" + url="$apt/latest/$pool" + res="$(curl -sL "$url")" + deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)" + patch="${patch}" + + # Try up to 10 patch versions + for i in {1..10}; do + ((patch++)) + extVersion="$(echo "$deb" | grep -o -P "(?<=\.....).*(?=\..*-)")" + + if (( ''${#extVersion} == 6 )) && (( $extVersion <= ${extVersion} )); then + url="https://repo.radeon.com/rocm/apt/${major}.${minor}.$patch/pool/main/h/${prefix}/" + res="$(curl -sL "$url")" + deb="${prefix}$(echo "$res" | grep -o -P 
"(?<=href=\"${prefix}).*(?=\">)" | tail -1)" + else + break + fi + done + + extVersion="$(echo $deb | grep -o -P "(?<=\.....).*(?=\..*-)")" + version="$(echo $extVersion | sed "s/0/./1" | sed "s/0/./1")" + IFS='.' read -a version_arr <<< "$version" + + if (( ''${version_arr[0]} > 6 )); then + echo "'rocmPackages_6.${prefix}-bin' is already at it's maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${prefix}-bin'." 1>&2 + exit 1 + fi + + if (( ''${#extVersion} == 6 )); then + repoVersion="$version" + + if (( ''${version:4:1} == 0 )); then + repoVersion=''${version:0:3} + fi + + update-source-version rocmPackages_6.${prefix}-bin "$version" "" "$apt/$repoVersion/$pool$deb" --ignore-same-hash + fi + ''; +in +[ updateScript ] diff --git a/pkgs/rocm-modules/llvm/96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch b/pkgs/rocm-modules/llvm/96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch new file mode 100644 index 0000000..91487f2 --- /dev/null +++ b/pkgs/rocm-modules/llvm/96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch @@ -0,0 +1,70 @@ +diff --git a/cmake/modules/AddClang.cmake b/cmake/modules/AddClang.cmake +index 75b0080f6..c895b884c 100644 +--- a/cmake/modules/AddClang.cmake ++++ b/cmake/modules/AddClang.cmake +@@ -119,8 +119,8 @@ macro(add_clang_library name) + install(TARGETS ${lib} + COMPONENT ${lib} + ${export_to_clangtargets} +- LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} +- ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} ++ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}" ++ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + + if (NOT LLVM_ENABLE_IDE) +diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt +index e6ae4e19e..5ef01aea2 100644 +--- a/lib/Headers/CMakeLists.txt ++++ b/lib/Headers/CMakeLists.txt +@@ -337,6 +337,7 @@ set(llvm_libc_wrapper_files + + include(GetClangResourceDir) + 
get_clang_resource_dir(output_dir PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include) ++set(header_install_dir ${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION_MAJOR}/include) + set(out_files) + set(generated_files) + +diff --git a/tools/libclang/CMakeLists.txt b/tools/libclang/CMakeLists.txt +index b5b6d2807..6b592d255 100644 +--- a/tools/libclang/CMakeLists.txt ++++ b/tools/libclang/CMakeLists.txt +@@ -246,7 +246,7 @@ foreach(PythonVersion ${CLANG_PYTHON_BINDINGS_VERSIONS}) + COMPONENT + libclang-python-bindings + DESTINATION +- "lib${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages") ++ "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages") + endforeach() + if(NOT LLVM_ENABLE_IDE) + add_custom_target(libclang-python-bindings) +diff --git a/tools/scan-build-py/CMakeLists.txt b/tools/scan-build-py/CMakeLists.txt +index 3aca22c0b..3115353e3 100644 +--- a/tools/scan-build-py/CMakeLists.txt ++++ b/tools/scan-build-py/CMakeLists.txt +@@ -88,7 +88,7 @@ foreach(lib ${LibScanbuild}) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib}) + install(FILES lib/libscanbuild/${lib} +- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild ++ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild" + COMPONENT scan-build-py) + endforeach() + +@@ -106,7 +106,7 @@ foreach(resource ${LibScanbuildResources}) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource}) + install(FILES lib/libscanbuild/resources/${resource} +- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild/resources ++ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild/resources" + COMPONENT scan-build-py) + endforeach() + +@@ -122,7 +122,7 @@ foreach(lib ${LibEar}) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib}) + 
install(FILES lib/libear/${lib} +- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libear ++ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libear" + COMPONENT scan-build-py) + endforeach() diff --git a/pkgs/rocm-modules/llvm/clang-bodge-ignore-systemwide-incls.diff b/pkgs/rocm-modules/llvm/clang-bodge-ignore-systemwide-incls.diff new file mode 100644 index 0000000..5f44dbe --- /dev/null +++ b/pkgs/rocm-modules/llvm/clang-bodge-ignore-systemwide-incls.diff @@ -0,0 +1,23 @@ +diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp +index 57368104c914..71c57f72078e 100644 +--- a/lib/Driver/ToolChains/Linux.cpp ++++ b/lib/Driver/ToolChains/Linux.cpp +@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + return; + + // LOCAL_INCLUDE_DIR ++ if (!SysRoot.empty()) + addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include")); + // TOOL_INCLUDE_DIR + AddMultilibIncludeArgs(DriverArgs, CC1Args); +@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + // Add an include of '/include' directly. This isn't provided by default by + // system GCCs, but is often used with cross-compiling GCCs, and harmless to + // add even when Clang is acting as-if it were a system compiler. 
++ if (!SysRoot.empty()) + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include")); + ++ if (!SysRoot.empty()) + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include")); + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl()) diff --git a/pkgs/rocm-modules/llvm/clang-log-jobs.diff b/pkgs/rocm-modules/llvm/clang-log-jobs.diff new file mode 100644 index 0000000..3a3a712 --- /dev/null +++ b/pkgs/rocm-modules/llvm/clang-log-jobs.diff @@ -0,0 +1,40 @@ +diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp +index 06f5e7e7e335..8407d664886a 100644 +--- a/lib/Driver/Compilation.cpp ++++ b/lib/Driver/Compilation.cpp +@@ -340,6 +340,9 @@ private: + void Compilation::ExecuteJobs(const JobList &Jobs, + FailingCommandList &FailingCommands, + bool LogOnly) const { ++ // If >1 job, log as each job finishes so can see progress while building many offloads ++ const bool logJobs = Jobs.size() > 1; ++ auto start_time = std::chrono::steady_clock::now(); + // According to UNIX standard, driver need to continue compiling all the + // inputs on the command line even one of them failed. 
+ // In all but CLMode, execute all the jobs unless the necessary inputs for the +@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs, + + JS.setJobState(Next, JobScheduler::JS_RUN); + auto Work = [&, Next]() { ++ auto job_start_time = std::chrono::steady_clock::now(); + const Command *FailingCommand = nullptr; + if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) { + FailingCommands.push_back(std::make_pair(Res, FailingCommand)); + JS.setJobState(Next, JobScheduler::JS_FAIL); + } else { ++ if (logJobs && Next) { ++ auto now = std::chrono::steady_clock::now(); ++ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count(); ++ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count(); ++ if (duration > 10 && job_duration > 0) { ++ if (Next->getOutputFilenames().empty()) ++ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n"; ++ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true); ++ else ++ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n"; ++ } ++ } ++ + JS.setJobState(Next, JobScheduler::JS_DONE); + } + }; diff --git a/pkgs/rocm-modules/llvm/default.nix b/pkgs/rocm-modules/llvm/default.nix new file mode 100644 index 0000000..0ab9c2d --- /dev/null +++ b/pkgs/rocm-modules/llvm/default.nix @@ -0,0 +1,570 @@ +{ + lib, + stdenv, + llvmPackages_19, + overrideCC, + rocm-device-libs, + rocm-runtime, + fetchFromGitHub, + runCommand, + symlinkJoin, + rdfind, + wrapBintoolsWith, + emptyDirectory, + zstd, + zlib, + gcc-unwrapped, + glibc, + replaceVars, + libffi, + libxml2, + removeReferencesTo, + fetchpatch, + writeShellScript, + makeWrapper, + # Build compilers and stdenv suitable for profiling + # compressed line tables (-g1 -gz) and + # frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer) + # TODO: Should also apply to downstream packages which use rocmClangStdenv +
profilableStdenv ? false, +}: + +let + llvmPackagesNoBintools = llvmPackages_19.override { + bootBintools = null; + bootBintoolsNoLibc = null; + }; + useLibcxx = false; # whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++ + + llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM; + llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv ( + llvmPackagesNoBintools.libstdcxxClang.override { + inherit (llvmPackages_19) bintools; + } + ); + stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv; + gcc-include = runCommand "gcc-include" { } '' + mkdir -p $out + ln -s ${gcc-unwrapped}/include/ $out/ + ln -s ${gcc-unwrapped}/lib/ $out/ + ''; + + # A prefix for use as the GCC prefix when building rocmcxx + disallowedRefsForToolchain = [ + stdenv.cc + stdenv.cc.cc + stdenv.cc.bintools + gcc-unwrapped + stdenvToBuildRocmLlvm + ]; + gcc-prefix = + let + gccPrefixPaths = [ + gcc-unwrapped + gcc-unwrapped.lib + glibc.dev + ]; + in + symlinkJoin { + name = "gcc-prefix"; + paths = gccPrefixPaths ++ [ + glibc + ]; + disallowedRequisites = gccPrefixPaths; + postBuild = '' + rm -rf $out/{bin,libexec,nix-support,lib64,share,etc} + rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h + + mkdir /build/tmpout + mv $out/* /build/tmpout + cp -Lr --no-preserve=mode /build/tmpout/* $out/ + set -x + versionedIncludePath="$(echo $out/include/c++/*/)" + mv $versionedIncludePath/* $out/include/c++/ + rm -rf $versionedIncludePath/ + + find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} + + + ln -s $out $out/x86_64-unknown-linux-gnu + ''; + }; + version = "6.4.1"; + # major version of this should be the clang version ROCm forked from + rocmLlvmVersion = "19.0.0-${llvmSrc.rev}"; + usefulOutputs = + drv: + builtins.filter (x: x != null) [ + drv + (drv.lib or null) + (drv.dev or null) + ]; + listUsefulOutputs = builtins.concatMap 
usefulOutputs; + # llvmSrc = fetchFromGitHub { + # # Performance improvements cherry-picked on top of rocm-6.3.x + # # most importantly, amdgpu-early-alwaysinline memory usage fix + # owner = "LunNova"; + # repo = "llvm-project-rocm"; + # rev = "4182046534deb851753f0d962146e5176f648893"; + # hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c="; + # }; + llvmSrc = fetchFromGitHub { + owner = "ROCm"; + repo = "llvm-project"; + # rev = "873e9660026931bbd2cbce41475090039f81f8c7"; + # hash = "sha256-7B4NQ1LBN3btHjrh9Ht2S+BYYqhKNPAwMoP18qYJw4E="; + # hash = "sha256-cIkKG5rB34G+AqonOS76acnhynmI29PIW7TuE0SQzO4="; + # hash = "sha256-DtbPZ75KqfPqPsGwmT1sUyeI1HyICDDs3SxwQQl72BM="; + rev = "rocm-${version}"; + # hash = "sha256-h4mD6gu0Gt4zYme7qtlm9QpsqWZ6XoH+XKd3hsQly1I="; + # hash = "sha256-jJsmPainHOd4BJ0bQbf1M3Kd4+aLbx3ENxtuzJ9+lLY="; + # hash = "sha256-5n3EQby17JEgr3kh1pUNuo/La4hUxMf10O7CckVMS5U="; + hash = "sha256-84+ZsKjIhXip2yLU5jpoV53+ejxy2dzgamVU6AcAngU="; + # hash = "sha256-4b1d9a2c7f0e8c3f5b6d8c1e4f0b2c5f3a6b7c8d9e0f1a2b3c4d5e6f7g8h9i0j"; + }; + llvmSrcFixed = llvmSrc; + llvmMajorVersion = lib.versions.major rocmLlvmVersion; + # An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree + # optionally using LLVM libcxx + llvmPackagesRocm = llvmPackages_19.override (_old: { + stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv; + + # not setting gitRelease = because that causes patch selection logic to use git patches + # ROCm LLVM is closer to 18 official + # gitRelease = { + # rev-version = rocmLlvmVersion; + # }; + # gitRelease = null; + # officialRelease = null; + officialRelease = { }; # Set but empty because we're overriding everything from it. 
+ version = rocmLlvmVersion; + src = llvmSrcFixed; + monorepoSrc = llvmSrcFixed; + doCheck = false; + }); + sysrootCompiler = + cc: name: paths: + let + linked = symlinkJoin { inherit name paths; }; + in + runCommand name { } '' + set -x + mkdir -p $out/ + cp --reflink=auto -rL ${linked}/* $out/ + chmod -R +rw $out + mkdir -p $out/usr + ln -s $out/ $out/usr/local + mkdir -p $out/nix-support/ + rm -rf $out/lib64 # we don't need mixed 32 bit + echo 'export CC=clang' >> $out/nix-support/setup-hook + echo 'export CXX=clang++' >> $out/nix-support/setup-hook + mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/ + ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/ + file $out/bin/.clang-wrapped + file $out/bin/.clang++-wrapped + find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} + + find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} + + + file $out/bin/.clang-wrapped + file $out/bin/.clang++-wrapped + # our /include now has more than clang expects, so this specific dir still needs to point to cc.dev + # FIXME: could copy into a different subdir? 
+ sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake + # ${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space + ''; + findClangNostdlibincPatch = + x: + ( + (lib.strings.hasSuffix "add-nostdlibinc-flag.patch" (builtins.baseNameOf x)) + || (lib.strings.hasSuffix "clang-at-least-16-LLVMgold-path.patch" (builtins.baseNameOf x)) + ); + llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${ + { + "x86_64" = "X86"; + "aarch64" = "AArch64"; + } + .${llvmStdenv.targetPlatform.parsed.cpu.name} + }"; + # -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too + # FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code, + # instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to + # use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time. 
+ + tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x); + addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang; + llvmExtraCflags = + "-O3 -DNDEBUG -march=skylake -mtune=znver3" + + (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto") + + (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects") + + (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1"); +in +rec { + inherit llvmSrc; + inherit (llvmPackagesRocm) libunwind; + inherit (llvmPackagesRocm) libcxx; + # Pass through original attrs for debugging where non-overridden llvm/clang is getting used + # llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig + # clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig + llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: { + patches = old.patches ++ [ + ./rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch + ]; + # patches = builtins.filter ( + # x: + # ( + # !(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x)) + # && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x)) + # ) + # ) old.patches; + dontStrip = profilableStdenv; + nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + env.NIX_BUILD_ID_STYLE = "fast"; + postPatch = '' + ${old.postPatch or ""} + patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh + ''; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + cmakeFlags = + (builtins.filter tablegenUsage old.cmakeFlags) + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + 
"-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + "-DLLVM_ENABLE_LIBCXX=ON" + ]; + preConfigure = '' + ${old.preConfigure or ""} + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + postFixup = '' + ${old.postFixup or ""} + remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a" + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + doCheck = false; + }); + lld = + (llvmPackagesRocm.lld.override { + libllvm = llvm; + ninja = emptyDirectory; + }).overrideAttrs + (old: { + patches = builtins.filter ( + x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x)) + ) old.patches; + dontStrip = profilableStdenv; + nativeBuildInputs = old.nativeBuildInputs ++ [ + llvmPackagesNoBintools.lld + removeReferencesTo + ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + env.NIX_BUILD_ID_STYLE = "fast"; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + cmakeFlags = + (builtins.filter tablegenUsage old.cmakeFlags) + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + 
"-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LIBCXX=ON" + ]; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + postFixup = '' + ${old.postFixup or ""} + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + preConfigure = '' + ${old.preConfigure or ""} + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + }); + clang-unwrapped = + ( + (llvmPackagesRocm.clang-unwrapped.override { + libllvm = llvm; + ninja = emptyDirectory; + }).overrideAttrs + ( + old: + let + # filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches; + filteredPatches = builtins.filter ( + x: + ( + !(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x)) + # && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x)) + && !(findClangNostdlibincPatch x) + ) + ) old.patches; + in + { + meta.platforms = [ + "x86_64-linux" + ]; + pname = "${old.pname}-rocm"; + patches = filteredPatches ++ [ + ./96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch + # Never add FHS include paths + ./clang-bodge-ignore-systemwide-incls.diff + # Prevents builds timing out if a single compiler invocation is very slow but + # per-arch jobs are completing by ensuring there's terminal output + ./clang-log-jobs.diff + (fetchpatch { + # [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler + sha256 = 
"sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch"; + relative = "clang"; + }) + # FIXME: Needed due to https://github.com/NixOS/nixpkgs/issues/375431 + # Once we can switch to overrideScope this can be removed + # (replaceVars ./../../../compilers/llvm/common/clang/clang-at-least-16-LLVMgold-path.patch { + # libllvmLibdir = "${llvm.lib}/lib"; + # }) + ]; + nativeBuildInputs = old.nativeBuildInputs ++ [ + llvmPackagesNoBintools.lld + removeReferencesTo + ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + dontStrip = profilableStdenv; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + env = (old.env or { }) // { + NIX_BUILD_ID_STYLE = "fast"; + }; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ]; + # https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11 + cmakeFlags = + (builtins.filter tablegenUsage old.cmakeFlags) + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_ENABLE_LIBCXX=ON" + "-DLLVM_USE_LINKER=lld" + "-DCLANG_DEFAULT_RTLIB=compiler-rt" + ]; + # ++ lib.optionals (!useLibcxx) [ + # # FIXME: Config 
file in rocmcxx instead of GCC_INSTALL_PREFIX? + # "-DGCC_INSTALL_PREFIX=${gcc-prefix}" + # ]; + postFixup = + (old.postFixup or "") + + '' + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + preConfigure = + (old.preConfigure or "") + + '' + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + postInstall = + (old.postInstall or "") + + '' + echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang.cfg + echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang++.cfg + ''; + } + ) + ) + // { + libllvm = llvm; + }; + # A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path + # in the right order + # and expects its libc to be in the sysroot + rocmcxx = + (sysrootCompiler clang-unwrapped "rocmcxx" ( + listUsefulOutputs ( + [ + clang-unwrapped + bintools + compiler-rt + ] + ++ (lib.optionals useLibcxx [ + libcxx + ]) + ++ (lib.optionals (!useLibcxx) [ + gcc-include + glibc + glibc.dev + ]) + ) + )) + // { + version = llvmMajorVersion; + cc = rocmcxx; + libllvm = llvm; + isClang = true; + isGNU = false; + + }; + clang-tools = llvmPackagesRocm.clang-tools.override { + inherit clang-unwrapped clang; + }; + compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: { + patches = old.patches ++ [ + (fetchpatch { + name = "avoid-overload-ambiguity-for-interceptors.patch"; + url = "https://github.com/ROCm/llvm-project/commit/155b7a12820ec45095988b6aa6e057afaf2bc892.patch"; + hash = "sha256-pgpN1q1vIQrPXHPxNSZ6zfgV2EflHO5Amzl+2BDjXbs="; + relative = "compiler-rt"; + }) + ]; + }); + compiler-rt = compiler-rt-libc; + bintools = wrapBintoolsWith { + bintools = llvmPackagesRocm.bintools-unwrapped.override { + inherit lld llvm; + }; + }; + + clang = rocmcxx; + + # Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects 
+ rocm-merged-llvm = symlinkJoin { + name = "rocm-llvm-merge"; + paths = + [ + llvm + llvm.dev + lld + lld.lib + lld.dev + libunwind + libunwind.dev + compiler-rt + compiler-rt.dev + rocmcxx + ] + ++ lib.optionals useLibcxx [ + libcxx + libcxx.out + libcxx.dev + ]; + postBuild = builtins.unsafeDiscardStringContext '' + found_files=$(find $out -name '*.cmake') + if [ -z "$found_files" ]; then + >&2 echo "Error: No CMake files found in $out" + exit 1 + fi + + for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do + if grep "$target" $found_files; then + >&2 echo "Unexpected ref to $target (clang-unwrapped) found" + # exit 1 + # # FIXME: enable this to reduce closure size + fi + done + ''; + inherit version; + llvm-src = llvmSrc; + }; + + rocmClangStdenv = overrideCC ( + if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv + ) clang; + + # Projects + openmp = + (llvmPackagesRocm.openmp.override { + stdenv = rocmClangStdenv; + llvm = rocm-merged-llvm; + targetLlvm = rocm-merged-llvm; + clang-unwrapped = clang; + }).overrideAttrs + (old: { + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ]; + cmakeFlags = + old.cmakeFlags + ++ [ + "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" + # OMPD support is broken in ROCm 6.3. Haven't investigated why. 
+ "-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE" + "-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + ]; + env.LLVM = "${rocm-merged-llvm}"; + env.LLVM_DIR = "${rocm-merged-llvm}"; + buildInputs = old.buildInputs ++ [ + rocm-device-libs + rocm-runtime + zlib + zstd + libxml2 + libffi + ]; + }); +} diff --git a/pkgs/rocm-modules/llvm/rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch b/pkgs/rocm-modules/llvm/rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch new file mode 100644 index 0000000..6dd51eb --- /dev/null +++ b/pkgs/rocm-modules/llvm/rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch @@ -0,0 +1,28 @@ +From 6c2872afcd9ae8e313621eb6cb7f407e89097304 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sun, 13 Apr 2025 07:41:34 -0700 +Subject: [PATCH] rocm-llvm: gold-plugin: fix ModuleName + +--- + llvm/tools/gold/gold-plugin.cpp | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp +index 0d4ca5299689..dd577206408c 100644 +--- a/tools/gold/gold-plugin.cpp ++++ b/tools/gold/gold-plugin.cpp +@@ -1100,10 +1100,8 @@ static std::vector<std::pair<SmallString<128>, bool>> runLTO() { + }; + + auto AddBuffer = [&](size_t Task, const Twine &moduleName, +- std::unique_ptr<MemoryBuffer> MB) { +- auto Stream = *AddStream(Task, ModuleName); +- Stream->OS << MB->getBuffer(); +- check(Stream->commit(), "Failed to commit cache"); ++ std::unique_ptr<MemoryBuffer> MB) { ++ *AddStream(Task, moduleName)->OS << MB->getBuffer(); + }; + + FileCache Cache; +-- +2.48.1 diff --git a/pkgs/rocm-modules/migraphx/default.nix b/pkgs/rocm-modules/migraphx/default.nix new file mode 100644 index 0000000..ffb3ccf --- /dev/null +++ b/pkgs/rocm-modules/migraphx/default.nix @@ -0,0 +1,194 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + pkg-config, + cmake, + rocm-cmake, + clr, + openmp, + rocblas, + hipblas-common, + hipblas, + hipblaslt, +
rocmlir, + miopen, + protobuf, + abseil-cpp, + half, + nlohmann_json, + msgpack, + sqlite, + oneDNN_2, + blaze, + texliveSmall, + doxygen, + sphinx, + docutils, + ghostscript, + python3Packages, + buildDocs ? false, + buildTests ? false, + gpuTargets ? clr.gpuTargets, +}: + +let + latex = lib.optionalAttrs buildDocs ( + texliveSmall.withPackages ( + ps: with ps; [ + latexmk + tex-gyre + fncychap + wrapfig + capt-of + framed + needspace + tabulary + varwidth + titlesec + epstopdf + ] + ) + ); +in +stdenv.mkDerivation (finalAttrs: { + pname = "migraphx"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildDocs [ + "doc" + ] + ++ lib.optionals buildTests [ + "test" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "AMDMIGraphX"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-PytBEnLBHyp0JzkSLbLRHslqGBk4mabKC62JZoXwHxE="; + }; + + nativeBuildInputs = + [ + pkg-config + cmake + rocm-cmake + clr + python3Packages.python + ] + ++ lib.optionals buildDocs [ + latex + doxygen + sphinx + docutils + ghostscript + python3Packages.sphinx-rtd-theme + python3Packages.breathe + ]; + + buildInputs = [ + openmp + rocblas + hipblas-common + hipblas + hipblaslt + rocmlir + miopen + protobuf + half + nlohmann_json + msgpack + sqlite + oneDNN_2 + blaze + python3Packages.pybind11 + python3Packages.onnx + ]; + + LDFLAGS = "-Wl,--allow-shlib-undefined"; + + cmakeFlags = [ + "-DMIGRAPHX_ENABLE_GPU=ON" + "-DMIGRAPHX_ENABLE_CPU=ON" + "-DMIGRAPHX_ENABLE_FPGA=ON" + "-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch? 
+ "-DCMAKE_C_COMPILER=amdclang" + "-DCMAKE_CXX_COMPILER=amdclang++" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DEMBED_USE=CArrays" # Fixes error with lld + "-DMIGRAPHX_ENABLE_PYTHON=ON" + "-DROCM_PATH=${clr}" + "-DHIP_ROOT_DIR=${clr}" + # migraphx relies on an incompatible fork of composable_kernel + # migraphx relies on miopen which relies on current composable_kernel + # impossible to build with this ON; we can't link both of them even if we package both + "-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF" + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ]; + + postPatch = + '' + export CXXFLAGS+=" -w -isystem${rocmlir}/include/rocmlir -I${half}/include -I${abseil-cpp}/include -I${hipblas-common}/include" + patchShebangs tools + + # `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]` + substituteInPlace CMakeLists.txt \ + --replace "set(MIGRAPHX_TIDY_ERRORS ALL)" "" + '' + + lib.optionalString (!buildDocs) '' + substituteInPlace CMakeLists.txt \ + --replace "add_subdirectory(doc)" "" + '' + + lib.optionalString (!buildTests) '' + substituteInPlace CMakeLists.txt \ + --replace "add_subdirectory(test)" "" + ''; + + # Unfortunately, it seems like we have to call make on this manually + preInstall = lib.optionalString buildDocs '' + export HOME=$(mktemp -d) + make -j$NIX_BUILD_CORES doc + cd ../doc/pdf + make -j$NIX_BUILD_CORES + cd - + ''; + + postInstall = + lib.optionalString buildDocs '' + mv ../doc/html $out/share/doc/migraphx + mv ../doc/pdf/MIGraphX.pdf $out/share/doc/migraphx + '' + + lib.optionalString buildTests '' + mkdir -p $test/bin + mv bin/test_* $test/bin + patchelf $test/bin/test_* --shrink-rpath
--allowed-rpath-prefixes "$NIX_STORE" + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "AMD's graph optimization engine"; + homepage = "https://github.com/ROCm/AMDMIGraphX"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/miopen/default.nix b/pkgs/rocm-modules/miopen/default.nix new file mode 100644 index 0000000..9d15ea0 --- /dev/null +++ b/pkgs/rocm-modules/miopen/default.nix @@ -0,0 +1,324 @@ +{ + lib, + stdenv, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + runCommand, + pkg-config, + cmake, + rocm-cmake, + rocblas, + rocmlir, + rocrand, + rocm-runtime, + rocm-merged-llvm, + hipblas-common, + hipblas, + hipblaslt, + clr, + composable_kernel, + frugally-deep, + rocm-docs-core, + half, + boost, + sqlite, + bzip2, + lbzip2, + nlohmann_json, + texliveSmall, + doxygen, + sphinx, + zlib, + gtest, + rocm-comgr, + roctracer, + python3Packages, + # FIXME: should be able to use all clr targets + gpuTargets ? [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + "gfx1201" + ], # clr.gpuTargets + buildDocs ? false, # Needs internet because of rocm-docs-core + buildTests ? false, + withComposableKernel ? 
composable_kernel.anyGfx9Target, +}: + +let + # FIXME: cmake files need patched to include this properly + cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "MIOpen"; + rev = "rocm-${version}"; + hash = "sha256-DEcVj2vOwIYYyNKEKFqZ0fb9o+/QRpwiSksxwnmgEMc="; + fetchLFS = true; + fetchSubmodules = true; + # WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream + leaveDotGit = true; + postFetch = '' + export HOME=$(mktemp -d) + cd $out + set -x + git remote add origin $url + git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version} + git clean -fdx + git switch -c rocm-${version} refs/tags/rocm-${version} + git config lfs.fetchexclude "none" + rm .lfsconfig + git lfs install + git lfs track "*.kdb.bz2" + GIT_TRACE=1 git lfs fetch --include="src/kernels/**" + GIT_TRACE=1 git lfs pull --include="src/kernels/**" + git lfs checkout + + rm -rf .git + ''; + }; + + latex = lib.optionalAttrs buildDocs ( + texliveSmall.withPackages ( + ps: with ps; [ + latexmk + tex-gyre + fncychap + wrapfig + capt-of + framed + needspace + tabulary + varwidth + titlesec + ] + ) + ); + + gfx900 = runCommand "miopen-gfx900.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx900.kdb.bz2 > $out + ''; + + gfx906 = runCommand "miopen-gfx906.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx906.kdb.bz2 > $out + ''; + + gfx908 = runCommand "miopen-gfx908.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx908.kdb.bz2 > $out + ''; + + gfx90a = runCommand "miopen-gfx90a.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx90a.kdb.bz2 > $out + ''; + + gfx1030 = runCommand "miopen-gfx1030.kdb" { 
preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx1030.kdb.bz2 > $out + ''; + gfx1201 = runCommand "miopen-gfx1201.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx1201.kdb.bz2 > $out + ''; +in +stdenv.mkDerivation (finalAttrs: { + inherit version src; + pname = "miopen"; + + env.CFLAGS = cFlags; + env.CXXFLAGS = cFlags; + + # Find zstd and add to target. Mainly for torch. + patches = [ + ./skip-preexisting-dbs.patch + ./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448 + (fetchpatch { + url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch"; + hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M="; + }) + (fetchpatch { + url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch"; + hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs="; + }) + # FIXME: We need to rebase or drop this arch compat patch + # https://github.com/ROCm/MIOpen/issues/3540 suggests that + # arch compat patching doesn't work correctly for gfx1031 + # (fetchpatch { + # name = "Extend-MIOpen-ISA-compatibility.patch"; + # url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; + # hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; + # }) + ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildDocs [ + "doc" + ] + ++ lib.optionals buildTests [ + "test" + ]; + enableParallelBuilding = true; + env.ROCM_PATH = clr; + env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ]; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; + + nativeBuildInputs = [ + pkg-config + cmake + rocm-cmake + clr + ]; + + buildInputs = + [ + hipblas + hipblas-common + rocblas + rocmlir + half + boost + sqlite + bzip2 + nlohmann_json + frugally-deep + roctracer + rocrand + hipblaslt + ] + ++ lib.optionals withComposableKernel [ + composable_kernel + ] + ++ lib.optionals buildDocs [ + latex + doxygen + 
sphinx + rocm-docs-core + python3Packages.sphinx-rtd-theme + python3Packages.breathe + python3Packages.myst-parser + ] + ++ lib.optionals buildTests [ + gtest + zlib + ]; + + cmakeFlags = + [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" + "-DMIOPEN_USE_SQLITE_PERFDB=ON" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + + # needs to stream to stdout so bzcat rather than bunzip2 + "-DUNZIPPER=${bzip2}/bin/bzcat" + + "-DCMAKE_C_COMPILER=amdclang" + "-DCMAKE_CXX_COMPILER=amdclang++" + "-DROCM_PATH=${clr}" + "-DHIP_ROOT_DIR=${clr}" + (lib.cmakeBool "MIOPEN_USE_ROCBLAS" true) + (lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true) + (lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel) + (lib.cmakeBool "MIOPEN_USE_HIPRTC" true) + (lib.cmakeBool "MIOPEN_USE_COMGR" true) + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DMIOPEN_BACKEND=HIP" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TESTS=ON" + "-DMIOPEN_TEST_ALL=ON" + ]; + + postPatch = '' + substituteInPlace cmake/ClangTidy.cmake \ + --replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy) + endmacro() + macro(enable_clang_tidy_unused)' \ + --replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET) + return()' + + patchShebangs test src/composable_kernel fin utils install_deps.cmake + + ln -sf ${gfx900} src/kernels/gfx900.kdb + ln -sf ${gfx906} src/kernels/gfx906.kdb + ln -sf ${gfx908} src/kernels/gfx908.kdb + ln -sf ${gfx90a} src/kernels/gfx90a.kdb + ln -sf ${gfx1030} src/kernels/gfx1030.kdb + ln -sf ${gfx1201} src/kernels/gfx1201.kdb + mkdir -p build/share/miopen/db/ + ln -sf ${gfx900} 
build/share/miopen/db/gfx900.kdb + ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb + ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb + ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb + ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb + ln -sf ${gfx1201} build/share/miopen/db/gfx1201.kdb + ''; + + # Unfortunately, it seems like we have to call make on these manually + postBuild = + lib.optionalString buildDocs '' + python -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html + '' + + lib.optionalString buildTests '' + make -j$NIX_BUILD_CORES check + ''; + + postInstall = + '' + rm $out/bin/install_precompiled_kernels.sh + ln -sf ${gfx900} $out/share/miopen/db/gfx900.kdb + ln -sf ${gfx906} $out/share/miopen/db/gfx906.kdb + ln -sf ${gfx908} $out/share/miopen/db/gfx908.kdb + ln -sf ${gfx90a} $out/share/miopen/db/gfx90a.kdb + ln -sf ${gfx1030} $out/share/miopen/db/gfx1030.kdb + ln -sf ${gfx1201} $out/share/miopen/db/gfx1201.kdb + '' + + lib.optionalString buildDocs '' + mv ../doc/html $out/share/doc/miopen-hip + '' + + lib.optionalString buildTests '' + mkdir -p $test/bin + mv bin/test_* $test/bin + patchelf --set-rpath $out/lib:${ + lib.makeLibraryPath ( + finalAttrs.buildInputs + ++ [ + clr + rocm-comgr + ] + ) + } $test/bin/* + ''; + + requiredSystemFeatures = [ "big-parallel" ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Machine intelligence library for ROCm"; + homepage = "https://github.com/ROCm/MIOpen"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/miopen/fix-isnan.patch b/pkgs/rocm-modules/miopen/fix-isnan.patch new file mode 100644 index 0000000..71c140d --- /dev/null +++ b/pkgs/rocm-modules/miopen/fix-isnan.patch @@ -0,0 +1,31 @@ +From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001 
+From: "Sv. Lockal" +Date: Tue, 24 Dec 2024 14:43:10 +0000 +Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems + +Closes #3441 +--- + driver/reducecalculation_driver.hpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp +index 8226b3c953..2001969509 100644 +--- a/driver/reducecalculation_driver.hpp ++++ b/driver/reducecalculation_driver.hpp +@@ -33,6 +33,7 @@ + #include "random.hpp" + #include + #include ++#include + #include + #include + #include +@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc, + for(size_t i = 0; i < reduce_size; ++i) + { + Tcheck val = static_cast(input[input_idx]); +- if(nanPropagation && isnan(val)) ++ if(nanPropagation && std::isnan(val)) + { + val = 0.0f; + } diff --git a/pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch b/pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch new file mode 100644 index 0000000..89be2f4 --- /dev/null +++ b/pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch @@ -0,0 +1,22 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index d0ffaf983..0b9ed0952 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -554,7 +554,7 @@ endif() + function(unpack_db db_bzip2_file) + get_filename_component(__fname ${db_bzip2_file} NAME_WLE) + add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname} +- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname}) ++ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname}) + string(REPLACE "." 
"_" __tname ${__fname}) + add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}) + +@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file) + if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db") + add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt + DEPENDS sqlite2txt generate_${__tname} +- COMMAND $ ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt ++ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $ ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt + ) + add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt) + add_dependencies(generate_kernels generate_${__tname}_txt) diff --git a/pkgs/rocm-modules/mivisionx/0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch b/pkgs/rocm-modules/mivisionx/0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch new file mode 100644 index 0000000..1894683 --- /dev/null +++ b/pkgs/rocm-modules/mivisionx/0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch @@ -0,0 +1,25 @@ +From f0e66bd446d44df1d30faaad520613f5fb7f5916 Mon Sep 17 00:00:00 2001 +From: Martin Schwaighofer +Date: Sat, 30 Mar 2024 15:36:52 +0100 +Subject: [PATCH] set __STDC_CONSTANT_MACROS to make rocAL compile + +--- + CMakeLists.txt | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 42b139b6..509915f1 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -149,6 +149,8 @@ message("-- ${Cyan} -D MIGRAPHX=${MIGRAPHX} [Turn ON/OFF MIGraphX Module (de + message("-- ${Cyan} -D BACKEND=${BACKEND} [Select MIVisionX Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}") + message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}") + ++add_definitions(-D__STDC_CONSTANT_MACROS) ++ + add_subdirectory(amd_openvx) + add_subdirectory(amd_openvx_extensions) + add_subdirectory(utilities) +-- +2.43.0 
+
diff --git a/pkgs/rocm-modules/mivisionx/default.nix b/pkgs/rocm-modules/mivisionx/default.nix
new file mode 100644
index 0000000..ed54a30
--- /dev/null
+++ b/pkgs/rocm-modules/mivisionx/default.nix
@@ -0,0 +1,150 @@
+{
+  lib,
+  stdenv,
+  fetchFromGitHub,
+  rocmUpdateScript,
+  cmake,
+  rocm-cmake,
+  rocm-device-libs,
+  clr,
+  pkg-config,
+  rpp,
+  rocblas,
+  miopen,
+  migraphx,
+  openmp,
+  protobuf,
+  qtcreator,
+  opencv,
+  ffmpeg,
+  boost,
+  libjpeg_turbo,
+  half,
+  lmdb,
+  rapidjson,
+  rocm-docs-core,
+  python3Packages,
+  useOpenCL ? false,
+  useCPU ? false,
+  buildDocs ? false, # Needs internet
+  gpuTargets ? [ ],
+}:
+
+stdenv.mkDerivation (finalAttrs: {
+  pname = # "mivisionx-" plus a backend suffix: hip, opencl or cpu
+    "mivisionx-"
+    +
+      (
+        if (!useOpenCL && !useCPU) then
+          "hip"
+        else if (useOpenCL && !useCPU) then # same predicate that selects -DBACKEND=OCL in cmakeFlags
+          "opencl"
+        else
+          "cpu"
+      );
+
+  version = "6.4.1";
+
+  src = fetchFromGitHub {
+    owner = "ROCm";
+    repo = "MIVisionX";
+    rev = "rocm-${finalAttrs.version}";
+    hash = "sha256-07MivgCYmKLnhGDjOYsFBfwIxEoQLYNoRbOo3MPpVzE=";
+  };
+
+  patches = [
+    ./0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch
+  ];
+
+  nativeBuildInputs =
+    [
+      cmake
+      rocm-cmake
+      clr
+      pkg-config
+    ]
+    ++ lib.optionals buildDocs [
+      rocm-docs-core
+      python3Packages.python
+    ];
+
+  buildInputs = [
+    miopen
+    migraphx
+    rpp
+    rocblas
+    openmp
+    half
+    protobuf
+    qtcreator
+    opencv
+    ffmpeg
+    boost
+    libjpeg_turbo
+    lmdb
+    rapidjson
+    python3Packages.pybind11
+    python3Packages.numpy
+    python3Packages.torchWithRocm
+  ];
+
+  cmakeFlags =
+    [
+      "-DROCM_PATH=${clr}"
+      "-DAMDRPP_PATH=${rpp}"
+      # Manually define CMAKE_INSTALL_
+      # See: https://github.com/NixOS/nixpkgs/pull/197838
+      "-DCMAKE_INSTALL_BINDIR=bin"
+      "-DCMAKE_INSTALL_LIBDIR=lib"
+      "-DCMAKE_INSTALL_INCLUDEDIR=include"
+      "-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
+      "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
+      "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
+      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
+      # "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
+    ]
+    ++ lib.optionals (gpuTargets != [ ]) [
+      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
+    ]
+    ++ lib.optionals (!useOpenCL && !useCPU) [
+      "-DBACKEND=HIP"
+    ]
+    ++ lib.optionals (useOpenCL && !useCPU) [
+      "-DBACKEND=OCL"
+    ]
+    ++ lib.optionals useCPU [
+      "-DBACKEND=CPU"
+    ];
+
+  postPatch = ''
+    # We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
+    export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
+    # Properly find miopen, fix ffmpeg version detection
+    substituteInPlace amd_openvx_extensions/CMakeLists.txt \
+      --replace-fail "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
+      --replace-fail "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
+
+    # Properly find turbojpeg
+    substituteInPlace cmake/FindTurboJpeg.cmake \
+      --replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
+      --replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
+  '';
+
+  postBuild = lib.optionalString buildDocs ''
+    python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
+  '';
+
+  passthru.updateScript = rocmUpdateScript {
+    name = finalAttrs.pname;
+    inherit (finalAttrs.src) owner;
+    inherit (finalAttrs.src) repo;
+  };
+
+  meta = with lib; {
+    description = "Set of comprehensive computer vision and machine intelligence libraries, utilities, and applications";
+    homepage = "https://github.com/ROCm/MIVisionX";
+    license = with licenses; [ mit ];
+    teams = [ teams.rocm ];
+    platforms = platforms.linux;
+    broken = useOpenCL; # the OpenCL variant is flagged broken
+  };
+})
diff --git a/pkgs/rocm-modules/mscclpp/default.nix b/pkgs/rocm-modules/mscclpp/default.nix new file mode 100644 index 0000000..b7fa373 --- /dev/null +++ b/pkgs/rocm-modules/mscclpp/default.nix @@ -0,0 +1,42 @@ +{ + fetchFromGitHub, + stdenv, + cmake, + clr, + numactl, + nlohmann_json, +}: 
+stdenv.mkDerivation { + pname = "mscclpp"; + version = "unstable-2024-12-13"; + src = fetchFromGitHub { + owner = "microsoft"; + repo = "mscclpp"; + rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37"; + hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig="; + }; + nativeBuildInputs = [ + cmake + ]; + buildInputs = [ + clr + numactl + ]; + postPatch = '' + substituteInPlace CMakeLists.txt \ + --replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100" + ''; + cmakeFlags = [ + "-DMSCCLPP_BYPASS_GPU_CHECK=ON" + "-DMSCCLPP_USE_ROCM=ON" + "-DMSCCLPP_BUILD_TESTS=OFF" + "-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" + "-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" + "-DMSCCLPP_BUILD_APPS_NCCL=ON" + "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF" + "-DFETCHCONTENT_QUIET=OFF" + "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS" + "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}" + ]; + env.ROCM_PATH = clr; +} diff --git a/pkgs/rocm-modules/rccl/default.nix b/pkgs/rocm-modules/rccl/default.nix new file mode 100644 index 0000000..02f99dd --- /dev/null +++ b/pkgs/rocm-modules/rccl/default.nix @@ -0,0 +1,144 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocm-smi, + rocm-core, + clr, + mscclpp, + perl, + hipify, + gtest, + chrpath, + rocprofiler, + rocprofiler-register, + autoPatchelfHook, + buildTests ? false, + gpuTargets ? 
(clr.localGpuTargets or [ ]), +}: + +let + useAsan = buildTests; + useUbsan = buildTests; + san = lib.optionalString (useAsan || useUbsan) ( + "-fno-gpu-sanitize -fsanitize=undefined " + + (lib.optionalString useAsan "-fsanitize=address -shared-libsan ") + ); +in +# Note: we can't properly test or make use of multi-node collective ops +# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support +# kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver +# infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros +stdenv.mkDerivation (finalAttrs: { + pname = "rccl${clr.gpuArchSuffix}"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ]; + + patches = [ + ./fix-mainline-support-and-ub.diff + ./enable-mscclpp-on-all-gfx9.diff + ./rccl-test-missing-iomanip.diff + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rccl"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-6lQBpoJKszgvt+UpNEKdiw74s3ZhC4zpA4HP+F6u7X4="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + perl + hipify + autoPatchelfHook # ASAN doesn't add rpath without this + ]; + + buildInputs = + [ + rocm-smi + gtest + rocprofiler + rocprofiler-register + mscclpp + ] + ++ lib.optionals buildTests [ + chrpath + ]; + + cmakeFlags = + [ + "-DHIP_CLANG_NUM_PARALLEL_JOBS=4" + "-DCMAKE_BUILD_TYPE=Release" + "-DROCM_PATH=${clr}" + "-DHIP_COMPILER=${clr}/bin/amdclang++" + "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++" + "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" + "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" + "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h + "-DENABLE_MSCCL_KERNEL=ON" + "-DENABLE_MSCCLPP=ON" + "-DMSCCLPP_ROOT=${mscclpp}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets 
!= [ ]) [ + # AMD can't make up their minds and keep changing which one is used in different projects. + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TESTS=ON" + ]; + + # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c + # Reported upstream + env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.LDFLAGS = "${san}"; + postPatch = '' + patchShebangs src tools + ''; + + postInstall = + lib.optionalString useAsan '' + patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so + '' + + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/* $test/bin + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm communication collectives library"; + homepage = "https://github.com/ROCm/rccl"; + license = with licenses; [ + bsd2 + bsd3 + ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff b/pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff new file mode 100644 index 0000000..6fc3759 --- /dev/null +++ b/pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff @@ -0,0 +1,13 @@ +diff --git a/src/init.cc b/src/init.cc +index 738f756..1b0e4fc 100644 +--- a/src/init.cc ++++ b/src/init.cc +@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) { + if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) { + hipDeviceProp_t devProp; + CUDACHECK(hipGetDeviceProperties(&devProp, 
cudaDev)); +- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94"); ++ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9"); + if (comm->mscclppCompatible) { + bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0); + auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId]; diff --git a/pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff b/pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff new file mode 100644 index 0000000..15efac8 --- /dev/null +++ b/pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff @@ -0,0 +1,178 @@ +diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h +index 8c5f081..9922b79 100644 +--- a/src/include/bootstrap.h ++++ b/src/include/bootstrap.h +@@ -10,11 +10,13 @@ + #include "nccl.h" + #include "comm.h" + ++// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128] + struct ncclBootstrapHandle { + uint64_t magic; + union ncclSocketAddress addr; + }; + static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID"); ++static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB"); + + ncclResult_t bootstrapNetInit(); + ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv); +diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc +index b3063d5..464b80d 100644 +--- a/src/misc/rocmwrap.cc ++++ b/src/misc/rocmwrap.cc +@@ -131,9 +131,12 @@ static void initOnceFunc() { + //format and store the kernel conf file location + snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release); + fp = fopen(kernel_conf_file, "r"); +- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file"); ++ if (fp == NULL) { ++ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled"); ++ } + 
//look for kernel_opt1 and kernel_opt2 in the conf file and check +- while (fgets(buf, sizeof(buf), fp) != NULL) { ++ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary. ++ while (fp && fgets(buf, sizeof(buf), fp) != NULL) { + if (strstr(buf, kernel_opt1) != NULL) { + found_opt1 = 1; + INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release); +@@ -143,11 +146,12 @@ static void initOnceFunc() { + INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release); + } + } +- if (!found_opt1 || !found_opt2) { ++ if (fp && (!found_opt1 || !found_opt2)) { + dmaBufSupport = 0; + INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release); + INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support"); + } ++ if (fp) fclose(fp); + + if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled"); + else goto error; +diff --git a/src/nccl.h.in b/src/nccl.h.in +index 1d127b0..6296073 100644 +--- a/src/nccl.h.in ++++ b/src/nccl.h.in +@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t; + #define NCCL_UNIQUE_ID_BYTES 128 + /*! @brief Opaque unique id used to initialize communicators + @details The ncclUniqueId must be passed to all participating ranks */ +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId; + + /*! 
@defgroup rccl_result_code Result Codes + @details The various result codes that RCCL API calls may return +diff --git a/src/proxy.cc b/src/proxy.cc +index 50e5437..51bb401 100644 +--- a/src/proxy.cc ++++ b/src/proxy.cc +@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool { + + static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) { + if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) { +- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1)); ++ if (pool->pools) { ++ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1)); ++ } else { ++ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1)); ++ } + NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE)); + pool->banks++; + pool->offset = 0; +diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc +index 6d77784..49762d3 100644 +--- a/src/transport/net_ib.cc ++++ b/src/transport/net_ib.cc +@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() { + // Requires support from NIC driver modules + // Use ONLY for debugging! 
+ moduleLoaded = 1; +- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1"); ++ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1"); + } + + if (moduleLoaded == -1) { +@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() { + // or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module) + const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version", + "/sys/kernel/memory_peers/amdkfd/version", +- "/sys/memory_peers/amdkfd/version"}; ++ "/sys/memory_peers/amdkfd/version", ++ NULL}; + int i = 0; + + while (memory_peers_paths[i]) { + if (access(memory_peers_paths[i], F_OK) == 0) { + moduleLoaded = 1; +- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]); + break; + } else { + moduleLoaded = 0; +@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() { + if (moduleLoaded == 0) { + // Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms` + // if your system uses native OS ib_peer module +- char buf[256]; +- FILE *fp = NULL; +- fp = fopen("/proc/kallsyms", "r"); ++ FILE *fp = fopen("/proc/kallsyms", "r"); ++ char *line = NULL; ++ size_t len = 0; + + if (fp == NULL) { +- INFO(NCCL_INIT,"Could not open /proc/kallsyms"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client"); + } else { +- while (fgets(buf, sizeof(buf), fp) != NULL) { +- if (strstr(buf, "t ib_register_peer_memory_client") != NULL || +- strstr(buf, "T ib_register_peer_memory_client") != NULL) { ++ while (getline(&line, &len, fp) > 0) { ++ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) { + moduleLoaded = 1; +- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms"); + break; + } + } + } 
++ if (line) free(line); ++ if (fp) fclose(fp); + } + #else + // Check for the nv_peer_mem module being loaded +@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() { + #endif + } + if (moduleLoaded == 0) { +- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol"); + return ncclSystemError; + } + return ncclSuccess; +diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h +index 2c86c33..5801c61 100755 +--- a/tools/ib-test/include/nccl.h ++++ b/tools/ib-test/include/nccl.h +@@ -31,7 +31,7 @@ extern "C" { + typedef struct ncclComm* ncclComm_t; + + #define NCCL_UNIQUE_ID_BYTES 128 +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; + + /* Error type */ + typedef enum { ncclSuccess = 0, +diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h +index 729561b..4e4bdd9 100644 +--- a/tools/topo_expl/include/nccl.h ++++ b/tools/topo_expl/include/nccl.h +@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t; + #define NCCL_COMM_NULL NULL + + #define NCCL_UNIQUE_ID_BYTES 128 +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; + + /*! 
@brief Error type */ + typedef enum { ncclSuccess = 0, diff --git a/pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff b/pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff new file mode 100644 index 0000000..3cc54a8 --- /dev/null +++ b/pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff @@ -0,0 +1,10 @@ +--- a/test/common/TestBed.cpp ++++ b/test/common/TestBed.cpp +@@ -4,6 +4,7 @@ + * See LICENSE.txt for license information + ************************************************************************/ + #include ++#include + #include "TestBed.hpp" + #include + 
diff --git a/pkgs/rocm-modules/rdc/default.nix b/pkgs/rocm-modules/rdc/default.nix
new file mode 100644
index 0000000..5af566e
--- /dev/null
+++ b/pkgs/rocm-modules/rdc/default.nix
@@ -0,0 +1,146 @@
+{
+  lib,
+  stdenv,
+  fetchFromGitHub,
+  rocmUpdateScript,
+  cmake,
+  amdsmi,
+  rocm-smi,
+  rocm-runtime,
+  libcap,
+  libdrm,
+  grpc,
+  protobuf,
+  openssl,
+  doxygen,
+  graphviz,
+  texliveSmall,
+  gtest,
+  buildDocs ? true,
+  buildTests ? false,
+}:
+
+let
+  latex = lib.optionalAttrs buildDocs ( # only evaluated into a TeX environment when docs are requested
+    texliveSmall.withPackages (
+      ps: with ps; [
+        changepage
+        latexmk
+        varwidth
+        multirow
+        hanging
+        adjustbox
+        collectbox
+        stackengine
+        enumitem
+        alphalph
+        wasysym
+        sectsty
+        tocloft
+        newunicodechar
+        etoc
+        helvetic
+        wasy
+        courier
+      ]
+    )
+  );
+in
+stdenv.mkDerivation (finalAttrs: {
+  pname = "rdc";
+  version = "6.4.1";
+
+  outputs =
+    [
+      "out"
+    ]
+    ++ lib.optionals buildDocs [
+      "doc"
+    ]
+    ++ lib.optionals buildTests [
+      "test"
+    ];
+
+  src = fetchFromGitHub {
+    owner = "ROCm";
+    repo = "rdc";
+    rev = "rocm-${finalAttrs.version}";
+    hash = "sha256-HkGumwag7mDERHiWwZ7cRQz0tzH+vIovY1HgX2g69d4=";
+  };
+
+  nativeBuildInputs =
+    [
+      cmake
+      protobuf
+    ]
+    ++ lib.optionals buildDocs [
+      doxygen
+      graphviz
+      latex
+    ];
+
+  buildInputs =
+    [
+      amdsmi
+      rocm-smi
+      rocm-runtime
+      libcap
+      libdrm
+      grpc
+      openssl
+    ]
+    ++ lib.optionals buildTests [
+      gtest
+    ];
+
+  CXXFLAGS = "-I${libcap.dev}/include"; # puts libcap's dev headers on the C++ include path
+
+  cmakeFlags =
+    [
+      "-DCMAKE_VERBOSE_MAKEFILE=OFF"
+      "-DRDC_INSTALL_PREFIX=${placeholder "out"}"
+      "-DBUILD_ROCRTEST=ON"
+      "-DRSMI_INC_DIR=${rocm-smi}/include"
+      "-DRSMI_LIB_DIR=${rocm-smi}/lib"
+      "-DGRPC_ROOT=${grpc}"
+      # Manually define CMAKE_INSTALL_
+      # See: https://github.com/NixOS/nixpkgs/pull/197838
+      "-DCMAKE_INSTALL_BINDIR=bin"
+      "-DCMAKE_INSTALL_LIBDIR=lib"
+      "-DCMAKE_INSTALL_INCLUDEDIR=include"
+      "-DCMAKE_INSTALL_LIBEXECDIR=libexec"
+      "-DCMAKE_INSTALL_DOCDIR=doc"
+    ]
+    ++ lib.optionals buildTests [
+      "-DBUILD_TESTS=ON"
+    ];
+
+  postPatch = ''
+    substituteInPlace CMakeLists.txt \
+      --replace-warn "file(STRINGS /etc/os-release LINUX_DISTRO LIMIT_COUNT 1 REGEX \"NAME=\")" "set(LINUX_DISTRO \"NixOS\")"
+  '';
+
+  postInstall =
+    ''
+      find $out/bin -executable -type f -exec \
+        patchelf {} --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" \;
+    ''
+    + lib.optionalString buildTests ''
+      mkdir -p $test
+      mv $out/bin/rdctst_tests $test/bin
+    '';
+
+  passthru.updateScript = rocmUpdateScript {
+    name = finalAttrs.pname;
+    inherit (finalAttrs.src) owner;
+    inherit (finalAttrs.src) repo;
+  };
+
+  meta = with lib; {
+    description = "Simplifies administration and addresses infrastructure challenges in cluster and datacenter environments";
+    homepage = "https://github.com/ROCm/rdc";
+    license = with licenses; [ mit ];
+    teams = [ teams.rocm ];
+    platforms = platforms.linux;
+  };
+})
diff --git a/pkgs/rocm-modules/rocalution/default.nix b/pkgs/rocm-modules/rocalution/default.nix new file mode 100644 index 0000000..1908393 --- /dev/null +++ b/pkgs/rocm-modules/rocalution/default.nix @@ -0,0 +1,135 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocblas, + rocsparse, + rocprim, + rocrand, + clr, + git, + pkg-config, + openmp, + openmpi, + gtest, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900:xnack-" "gfx906:xnack-" ... ] +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocalution"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocALUTION"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-bZx1Cc2jcIfysohKCKzj5mowM3IeCelRhVaBU73KnTo="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + git + pkg-config + ]; + + buildInputs = + [ + rocblas + rocsparse + rocprim + rocrand + openmp + openmpi + ] + ++ lib.optionals buildTests [ + gtest + ]; + + CXXFLAGS = "-I${openmp.dev}/include"; + cmakeFlags = + [ + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" + "-DROCM_PATH=${clr}" + "-DHIP_ROOT_DIR=${clr}" + "-DSUPPORT_HIP=ON" + "-DSUPPORT_OMP=ON" + "-DSUPPORT_MPI=ON" + "-DBUILD_CLIENTS_SAMPLES=${if buildSamples then "ON" else "OFF"}" + # Manually define 
CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/rocalution-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/rocalution-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv clients/staging/* $sample/bin + rm $sample/bin/rocalution-test || true + rm $sample/bin/rocalution-bench || true + + patchelf --set-rpath \ + $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ clr ])} \ + $sample/bin/* + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Iterative sparse solvers for ROCm"; + homepage = "https://github.com/ROCm/rocALUTION"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocblas/default.nix b/pkgs/rocm-modules/rocblas/default.nix new file mode 100644 index 0000000..a9ceeb9 --- /dev/null +++ b/pkgs/rocm-modules/rocblas/default.nix @@ -0,0 +1,200 @@ +{ + lib, + stdenv, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + python3, + tensile, + msgpack, + libxml2, + gtest, + gfortran, + openmp, + git, + amd-blis, + zstd, + hipblas-common, + hipblaslt, + python3Packages, + rocm-smi, + libdrm, + roctracer, + 
buildTensile ? true, + buildTests ? true, + buildBenchmarks ? true, + # https://github.com/ROCm/Tensile/issues/1757 + # Allows gfx101* users to use rocBLAS normally. + # Turn the below two values to `true` after the fix has been cherry-picked + # into a release. Just backporting that single fix is not enough because it + # depends on some previous commits. + tensileSepArch ? true, + tensileLazyLib ? true, + withHipBlasLt ? true, + # `gfx940`, `gfx941` are not present in this list because they are early + # engineering samples, and all final MI300 hardware are `gfx942`: + # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 + # + # `gfx1012` is not present in this list because the ISA compatibility patches + # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will + # always try to use `gfx1010` code objects, hence building for `gfx1012` is + # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx1010" + # "gfx1012" + # "gfx1030" + # "gfx1100" + # "gfx1101" + # "gfx1102" + "gfx1201" + ] + ), +}: + +let + gpuTargets' = lib.concatStringsSep ";" gpuTargets; +in +stdenv.mkDerivation (finalAttrs: { + pname = "rocblas${clr.gpuArchSuffix}"; + version = "6.4.1"; + + outputs = [ + "out" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocBLAS"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-To5V5bydYR5iuUxkwpx79RrNdncvWmR/v/w9VnlKBq4="; + }; + + nativeBuildInputs = + [ + cmake + # no ninja, it buffers console output and nix times out long periods of no output + rocm-cmake + clr + git + ] + ++ lib.optionals buildTensile [ + tensile + ]; + + buildInputs = + [ + python3 + hipblas-common + ] + ++ lib.optionals withHipBlasLt [ + hipblaslt + ] + ++ lib.optionals buildTensile [ + zstd + msgpack + libxml2 + python3Packages.msgpack + python3Packages.zstandard + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || 
buildBenchmarks) [ + gfortran + openmp + amd-blis + rocm-smi + ] + ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ + python3Packages.pyyaml + ]; + + dontStrip = true; + env.CXXFLAGS = + "-O3 -DNDEBUG -I${hipblas-common}/include -I${roctracer}/include -I${libdrm.dev}/include" + + + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis"; + # Fails to link tests if we don't add amd-blis libs + env.LDFLAGS = + "-L${roctracer}/lib -L${libdrm.dev}/lib" + + lib.optionalString ( + buildTests || buildBenchmarks + ) " -Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas"; + env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++"; + + cmakeFlags = + [ + (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release") + (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) + (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib") + (lib.cmakeFeature "python" "python3") + (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets') + (lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets') + (lib.cmakeFeature "GPU_TARGETS" gpuTargets') + (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) + (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) + (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") + (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt) + (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) + (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks) + (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true) + # Temporarily set variables to work around upstream CMakeLists issue + # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DCMAKE_INSTALL_LIBDIR=lib" + ] + 
++ lib.optionals buildTensile [ + "-DCPACK_SET_DESTDIR=OFF" + "-DLINK_BLIS=ON" + "-DTensile_CODE_OBJECT_VERSION=default" + "-DTensile_LOGIC=asm_full" + "-DTensile_LIBRARY_FORMAT=msgpack" + (lib.cmakeBool "BUILD_WITH_PIP" false) + (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) + (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) + ]; + + passthru.amdgpu_targets = gpuTargets'; + + patches = [ + # (fetchpatch { + # name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; + # url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; + # hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; + # }) + ]; + + # Pass $NIX_BUILD_CORES to Tensile + postPatch = '' + substituteInPlace cmake/build-options.cmake \ + --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' + # substituteInPlace CMakeLists.txt \ + # --replace-fail "4.42.0" "4.43.0" + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + enableParallelBuilding = true; + requiredSystemFeatures = [ "big-parallel" ]; + + meta = with lib; { + description = "BLAS implementation for ROCm platform"; + homepage = "https://github.com/ROCm/rocBLAS"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocdbgapi/default.nix b/pkgs/rocm-modules/rocdbgapi/default.nix new file mode 100644 index 0000000..0474569 --- /dev/null +++ b/pkgs/rocm-modules/rocdbgapi/default.nix @@ -0,0 +1,114 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + git, + rocm-comgr, + rocm-runtime, + hwdata, + texliveSmall, + doxygen, + graphviz, + buildDocs ? 
true, +}: + +let + latex = lib.optionalAttrs buildDocs ( + texliveSmall.withPackages ( + ps: with ps; [ + changepage + latexmk + varwidth + multirow + hanging + adjustbox + collectbox + stackengine + enumitem + alphalph + wasysym + sectsty + tocloft + newunicodechar + etoc + helvetic + wasy + courier + ] + ) + ); +in +stdenv.mkDerivation (finalAttrs: { + pname = "rocdbgapi"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildDocs [ + "doc" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "ROCdbgapi"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-Rr8+SNeFps0rjk4Jn2+rFmtRJfL42l0tNOz13oZQy+I="; + }; + + nativeBuildInputs = + [ + cmake + rocm-cmake + git + ] + ++ lib.optionals buildDocs [ + latex + doxygen + graphviz + ]; + + buildInputs = [ + rocm-comgr + rocm-runtime + hwdata + ]; + + cmakeFlags = [ + "-DPCI_IDS_PATH=${hwdata}/share/hwdata" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + # Unfortunately, it seems like we have to call make on this manually + postBuild = lib.optionalString buildDocs '' + export HOME=$(mktemp -d) + make -j$NIX_BUILD_CORES doc + ''; + + postInstall = lib.optionalString buildDocs '' + mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html + rmdir $out/share/html + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Debugger support for control of execution and inspection state"; + homepage = "https://github.com/ROCm/ROCdbgapi"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocfft/default.nix b/pkgs/rocm-modules/rocfft/default.nix new file mode 100644 index 0000000..86fbc80 --- /dev/null +++ 
b/pkgs/rocm-modules/rocfft/default.nix @@ -0,0 +1,174 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clr, + python3, + rocm-cmake, + sqlite, + boost, + fftw, + fftwFloat, + gtest, + openmp, + rocrand, + gpuTargets ? clr.localGpuTargets or clr.gpuTargets, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocfft${clr.gpuArchSuffix}"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocFFT"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-z8O//2lihXeVnYZklR8uUIgCS9RyNNRb+hM5keR5aYA="; + }; + + nativeBuildInputs = [ + cmake + clr + python3 + rocm-cmake + ]; + + # FIXME: rocfft_aot_helper runs at the end of the build and has a risk of timing it out + # due to a long period with no terminal output + buildInputs = [ sqlite ]; + + cmakeFlags = + [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + "-DSQLITE_USE_SYSTEM_PACKAGE=ON" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ]; + + passthru = { + test = stdenv.mkDerivation { + pname = "${finalAttrs.pname}-test"; + inherit (finalAttrs) version src; + + sourceRoot = "${finalAttrs.src.name}/clients/tests"; + + nativeBuildInputs = [ + cmake + clr + rocm-cmake + ]; + + buildInputs = [ + boost + fftw + fftwFloat + finalAttrs.finalPackage + gtest + openmp + rocrand + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + ]; + + postInstall = '' + rm -r "$out/lib/fftw" + rmdir "$out/lib" + ''; + }; + + benchmark = stdenv.mkDerivation { + pname = "${finalAttrs.pname}-benchmark"; + inherit (finalAttrs) version src; + + sourceRoot = "${finalAttrs.src.name}/clients/rider"; + + nativeBuildInputs = [ + cmake + clr + rocm-cmake + ]; + + buildInputs = [ + boost + finalAttrs.finalPackage + 
openmp + (python3.withPackages ( + ps: with ps; [ + pandas + scipy + ] + )) + rocrand + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + ]; + + postInstall = '' + cp -a ../../../scripts/perf "$out/bin" + ''; + }; + + samples = stdenv.mkDerivation { + pname = "${finalAttrs.pname}-samples"; + inherit (finalAttrs) version src; + + sourceRoot = "${finalAttrs.src.name}/clients/samples"; + + nativeBuildInputs = [ + cmake + clr + rocm-cmake + ]; + + buildInputs = [ + boost + finalAttrs.finalPackage + openmp + rocrand + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + ]; + + installPhase = '' + runHook preInstall + mkdir "$out" + cp -a bin "$out" + runHook postInstall + ''; + }; + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + }; + + requiredSystemFeatures = [ "big-parallel" ]; + + meta = with lib; { + description = "FFT implementation for ROCm"; + homepage = "https://github.com/ROCm/rocFFT"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocgdb/default.nix b/pkgs/rocm-modules/rocgdb/default.nix new file mode 100644 index 0000000..47e7fa4 --- /dev/null +++ b/pkgs/rocm-modules/rocgdb/default.nix @@ -0,0 +1,117 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + pkg-config, + texinfo, + bison, + flex, + glibc, + zlib, + zstd, + gmp, + mpfr, + ncurses, + expat, + rocdbgapi, + perl, + python3, + babeltrace, + sourceHighlight, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocgdb"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "ROCgdb"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-evDWg2w2FHv6OU5BQOCAXTlDm7JpwdJ3Wh5a2i5r1gQ="; + }; + + nativeBuildInputs = [ + pkg-config + texinfo # For makeinfo + bison + flex + perl # used in mkinstalldirs script during installPhase + python3 + 
]; + + buildInputs = [ + zlib + zstd + gmp + mpfr + ncurses + expat + rocdbgapi + python3 + babeltrace + sourceHighlight + ]; + + configureFlags = [ + # Ensure we build the amdgpu target + "--enable-targets=${stdenv.targetPlatform.config},amdgcn-amd-amdhsa" + "--with-amd-dbgapi=yes" + + "--with-iconv-path=${glibc.bin}" + "--enable-tui" + "--with-babeltrace=${babeltrace}" + "--with-python=python3" + "--with-system-zlib" + "--with-system-zstd" + "--enable-64-bit-bfd" + "--with-gmp=${gmp.dev}" + "--with-mpfr=${mpfr.dev}" + "--with-expat=${expat}" + + # So the installed binary is called "rocgdb" instead of plain "gdb" + "--program-prefix=roc" + + # Disable building many components not used or incompatible with the amdgcn target + "--disable-sim" + "--disable-gdbserver" + "--disable-ld" + "--disable-gas" + "--disable-gdbserver" + "--disable-gdbtk" + "--disable-gprofng" + "--disable-shared" + ]; + + postPatch = '' + for file in *; do + if [ -f "$file" ]; then + patchShebangs "$file" + fi + done + ''; + + # The source directory for ROCgdb (based on upstream GDB) contains multiple projects + # of GNU’s toolchain (binutils and others), we only need to install the GDB part. 
+ installPhase = '' + make install-gdb + ''; + + env.CFLAGS = "-Wno-switch -Wno-format-nonliteral -I${zstd.dev}/include -I${zlib.dev}/include -I${expat.dev}/include -I${ncurses.dev}/include"; + env.CXXFLAGS = finalAttrs.env.CFLAGS; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm source-level debugger for Linux, based on GDB"; + homepage = "https://github.com/ROCm/ROCgdb"; + license = licenses.gpl3Plus; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocm-cmake/default.nix b/pkgs/rocm-modules/rocm-cmake/default.nix new file mode 100644 index 0000000..cbdcf25 --- /dev/null +++ b/pkgs/rocm-modules/rocm-cmake/default.nix @@ -0,0 +1,38 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + rocm-core, + cmake, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocm-cmake"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocm-cmake"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-wAipNWAB66YNf7exLSNPAzg3NgkGD9LPKfKiulL5yak="; + }; + + nativeBuildInputs = [ cmake ]; + + buildInputs = [ rocm-core ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "CMake modules for common build tasks for the ROCm stack"; + homepage = "https://github.com/ROCm/rocm-cmake"; + license = licenses.mit; + teams = [ teams.rocm ]; + platforms = platforms.unix; + }; +}) diff --git a/pkgs/rocm-modules/rocm-comgr/default.nix b/pkgs/rocm-modules/rocm-comgr/default.nix new file mode 100644 index 0000000..482046b --- /dev/null +++ b/pkgs/rocm-modules/rocm-comgr/default.nix @@ -0,0 +1,73 @@ +{ + lib, + stdenv, + fetchpatch, + cmake, + python3, + rocm-merged-llvm, + rocm-device-libs, + zlib, + zstd, + libxml2, +}: + +let + 
llvmNativeTarget = + if stdenv.hostPlatform.isx86_64 then + "X86" + else if stdenv.hostPlatform.isAarch64 then + "AArch64" + else + throw "Unsupported ROCm LLVM platform"; +in +stdenv.mkDerivation (finalAttrs: { + pname = "rocm-comgr"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; + + sourceRoot = "${finalAttrs.src.name}/amd/comgr"; + + patches = [ + # [Comgr] Extend ISA compatibility + # (fetchpatch { + # sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k="; + # url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch"; + # relative = "amd/comgr"; + # }) + #[Comgr] Extend ISA compatibility for CCOB + # (fetchpatch { + # sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY="; + # url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch"; + # relative = "amd/comgr"; + # }) + ]; + + nativeBuildInputs = [ + cmake + python3 + ]; + + buildInputs = [ + rocm-device-libs + libxml2 + zlib + zstd + rocm-merged-llvm + ]; + + cmakeFlags = [ + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + ]; + + meta = with lib; { + description = "APIs for compiling and inspecting AMDGPU code objects"; + homepage = "https://github.com/ROCm/ROCm-CompilerSupport/tree/amd-stg-open/lib/comgr"; + license = licenses.ncsa; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocm-core/default.nix b/pkgs/rocm-modules/rocm-core/default.nix new file mode 100644 index 0000000..f0cea7a --- /dev/null +++ b/pkgs/rocm-modules/rocm-core/default.nix @@ -0,0 +1,53 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + writeText, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocm-core"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = 
"ROCm"; + repo = "rocm-core"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-dDTCEAbeA88deLgUbdbulaHpHI9zcTze0mZeS49TsAM="; + }; + + nativeBuildInputs = [ cmake ]; + # FIXME: What's the correct way to set this? + env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}"; + env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}"; + env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}"; + cmakeFlags = [ + "-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}" + "-DROCM_VERSION=${finalAttrs.version}" + "-DBUILD_ID=${finalAttrs.env.BUILD_ID}" + ]; + + setupHook = writeText "setupHook.sh" '' + export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}" + export BUILD_ID="${finalAttrs.env.BUILD_ID}" + export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}" + ''; + + passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION; + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; + }; + + meta = with lib; { + description = "Utility for getting the ROCm release version"; + homepage = "https://github.com/ROCm/rocm-core"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocm-device-libs/cmake.patch b/pkgs/rocm-modules/rocm-device-libs/cmake.patch new file mode 100644 index 0000000..de46bdb --- /dev/null +++ b/pkgs/rocm-modules/rocm-device-libs/cmake.patch @@ -0,0 +1,43 @@ +diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake +index 07c60eb..c736b3e 100644 +--- a/amd/device-libs/cmake/Packages.cmake ++++ b/amd/device-libs/cmake/Packages.cmake +@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES + IMPORTED_LOCATION \"${target_path}\")") + endforeach() + configure_file(AMDDeviceLibsConfig.cmake.in +- 
${PACKAGE_PREFIX}/AMDDeviceLibsConfig.cmake ++ lib/cmake/AMDDeviceLibs/AMDDeviceLibsConfig.cmake + @ONLY) + + + set(install_path_suffix "amdgcn/bitcode") + + # Generate the install-tree package. +-# We do not know the absolute path to the intall tree until we are installed, +-# so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use +-# relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE. +-set(AMD_DEVICE_LIBS_PREFIX_CODE " ++if(IS_ABSOLUTE "${CMAKE_INSTALL_PREFIX}") ++ set(AMD_DEVICE_LIBS_PREFIX_CODE "set(AMD_DEVICE_LIBS_PREFIX \"${CMAKE_INSTALL_PREFIX}\")") ++else() ++ # We do not know the absolute path to the install tree until we are installed, ++ # so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use ++ # relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE. ++ set(AMD_DEVICE_LIBS_PREFIX_CODE " + # Derive absolute install prefix from config file path. + get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)") +-string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}") +-foreach(p ${count}) +- set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE} ++ string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}") ++ foreach(p ${count}) ++ set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE} + get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${AMD_DEVICE_LIBS_PREFIX}\" PATH)") +-endforeach() ++ endforeach() ++endif() ++ + set(AMD_DEVICE_LIBS_TARGET_CODE) + foreach(target ${AMDGCN_LIB_LIST}) + get_target_property(target_name ${target} ARCHIVE_OUTPUT_NAME) diff --git a/pkgs/rocm-modules/rocm-device-libs/default.nix b/pkgs/rocm-modules/rocm-device-libs/default.nix new file mode 100644 index 0000000..748b539 --- /dev/null +++ b/pkgs/rocm-modules/rocm-device-libs/default.nix @@ -0,0 +1,62 @@ +{ + lib, + stdenv, + cmake, + ninja, + libxml2, + zlib, + zstd, + ncurses, + rocm-merged-llvm, + python3, +}: + +let + llvmNativeTarget = + if stdenv.hostPlatform.isx86_64 then + 
"X86" + else if stdenv.hostPlatform.isAarch64 then + "AArch64" + else + throw "Unsupported ROCm LLVM platform"; +in +stdenv.mkDerivation { + pname = "rocm-device-libs"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; + + postPatch = '' + cd amd/device-libs + ''; + + patches = [ ./cmake.patch ]; + + nativeBuildInputs = [ + cmake + ninja + python3 + ]; + + buildInputs = [ + libxml2 + zlib + zstd + ncurses + rocm-merged-llvm + ]; + + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + ]; + + meta = with lib; { + description = "Set of AMD-specific device-side language runtime libraries"; + homepage = "https://github.com/ROCm/ROCm-Device-Libs"; + license = licenses.ncsa; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +} diff --git a/pkgs/rocm-modules/rocm-docs-core/default.nix b/pkgs/rocm-modules/rocm-docs-core/default.nix new file mode 100644 index 0000000..c8738b6 --- /dev/null +++ b/pkgs/rocm-modules/rocm-docs-core/default.nix @@ -0,0 +1,71 @@ +{ + lib, + fetchFromGitHub, + gitUpdater, + buildPythonPackage, + setuptools, + beautifulsoup4, + gitpython, + pydata-sphinx-theme, + pygithub, + sphinx, + breathe, + myst-nb, + myst-parser, + sphinx-book-theme, + sphinx-copybutton, + sphinx-design, + sphinx-external-toc, + sphinx-notfound-page, + pyyaml, + fastjsonschema, +}: + +# FIXME: Move to rocmPackages_common +buildPythonPackage rec { + pname = "rocm-docs-core"; + version = "1.19.0"; + format = "pyproject"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocm-docs-core"; + rev = "v${version}"; + hash = "sha256-vmtOf9e8RhWQ0ecL+Sn1HJGK+ILCaQxeQvUkQ8W8YX8="; + }; + + buildInputs = [ setuptools ]; + + propagatedBuildInputs = [ + beautifulsoup4 + gitpython + pydata-sphinx-theme + pygithub + sphinx + breathe + myst-nb + myst-parser + sphinx-book-theme + sphinx-copybutton + sphinx-design + 
sphinx-external-toc + sphinx-notfound-page + pyyaml + fastjsonschema + ]; + + pythonImportsCheck = [ "rocm_docs" ]; + + passthru.updateScript = gitUpdater { rev-prefix = "v"; }; + + meta = with lib; { + description = "ROCm Documentation Python package for ReadTheDocs build standardization"; + homepage = "https://github.com/ROCm/rocm-docs-core"; + license = with licenses; [ + mit + cc-by-40 + ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +} diff --git a/pkgs/rocm-modules/rocm-path/default.nix b/pkgs/rocm-modules/rocm-path/default.nix new file mode 100644 index 0000000..57970ff --- /dev/null +++ b/pkgs/rocm-modules/rocm-path/default.nix @@ -0,0 +1,27 @@ +{ + symlinkJoin, + linkFarm, + clr, + hipblas, + hipblas-common, + rocblas, + rocsolver, + rocsparse, + rocm-device-libs, + rocm-smi, + llvm, +}: +symlinkJoin { + name = "rocm-path-${clr.version}"; + paths = [ + clr + hipblas-common + hipblas + rocblas + rocsolver + rocsparse + rocm-device-libs + rocm-smi + (linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; }) + ]; +} diff --git a/pkgs/rocm-modules/rocm-runtime/default.nix b/pkgs/rocm-modules/rocm-runtime/default.nix new file mode 100644 index 0000000..50c22c2 --- /dev/null +++ b/pkgs/rocm-modules/rocm-runtime/default.nix @@ -0,0 +1,115 @@ +{ + lib, + stdenv, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + pkg-config, + cmake, + ninja, + xxd, + rocm-device-libs, + elfutils, + libdrm, + numactl, + valgrind, + libxml2, + rocm-merged-llvm, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocm-runtime"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "ROCR-Runtime"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-LOILnvjGwlLoB99+TdZib7VJsgp45yGJiEPgrlwXItI="; + }; + + env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w"; + env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w"; + + nativeBuildInputs = [ + pkg-config + cmake + ninja + xxd + rocm-merged-llvm + ]; + + buildInputs 
= [ + elfutils + libdrm + numactl + # without valgrind, additional work for "kCodeCopyAligned11" is done in the installPhase + valgrind + libxml2 + ]; + + cmakeFlags = [ + "-DBUILD_SHARED_LIBS=ON" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + patches = [ + # Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272 + # (fetchpatch { + # # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int + # url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch"; + # hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE="; + # }) + # (fetchpatch { + # # [PATCH] rocr: refactor of runtime.cpp based on Coverity + # url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch"; + # hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0="; + # }) + (fetchpatch { + # [PATCH] queues: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch"; + hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U="; + }) + (fetchpatch { + # [PATCH] topology: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch"; + hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w="; + }) + (fetchpatch { + # [PATCH] kfd_ioctl: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch"; + hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns="; + }) + ./remove-hsa-aqlprofile-dep.patch + ]; + + postPatch = '' + patchShebangs --build \ + runtime/hsa-runtime/core/runtime/trap_handler/create_trap_handler_header.sh \ + runtime/hsa-runtime/core/runtime/blit_shaders/create_blit_shader_header.sh \ + runtime/hsa-runtime/image/blit_src/create_hsaco_ascii_file.sh + patchShebangs --host image core runtime + + 
substituteInPlace CMakeLists.txt \ + --replace-warn 'hsa/include/hsa' 'include/hsa' + + export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode" + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Platform runtime for ROCm"; + homepage = "https://github.com/ROCm/ROCR-Runtime"; + license = with licenses; [ ncsa ]; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocm-runtime/remove-hsa-aqlprofile-dep.patch b/pkgs/rocm-modules/rocm-runtime/remove-hsa-aqlprofile-dep.patch new file mode 100644 index 0000000..b8ed57d --- /dev/null +++ b/pkgs/rocm-modules/rocm-runtime/remove-hsa-aqlprofile-dep.patch @@ -0,0 +1,27 @@ +libhsa-amd-aqlprofile64 library is unfree +Bug: https://github.com/ROCm/ROCm/issues/1781 +--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp ++++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { + setFlag(HSA_EXTENSION_AMD_PC_SAMPLING); + } + +- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) { +- os::CloseLib(lib); +- setFlag(HSA_EXTENSION_AMD_AQLPROFILE); +- } +- + setFlag(HSA_EXTENSION_AMD_PROFILER); + + break; +--- a/runtime/hsa-runtime/core/runtime/hsa.cpp ++++ b/runtime/hsa-runtime/core/runtime/hsa.cpp +@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v + return HSA_STATUS_SUCCESS; + } + +- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) { ++ if (0) { + if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) { + debug_print("aqlprofile API incompatible ver %d, current ver %d\n", + version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR); diff --git a/pkgs/rocm-modules/rocm-smi/cmake.patch b/pkgs/rocm-modules/rocm-smi/cmake.patch new file mode 100644 
index 0000000..2634ea4 --- /dev/null +++ b/pkgs/rocm-modules/rocm-smi/cmake.patch @@ -0,0 +1,89 @@ +diff --git a/rocm_smi-backward-compat.cmake b/rocm_smi-backward-compat.cmake +index aa8fd9c..59afce5 100644 +--- a/rocm_smi-backward-compat.cmake ++++ b/rocm_smi-backward-compat.cmake +@@ -72,7 +72,12 @@ function(generate_wrapper_header) + set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H") + #set #include statement + get_filename_component(file_name ${header_file} NAME) +- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${ROCM_SMI}/${file_name}\"\n") ++ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR}) ++ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}") ++ else() ++ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}") ++ endif() ++ set(include_statements "${include_statements}#include \"${include_dir}/${ROCM_SMI}/${file_name}\"\n") + configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${RSMI_WRAPPER_INC_DIR}/${file_name}) + unset(include_guard) + unset(include_statements) +@@ -90,7 +95,12 @@ function(generate_wrapper_header) + set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H") + #set #include statement + get_filename_component(file_name ${header_file} NAME) +- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${OAM_TARGET_NAME}/${file_name}\"\n") ++ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR}) ++ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}") ++ else() ++ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}") ++ endif() ++ set(include_statements "${include_statements}#include \"${include_dir}/${OAM_TARGET_NAME}/${file_name}\"\n") + configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${OAM_WRAPPER_INC_DIR}/${file_name}) + unset(include_guard) + unset(include_statements) +@@ -123,11 +133,16 @@ function(create_library_symlink) + set(library_files "${LIB_RSMI}") + endif() + ++ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR}) ++ set(install_libdir 
"${CMAKE_INSTALL_LIBDIR}") ++ else() ++ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}") ++ endif() + foreach(file_name ${library_files}) + add_custom_target(link_${file_name} ALL + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink +- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name}) ++ ${install_libdir}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name}) + endforeach() + + file(MAKE_DIRECTORY ${OAM_WRAPPER_LIB_DIR}) +@@ -151,11 +166,16 @@ function(create_library_symlink) + set(library_files "${LIB_OAM}") + endif() + ++ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR}) ++ set(install_libdir "${CMAKE_INSTALL_LIBDIR}") ++ else() ++ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}") ++ endif() + foreach(file_name ${library_files}) + add_custom_target(link_${file_name} ALL + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink +- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name}) ++ ${install_libdir}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name}) + endforeach() + + endfunction() +diff --git a/rocm_smi/CMakeLists.txt b/rocm_smi/CMakeLists.txt +index c594eeb..d3ed39d 100755 +--- a/rocm_smi/CMakeLists.txt ++++ b/rocm_smi/CMakeLists.txt +@@ -105,10 +105,15 @@ endif () + #file reorganization changes + #rocm_smi.py moved to libexec/rocm_smi. 
so creating rocm-smi symlink + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) ++if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBEXECDIR}) ++ set(install_libexecdir "${CMAKE_INSTALL_LIBEXECDIR}") ++else() ++ set(install_libexecdir "../${CMAKE_INSTALL_LIBEXECDIR}") ++endif() + add_custom_target(link-rocm-smi ALL + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink +- ../${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi) ++ ${install_libexecdir}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi) + + ## Add the install directives for the runtime library. + install(TARGETS ${ROCM_SMI_TARGET} diff --git a/pkgs/rocm-modules/rocm-smi/default.nix b/pkgs/rocm-modules/rocm-smi/default.nix new file mode 100644 index 0000000..ea3238b --- /dev/null +++ b/pkgs/rocm-modules/rocm-smi/default.nix @@ -0,0 +1,58 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + wrapPython, + libdrm, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocm-smi"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocm_smi_lib"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-qshAMVhHJIA06fGOTJx5/l1t8wKv7KbmvdCSKNuEIKM="; + }; + + patches = [ ./cmake.patch ]; + + nativeBuildInputs = [ + cmake + wrapPython + libdrm + ]; + + cmakeFlags = [ + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + postInstall = '' + wrapPythonProgramsIn $out + mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py + mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "System 
management interface for AMD GPUs supported by ROCm"; + homepage = "https://github.com/ROCm/rocm_smi_lib"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ]; + teams = [ teams.rocm ]; + platforms = [ "x86_64-linux" ]; + }; +}) diff --git a/pkgs/rocm-modules/rocm-tests/default.nix b/pkgs/rocm-modules/rocm-tests/default.nix new file mode 100644 index 0000000..84762ff --- /dev/null +++ b/pkgs/rocm-modules/rocm-tests/default.nix @@ -0,0 +1,32 @@ +{ + clr, + ollama, + python3Packages, + rocmPackages, + magma-hip, + emptyDirectory, + stdenv, +}: +# This package exists purely to have a bunch of passthru.tests attrs +stdenv.mkDerivation { + name = "rocm-tests"; + nativeBuildInputs = [ + clr + ]; + src = emptyDirectory; + postInstall = "mkdir -p $out"; + passthru.tests = { + ollama = ollama.override { + inherit rocmPackages; + acceleration = "rocm"; + }; + torch = python3Packages.torch.override { + inherit rocmPackages; + rocmSupport = true; + cudaSupport = false; + magma-hip = magma-hip.override { + inherit rocmPackages; + }; + }; + }; +} diff --git a/pkgs/rocm-modules/rocminfo/default.nix b/pkgs/rocm-modules/rocminfo/default.nix new file mode 100644 index 0000000..616bf6f --- /dev/null +++ b/pkgs/rocm-modules/rocminfo/default.nix @@ -0,0 +1,65 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocm-runtime, + busybox, + python3, + gnugrep, + clr, # Only for localGpuTargets + # rocminfo requires that the calling user have a password and be in + # the video group. If we let rocm_agent_enumerator rely upon + # rocminfo's output, then it, too, has those requirements. Instead, + # we can specify the GPU targets for this system (e.g. "gfx803" for + # Polaris) such that no system call is needed for downstream + # compilers to determine the desired target. + defaultTargets ? 
(clr.localGpuTargets or [ ]),
+}:
+
+stdenv.mkDerivation (finalAttrs: {
+  version = "6.4.1";
+  pname = "rocminfo";
+
+  src = fetchFromGitHub {
+    owner = "ROCm";
+    repo = "rocminfo";
+    rev = "rocm-${finalAttrs.version}";
+    hash = "sha256-YscZ5sFsLOVBg98w2X6vTzniTvl9NfCkIE+HAH6vv5Y="; # SRI string: use `hash`, like the sibling ROCm packages
+  };
+
+  nativeBuildInputs = [
+    cmake
+    rocm-cmake
+  ];
+
+  buildInputs = [ rocm-runtime ];
+  propagatedBuildInputs = [ python3 ];
+  cmakeFlags = [ "-DROCRTST_BLD_TYPE=Release" ];
+
+  prePatch = ''
+    patchShebangs rocm_agent_enumerator
+    sed 's,lsmod | grep ,${busybox}/bin/lsmod | ${gnugrep}/bin/grep ,' -i rocminfo.cc
+  '';
+
+  postInstall = lib.optionalString (defaultTargets != [ ]) ''
+    echo '${lib.concatStringsSep "\n" defaultTargets}' > $out/bin/target.lst
+  '';
+
+  passthru.updateScript = rocmUpdateScript {
+    name = finalAttrs.pname;
+    inherit (finalAttrs.src) owner;
+    inherit (finalAttrs.src) repo;
+  };
+
+  meta = with lib; {
+    description = "ROCm Application for Reporting System Info";
+    homepage = "https://github.com/ROCm/rocminfo";
+    license = licenses.ncsa;
+    maintainers = with maintainers; [ lovesegfault ];
+    teams = [ teams.rocm ];
+    platforms = platforms.linux;
+  };
+})
diff --git a/pkgs/rocm-modules/rocmlir/default.nix b/pkgs/rocm-modules/rocmlir/default.nix
new file mode 100644
index 0000000..8e7e6cc
--- /dev/null
+++ b/pkgs/rocm-modules/rocmlir/default.nix
@@ -0,0 +1,165 @@
+{
+  lib,
+  stdenv,
+  fetchFromGitHub,
+  rocmUpdateScript,
+  cmake,
+  rocm-cmake,
+  rocminfo,
+  clr,
+  git,
+  libxml2,
+  libedit,
+  zstd,
+  zlib,
+  ncurses,
+  python3Packages,
+  buildRockCompiler ? false,
+  buildTests ? false, # `argument of type 'NoneType' is not iterable`
+}:
+
+# FIXME: rocmlir has an entire separate LLVM build in a subdirectory this is silly
+# It seems to be forked from AMD's own LLVM
+# If possible reusing the rocmPackages.llvm build would be better
+# Would have to confirm it is compatible with ROCm's tagged LLVM.
+# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways +# in subdirs + +# Theoretically, we could have our MLIR have an output +# with the source and built objects so that we can just +# use it as the external LLVM repo for this +let + suffix = if buildRockCompiler then "-rock" else ""; + + llvmNativeTarget = + if stdenv.hostPlatform.isx86_64 then + "X86" + else if stdenv.hostPlatform.isAarch64 then + "AArch64" + else + throw "Unsupported ROCm LLVM platform"; +in +stdenv.mkDerivation (finalAttrs: { + pname = "rocmlir${suffix}"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals (!buildRockCompiler) [ + "external" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocMLIR"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-p/gvr1Z6yZtO5N+ecSouXiCrf520jt1HMOy/tohUHfI="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + python3Packages.python + python3Packages.tomli + ]; + + buildInputs = [ + git + libxml2 + libedit + ]; + + propagatedBuildInputs = [ + zstd + zlib + ncurses + ]; + + patches = [ + ./initparamdata-sort-const.patch + ]; + + cmakeFlags = + [ + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_USE_LINKER=lld" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_LIBCXX=ON" + "-DLLVM_ENABLE_TERMINFO=ON" + "-DROCM_PATH=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler) + ] + ++ lib.optionals (!buildRockCompiler) [ + "-DROCM_TEST_CHIPSET=gfx000" + ]; + + postPatch = '' + patchShebangs mlir + patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py + + # Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read' + substituteInPlace 
mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
+      --replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"
+
+    # remove when no longer required
+    substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \
+      --replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin"
+
+    substituteInPlace mlir/utils/performance/common/CMakeLists.txt \
+      --replace-fail "/opt/rocm" "${clr}"
+  '';
+
+  dontBuild = true;
+  doCheck = true;
+
+  # Certain libs aren't being generated, try enabling tests next update
+  checkTarget =
+    if buildRockCompiler then
+      "librockCompiler"
+    else if buildTests then
+      "check-rocmlir"
+    else
+      "check-rocmlir-build-only";
+
+  postInstall = # a shell snippet, so gate it with optionalString (optionals is the list helper and yields [ ] when disabled)
+    let
+      libPath = lib.makeLibraryPath [
+        zstd
+        zlib
+        ncurses
+        clr
+        stdenv.cc.cc
+      ];
+    in
+    lib.optionalString (!buildRockCompiler) ''
+      mkdir -p $external/lib
+      cp -a external/llvm-project/llvm/lib/{*.a*,*.so*} $external/lib
+      patchelf --set-rpath $external/lib:$out/lib:${libPath} $external/lib/*.so*
+      patchelf --set-rpath $out/lib:$external/lib:${libPath} $out/{bin/*,lib/*.so*}
+    '';
+
+  passthru.updateScript = rocmUpdateScript {
+    name = finalAttrs.pname;
+    inherit (finalAttrs.src) owner;
+    inherit (finalAttrs.src) repo;
+    page = "tags?per_page=4";
+  };
+
+  meta = with lib; {
+    description = "MLIR-based convolution and GEMM kernel generator";
+    homepage = "https://github.com/ROCm/rocMLIR";
+    license = with licenses; [ asl20 ];
+    teams = [ teams.rocm ];
+    platforms = platforms.linux;
+  };
+})
diff --git a/pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch b/pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch
new file mode 100644
index 0000000..37f521f
--- /dev/null
+++ b/pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch
@@ -0,0 +1,13 @@
+diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
+index 3f5ee596819a..590d53788822 100644
+--- 
a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h ++++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h +@@ -209,7 +209,7 @@ private: + size_t original_pos; + int64_t padding_amount; + +- bool operator<(const InitParamData &rhs) { ++ bool operator<(const InitParamData &rhs) const { + if (this->padding_amount < rhs.padding_amount) { + return true; + } else if (this->padding_amount == rhs.padding_amount) { diff --git a/pkgs/rocm-modules/rocprim/default.nix b/pkgs/rocm-modules/rocprim/default.nix new file mode 100644 index 0000000..d7dd7cf --- /dev/null +++ b/pkgs/rocm-modules/rocprim/default.nix @@ -0,0 +1,98 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocprim"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocPRIM"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-EP61n4syYMWjTDkjC0dWLj9yau6KL2qu1Bs5IBtw580="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + ]; + + buildInputs = + lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; + + cmakeFlags = + [ + "-DCMAKE_BUILD_TYPE=Release" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + mv $out/bin/rocprim $test/bin + '' + 
+ lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm parallel primitives"; + homepage = "https://github.com/ROCm/rocPRIM"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocprofiler-register/default.nix b/pkgs/rocm-modules/rocprofiler-register/default.nix new file mode 100644 index 0000000..46713b4 --- /dev/null +++ b/pkgs/rocm-modules/rocprofiler-register/default.nix @@ -0,0 +1,79 @@ +{ + lib, + stdenv, + rocm-runtime, + rocprofiler, + numactl, + libpciaccess, + libxml2, + elfutils, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clang, + clr, + python3Packages, + gpuTargets ? 
clr.gpuTargets,
+}:
+
+stdenv.mkDerivation (finalAttrs: {
+  pname = "rocprofiler-register";
+  version = "6.4.1";
+
+  src = fetchFromGitHub {
+    owner = "ROCm";
+    repo = "rocprofiler-register";
+    rev = "rocm-${finalAttrs.version}";
+    hash = "sha256-HaN4XMHuCRDfKOpfuZ2SkOEQfAZKouh6luqbtATUYm0=";
+    fetchSubmodules = true;
+  };
+
+  nativeBuildInputs = [
+    cmake
+    clang
+    clr
+  ];
+
+  buildInputs = [
+    numactl
+    libpciaccess
+    libxml2
+    elfutils
+    rocm-runtime
+
+    rocprofiler.rocmtoolkit-merged
+
+    python3Packages.lxml
+    python3Packages.cppheaderparser
+    python3Packages.pyyaml
+    python3Packages.barectf
+    python3Packages.pandas
+  ];
+  cmakeFlags = [
+    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
+    "-DHIP_ROOT_DIR=${clr}"
+    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
+    "-DBUILD_TEST=OFF"
+    "-DROCPROFILER_BUILD_TESTS=0"
+    "-DROCPROFILER_BUILD_SAMPLES=0"
+    # Manually define CMAKE_INSTALL_<DIR>
+    # See: https://github.com/NixOS/nixpkgs/pull/197838
+    "-DCMAKE_INSTALL_BINDIR=bin"
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+  ];
+
+  passthru.updateScript = rocmUpdateScript {
+    name = finalAttrs.pname;
+    inherit (finalAttrs.src) owner;
+    inherit (finalAttrs.src) repo;
+  };
+
+  meta = with lib; {
+    description = "Registration helper library for ROCm profiling tools (rocprofiler)"; # NOTE(review): wording to confirm against upstream README
+    homepage = "https://github.com/ROCm/rocprofiler-register"; # must match src.owner/src.repo, not rocprofiler
+    license = with licenses; [ mit ]; # mitx11
+    teams = [ teams.rocm ];
+    platforms = platforms.linux;
+  };
+})
diff --git a/pkgs/rocm-modules/rocprofiler/0000-dont-install-tests-hsaco.patch b/pkgs/rocm-modules/rocprofiler/0000-dont-install-tests-hsaco.patch
new file mode 100644
index 0000000..70550d0
--- /dev/null
+++ b/pkgs/rocm-modules/rocprofiler/0000-dont-install-tests-hsaco.patch
@@ -0,0 +1,15 @@
+diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
+index 46efbd5..ca2cc3b 100644
+--- a/test/CMakeLists.txt
++++ b/test/CMakeLists.txt
+@@ -127,10 +127,6 @@ function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
+         DEPENDS 
${INPUT_FILE} clang + COMMENT "Building ${OUTPUT_FILE}..." + VERBATIM) +- install( +- FILES ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} +- DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests-v1 +- COMPONENT tests) + set(HSACO_TARGET_LIST + ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} + PARENT_SCOPE) diff --git a/pkgs/rocm-modules/rocprofiler/default.nix b/pkgs/rocm-modules/rocprofiler/default.nix new file mode 100644 index 0000000..9cdd5e7 --- /dev/null +++ b/pkgs/rocm-modules/rocprofiler/default.nix @@ -0,0 +1,142 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + symlinkJoin, + cmake, + clang, + clr, + rocm-core, + rocm-runtime, + rocm-device-libs, + roctracer, + rocdbgapi, + numactl, + libpciaccess, + libxml2, + elfutils, + mpi, + systemd, + gtest, + git, + python3Packages, + gpuTargets ? clr.gpuTargets, +}: + +let + rocmtoolkit-merged = symlinkJoin { + name = "rocmtoolkit-merged"; + + paths = [ + rocm-core + rocm-runtime + rocm-device-libs + roctracer + rocdbgapi + clr + ]; + + postBuild = '' + rm -rf $out/nix-support + ''; + }; +in +stdenv.mkDerivation (finalAttrs: { + pname = "rocprofiler"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocprofiler"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-CgW8foM4W3K19kUK/l8IsH2Q9DHi/z88viXTxhNqlHQ="; + fetchSubmodules = true; + }; + + patches = [ + # These just simply won't build + ./0000-dont-install-tests-hsaco.patch + ./optional-aql-in-cmake.patch + ]; + + nativeBuildInputs = [ + cmake + clang + clr + git + python3Packages.lxml + python3Packages.cppheaderparser + python3Packages.pyyaml + python3Packages.barectf + python3Packages.pandas + ]; + + buildInputs = [ + numactl + libpciaccess + libxml2 + elfutils + mpi + systemd + gtest + ]; + + propagatedBuildInputs = [ rocmtoolkit-merged ]; + + # HACK: allow building without aqlprofile, probably explodes at runtime if use profiling + env.LDFLAGS = "-z nodefs 
-Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all";
+  #HACK: rocprofiler's cmake doesn't add these deps properly
+  env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";
+
+  cmakeFlags = [
+    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
+    "-DHIP_ROOT_DIR=${clr}"
+    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
+    "-DBUILD_TEST=OFF"
+    "-DROCPROFILER_BUILD_TESTS=0"
+    "-DROCPROFILER_BUILD_SAMPLES=0"
+    # Manually define CMAKE_INSTALL_<DIR>
+    # See: https://github.com/NixOS/nixpkgs/pull/197838
+    "-DCMAKE_INSTALL_BINDIR=bin"
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+  ];
+
+  postPatch = ''
+    patchShebangs .
+
+    substituteInPlace cmake_modules/rocprofiler_utils.cmake \
+      --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
+      return()'
+
+    substituteInPlace CMakeLists.txt \
+      --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""
+
+    substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
+      --replace-warn "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
+
+    substituteInPlace test/CMakeLists.txt \
+      --replace-warn "\''${ROCM_ROOT_DIR}/amdgcn/bitcode" "${rocm-device-libs}/amdgcn/bitcode"
+  '';
+
+  postInstall = ''
+    # Why do these have the executable bit set?
+ chmod -x $out/libexec/rocprofiler/counters/*.xml + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + passthru.rocmtoolkit-merged = rocmtoolkit-merged; + + meta = with lib; { + description = "Profiling with perf-counters and derived metrics"; + homepage = "https://github.com/ROCm/rocprofiler"; + license = with licenses; [ mit ]; # mitx11 + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch b/pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch new file mode 100644 index 0000000..83bf88f --- /dev/null +++ b/pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch @@ -0,0 +1,147 @@ +From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch +diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake +index 7b7c472..0aba3ed 100644 +--- a/cmake_modules/rocprofiler_env.cmake ++++ b/cmake_modules/rocprofiler_env.cmake +@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE) + target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1) + endif() + ++set(ROCPROFILER_LD_AQLPROFILE false) + # Enable direct loading of AQL-profile HSA extension + if(ROCPROFILER_LD_AQLPROFILE) + target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1) +@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "") + message(FATAL_ERROR "ROCM_ROOT_DIR is not found.") + endif() + +-find_library( +- HSA_AMD_AQLPROFILE_LIBRARY +- NAMES hsa-amd-aqlprofile64 +- HINTS ${CMAKE_PREFIX_PATH} +- PATHS ${ROCM_ROOT_DIR} +- PATH_SUFFIXES lib REQUIRED) +diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt +index 61782f0..16c83bf 100644 +--- a/src/api/CMakeLists.txt ++++ b/src/api/CMakeLists.txt +@@ -51,15 +51,6 @@ find_file( + NO_DEFAULT_PATH REQUIRED) + get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY) + 
+-find_library( +- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so" +- HINTS ${CMAKE_PREFIX_PATH} +- PATHS ${ROCM_PATH} +- PATH_SUFFIXES lib) +- +-if(NOT AQLPROFILE_LIB) +- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!") +-endif() + + # ######################################################################################## + # Adding Old Library Files +@@ -247,7 +238,7 @@ target_include_directories( + PUBLIC $ + PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler) + target_link_libraries( +- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++ ++ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++ + dl rocprofiler::build-flags rocprofiler::memcheck) + + get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME) +@@ -325,8 +316,7 @@ target_link_options( + -Wl,--no-undefined) + target_link_libraries( + rocprofiler-v2 +- PRIVATE ${AQLPROFILE_LIB} +- hsa-runtime64::hsa-runtime64 ++ PRIVATE hsa-runtime64::hsa-runtime64 + Threads::Threads + atomic + numa +diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp +index 2c47186..6b39634 100644 +--- a/src/util/hsa_rsrc_factory.cpp ++++ b/src/util/hsa_rsrc_factory.cpp +@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize + if (kern_arg_pool_ == nullptr) + CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + +- // Get AqlProfile API table +- aqlprofile_api_ = {}; +-#ifdef ROCP_LD_AQLPROFILE +- status = LoadAqlProfileLib(&aqlprofile_api_); +-#else +- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, +- hsa_ven_amd_aqlprofile_VERSION_MAJOR, +- sizeof(aqlprofile_api_), &aqlprofile_api_); +-#endif +- CHECK_STATUS("aqlprofile API table load failed", status); +- + // Get Loader API table + loader_api_ = {}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, 
+diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp +index 0a44d18..fab5b75 100644 +--- a/test/util/hsa_rsrc_factory.cpp ++++ b/test/util/hsa_rsrc_factory.cpp +@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize + if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); + if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + +- // Get AqlProfile API table +- aqlprofile_api_ = {0}; +-#ifdef ROCP_LD_AQLPROFILE +- status = LoadAqlProfileLib(&aqlprofile_api_); +-#else +- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, +- hsa_ven_amd_aqlprofile_VERSION_MAJOR, +- sizeof(aqlprofile_api_), &aqlprofile_api_); +-#endif +- CHECK_STATUS("aqlprofile API table load failed", status); +- + // Get Loader API table + loader_api_ = {0}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, +diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt +index 107cb51..0f6d4bf 100644 +--- a/tests-v2/unittests/core/CMakeLists.txt ++++ b/tests-v2/unittests/core/CMakeLists.txt +@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES + INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests") + target_link_libraries( + runCoreUnitTests +- PRIVATE ${AQLPROFILE_LIB} +- test_hsatool_library ++ PRIVATE test_hsatool_library + hsa-runtime64::hsa-runtime64 + Threads::Threads + GTest::gtest GTest::gtest_main +@@ -285,4 +284,4 @@ endif() + # for the *_FilePlugin tests + if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output") + file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output") +-endif() +\ No newline at end of file ++endif() +diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt +index 53180d5..0c4d4a7 100644 +--- a/tests-v2/unittests/profiler/CMakeLists.txt 
++++ b/tests-v2/unittests/profiler/CMakeLists.txt +@@ -122,7 +122,7 @@ target_compile_definitions( + PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) + + target_link_libraries( +- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 ++ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64 + GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl) + + add_dependencies(tests runUnitTests) +@@ -158,4 +158,4 @@ endif() + # for the *_FilePlugin tests + if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output") + file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output") +-endif() +\ No newline at end of file ++endif() diff --git a/pkgs/rocm-modules/rocr-debug-agent/default.nix b/pkgs/rocm-modules/rocr-debug-agent/default.nix new file mode 100644 index 0000000..54dc2f6 --- /dev/null +++ b/pkgs/rocm-modules/rocr-debug-agent/default.nix @@ -0,0 +1,59 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clr, + git, + rocdbgapi, + elfutils, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocr-debug-agent"; + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocr_debug_agent"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-otoxZ2NHkPDIFhvn4/nvaQ/W4LF38Nx9MZ9IYEf1DyY="; + }; + + nativeBuildInputs = [ + cmake + clr + git + ]; + + buildInputs = [ + rocdbgapi + elfutils + ]; + + cmakeFlags = [ + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" + "-DHIP_ROOT_DIR=${clr}" + "-DHIP_PATH=${clr}" + ]; + + # Weird install target + postInstall = '' + rm -rf $out/src + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Library that provides some debugging functionality for ROCr"; + homepage = "https://github.com/ROCm/rocr_debug_agent"; + license = with licenses; [ ncsa ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + 
}; +}) diff --git a/pkgs/rocm-modules/rocrand/default.nix b/pkgs/rocm-modules/rocrand/default.nix new file mode 100644 index 0000000..219c5ee --- /dev/null +++ b/pkgs/rocm-modules/rocrand/default.nix @@ -0,0 +1,99 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? clr.localGpuTargets or [ ], +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocrand${clr.gpuArchSuffix}"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocRAND"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-XqGPXx+LqjJs602vtG5u578B1hitGlsopA0izbClHro="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + ]; + + buildInputs = + lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; + + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rm -r $out/bin/rocRAND + # Fail if bin/ isn't actually empty + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with 
lib; { + description = "Generate pseudo-random and quasi-random numbers"; + homepage = "https://github.com/ROCm/rocRAND"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocsolver/default.nix b/pkgs/rocm-modules/rocsolver/default.nix new file mode 100644 index 0000000..9ea24ec --- /dev/null +++ b/pkgs/rocm-modules/rocsolver/default.nix @@ -0,0 +1,135 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocblas, + rocprim, + rocsparse, + clr, + fmt, + gtest, + gfortran, + lapack-reference, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1010" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + "gfx1201" + ] + ), +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocsolver${clr.gpuArchSuffix}"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocSOLVER"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-u5GRWetMnhEBJ9HZcXoEaqpdO8f0cuSPnq+XawljfUs="; + }; + + nativeBuildInputs = + [ + cmake + # no ninja, it buffers console output and nix times out long periods of no output + rocm-cmake + clr + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + gfortran + ]; + + buildInputs = + [ + # FIXME: rocblas and rocsolver can't build in parallel + # but rocsolver doesn't need rocblas' offload builds at build time + # could we build against a rocblas-minimal? 
+ rocblas + rocprim + rocsparse + fmt + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + cmakeFlags = + [ + "-DHIP_CLANG_NUM_PARALLEL_JOBS=4" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/rocsolver-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/rocsolver-bench $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = "rocsolver"; + inherit (finalAttrs.src) owner repo; + }; + + requiredSystemFeatures = [ "big-parallel" ]; + + meta = with lib; { + description = "ROCm LAPACK implementation"; + homepage = "https://github.com/ROCm/rocSOLVER"; + license = with licenses; [ bsd2 ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + timeout = 14400; # 4 hours + maxSilent = 14400; # 4 hours + }; +}) diff --git a/pkgs/rocm-modules/rocsparse/default.nix b/pkgs/rocm-modules/rocsparse/default.nix new file mode 100644 index 0000000..5c3e4ff --- /dev/null +++ b/pkgs/rocm-modules/rocsparse/default.nix @@ -0,0 +1,161 @@ +{ + lib, + stdenv, + fetchFromGitHub, + fetchzip, + rocmUpdateScript, + cmake, + rocm-cmake, + rocprim, + clr, + gfortran, + git, + gtest, + boost, + python3Packages, + buildTests ? false, + buildBenchmarks ? false, # Seems to depend on tests + gpuTargets ? 
clr.localGpuTargets or clr.gpuTargets, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocsparse${clr.gpuArchSuffix}"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocSPARSE"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-l7rOuVthfrSO5bnhgm49cjPnRbV/2sFhSRT+mShkBek="; + }; + + nativeBuildInputs = [ + cmake + # no ninja, it buffers console output and nix times out long periods of no output + rocm-cmake + clr + gfortran + ]; + + buildInputs = + [ + rocprim + git + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + gtest + boost + python3Packages.python + python3Packages.pyyaml + ]; + + cmakeFlags = + [ + "-DCMAKE_BUILD_TYPE=Release" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + "-DBUILD_CLIENTS_TESTS=ON" + "-DCMAKE_MATRICES_DIR=/build/source/matrices" + "-Dpython=python3" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ]; + + # We have to manually generate the matrices + postPatch = lib.optionalString (buildTests || buildBenchmarks) '' + mkdir -p matrices + + ln -s ${finalAttrs.passthru.matrices.matrix-01}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-02}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-03}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-04}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-05}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-06}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-07}/*.mtx matrices + ln -s 
${finalAttrs.passthru.matrices.matrix-08}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-09}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-10}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-11}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-12}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-13}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-14}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-15}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-16}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-17}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-18}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-19}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-20}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-21}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-22}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-23}/*.mtx matrices + ln -s ${finalAttrs.passthru.matrices.matrix-24}/*.mtx matrices + + # Not used by the original cmake, causes an error + rm matrices/*_b.mtx + + echo "deps/convert.cpp -> deps/mtx2csr" + hipcc deps/convert.cpp -O3 -o deps/mtx2csr + + for mat in $(ls -1 matrices | cut -d "." 
-f 1); do + echo "mtx2csr: $mat.mtx -> $mat.csr" + deps/mtx2csr matrices/$mat.mtx matrices/$mat.csr + unlink matrices/$mat.mtx + done + ''; + + postInstall = + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + cp -a $out/bin/* $benchmark/bin + rm $benchmark/bin/rocsparse-test + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + mkdir -p $test/bin + mv $out/bin/* $test/bin + rm $test/bin/rocsparse-bench || true + mv /build/source/matrices $test + rmdir $out/bin + ''; + + passthru = { + matrices = import ./deps.nix { + inherit fetchzip; + mirror1 = "https://sparse.tamu.edu/MM"; + mirror2 = "https://www.cise.ufl.edu/research/sparse/MM"; + }; + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + }; + + meta = with lib; { + description = "ROCm SPARSE implementation"; + homepage = "https://github.com/ROCm/rocSPARSE"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocsparse/deps.nix b/pkgs/rocm-modules/rocsparse/deps.nix new file mode 100644 index 0000000..e39b6de --- /dev/null +++ b/pkgs/rocm-modules/rocsparse/deps.nix @@ -0,0 +1,223 @@ +{ + fetchzip, + mirror1, + mirror2, +}: + +{ + matrix-01 = fetchzip { + sha256 = "sha256-AHur5ZIDZTFRrO2GV0ieXrffq4KUiGWiZ59pv0fUtEQ="; + + urls = [ + "${mirror1}/SNAP/amazon0312.tar.gz" + "${mirror2}/SNAP/amazon0312.tar.gz" + ]; + }; + + matrix-02 = fetchzip { + sha256 = "sha256-0rSxaN4lQcdaCLsvlgicG70FXUxXeERPiEmQ4MzbRdE="; + + urls = [ + "${mirror1}/Muite/Chebyshev4.tar.gz" + "${mirror2}/Muite/Chebyshev4.tar.gz" + ]; + }; + + matrix-03 = fetchzip { + sha256 = "sha256-hDzDWDUnHEyFedX/tMNq83ZH8uWyM4xtZYUUAD3rizo="; + + urls = [ + "${mirror1}/FEMLAB/sme3Dc.tar.gz" + "${mirror2}/FEMLAB/sme3Dc.tar.gz" + ]; + }; + + matrix-04 = fetchzip { + sha256 = "sha256-GmN2yOt/MoX01rKe05aTyB3ypUP4YbQGOITZ0BqPmC0="; + + urls = [ + 
"${mirror1}/Williams/webbase-1M.tar.gz" + "${mirror2}/Williams/webbase-1M.tar.gz" + ]; + }; + + matrix-05 = fetchzip { + sha256 = "sha256-gQNjfVyWzNM9RwImJGhkhahRmZz74LzDs1oijL7mI7k="; + + urls = [ + "${mirror1}/Williams/mac_econ_fwd500.tar.gz" + "${mirror2}/Williams/mac_econ_fwd500.tar.gz" + ]; + }; + + matrix-06 = fetchzip { + sha256 = "sha256-87cdZjntNcTuz5BtO59irhcuRbPllWSbhCEX3Td02qc="; + + urls = [ + "${mirror1}/Williams/mc2depi.tar.gz" + "${mirror2}/Williams/mc2depi.tar.gz" + ]; + }; + + matrix-07 = fetchzip { + sha256 = "sha256-WRamuJX3D8Tm+k0q67RjUDG3DeNAxhKiaPkk5afY5eU="; + + urls = [ + "${mirror1}/Bova/rma10.tar.gz" + "${mirror2}/Bova/rma10.tar.gz" + ]; + }; + + matrix-08 = fetchzip { + sha256 = "sha256-5dhkm293Mc3lzakKxHy5W5XIn4Rw+gihVh7gyrjEHXo="; + + urls = [ + "${mirror1}/JGD_BIBD/bibd_22_8.tar.gz" + "${mirror2}/JGD_BIBD/bibd_22_8.tar.gz" + ]; + }; + + matrix-09 = fetchzip { + sha256 = "sha256-czjLWCjXAjZCk5TGYHaEkwSAzQu3TQ3QyB6eNKR4G88="; + + urls = [ + "${mirror1}/Hamm/scircuit.tar.gz" + "${mirror2}/Hamm/scircuit.tar.gz" + ]; + }; + + matrix-10 = fetchzip { + sha256 = "sha256-bYuLnJViAIcIejAkh69/bsNAVIDU4wfTLtD+nmHd6FM="; + + urls = [ + "${mirror1}/Sandia/ASIC_320k.tar.gz" + "${mirror2}/Sandia/ASIC_320k.tar.gz" + ]; + }; + + matrix-11 = fetchzip { + sha256 = "sha256-aDwn8P1khYjo2Agbq5m9ZBInJUxf/knJNvyptt0fak0="; + + urls = [ + "${mirror1}/GHS_psdef/bmwcra_1.tar.gz" + "${mirror2}/GHS_psdef/bmwcra_1.tar.gz" + ]; + }; + + matrix-12 = fetchzip { + sha256 = "sha256-8OJqA/byhlAZd869TPUzZFdsOiwOoRGfKyhM+RMjXoY="; + + urls = [ + "${mirror1}/HB/nos1.tar.gz" + "${mirror2}/HB/nos1.tar.gz" + ]; + }; + + matrix-13 = fetchzip { + sha256 = "sha256-FS0rKqmg+uHwsM/yGfQLBdd7LH/rUrdutkNGBD/Mh1I="; + + urls = [ + "${mirror1}/HB/nos2.tar.gz" + "${mirror2}/HB/nos2.tar.gz" + ]; + }; + + matrix-14 = fetchzip { + sha256 = "sha256-DANnlrNJikrI7Pst9vRedtbuxepyHmCIu2yhltc4Qcs="; + + urls = [ + "${mirror1}/HB/nos3.tar.gz" + "${mirror2}/HB/nos3.tar.gz" + ]; + }; + + matrix-15 = 
fetchzip { + sha256 = "sha256-21mUgqjWGUfYgiWwSrKh9vH8Vdt3xzcefmqYNYRpxiY="; + + urls = [ + "${mirror1}/HB/nos4.tar.gz" + "${mirror2}/HB/nos4.tar.gz" + ]; + }; + + matrix-16 = fetchzip { + sha256 = "sha256-FOuXvGqBBFNkVS6cexmkluret54hCfCOdK+DOZllE4c="; + + urls = [ + "${mirror1}/HB/nos5.tar.gz" + "${mirror2}/HB/nos5.tar.gz" + ]; + }; + + matrix-17 = fetchzip { + sha256 = "sha256-+7NI1rA/qQxYPpjXKHvAaCZ+LSaAJ4xuJvMRMBEUYxg="; + + urls = [ + "${mirror1}/HB/nos6.tar.gz" + "${mirror2}/HB/nos6.tar.gz" + ]; + }; + + matrix-18 = fetchzip { + sha256 = "sha256-q3NxJjbwGGcFiQ9nhWfUKgZmdVwCfPmgQoqy0AqOsNc="; + + urls = [ + "${mirror1}/HB/nos7.tar.gz" + "${mirror2}/HB/nos7.tar.gz" + ]; + }; + + matrix-19 = fetchzip { + sha256 = "sha256-0GAN6qmVfD+tprIigzuUUUwm5KVhkN9X65wMEvFltDY="; + + urls = [ + "${mirror1}/DNVS/shipsec1.tar.gz" + "${mirror2}/DNVS/shipsec1.tar.gz" + ]; + }; + + matrix-20 = fetchzip { + sha256 = "sha256-f28Du/Urxsiq5NkRmRO10Zz9vvGRjEchquzHzbZpZ7U="; + + urls = [ + "${mirror1}/Cote/mplate.tar.gz" + "${mirror2}/Cote/mplate.tar.gz" + ]; + }; + + matrix-21 = fetchzip { + sha256 = "sha256-O+Wy0NfCU1hVUOfNR1dJpvDHLBwwa301IRJDrQJnhak="; + + urls = [ + "${mirror1}/Bai/qc2534.tar.gz" + "${mirror2}/Bai/qc2534.tar.gz" + ]; + }; + + matrix-22 = fetchzip { + sha256 = "sha256-oxMnt8U5Cf1ILWcBdU6W9jdSMMm+U6bIVl8nm3n3+OA="; + + urls = [ + "${mirror1}/Chevron/Chevron2.tar.gz" + "${mirror2}/Chevron/Chevron2.tar.gz" + ]; + }; + + matrix-23 = fetchzip { + sha256 = "sha256-MFD9BxFI/3IS7yatW121BAI04fbqrXpgYDT5UKjeKcU="; + + urls = [ + "${mirror1}/Chevron/Chevron3.tar.gz" + "${mirror2}/Chevron/Chevron3.tar.gz" + ]; + }; + + matrix-24 = fetchzip { + sha256 = "sha256-ikS8O51pe1nt3BNyhvfvqCbVL0+bg/da9bqGqeBDkTg="; + + urls = [ + "${mirror1}/Chevron/Chevron4.tar.gz" + "${mirror2}/Chevron/Chevron4.tar.gz" + ]; + }; +} diff --git a/pkgs/rocm-modules/rocthrust/default.nix b/pkgs/rocm-modules/rocthrust/default.nix new file mode 100644 index 0000000..cbbe5bd --- /dev/null +++ 
b/pkgs/rocm-modules/rocthrust/default.nix @@ -0,0 +1,94 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocprim, + clr, + gtest, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocthrust"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocThrust"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-bHyqG0pSt6bc6cDMnd1uY+0o+V3cxdp0mUEzWYRdd20="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + rocprim + clr + ]; + + buildInputs = lib.optionals buildTests [ + gtest + ]; + + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARKS=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/{test_*,*.hip} $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rm -rf $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "ROCm parallel algorithm library"; + homepage = "https://github.com/ROCm/rocThrust"; + license = with licenses; [ asl20 ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/roctracer/default.nix 
b/pkgs/rocm-modules/roctracer/default.nix new file mode 100644 index 0000000..d328f49 --- /dev/null +++ b/pkgs/rocm-modules/roctracer/default.nix @@ -0,0 +1,120 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clr, + rocm-device-libs, + libxml2, + doxygen, + graphviz, + gcc-unwrapped, + libbacktrace, + rocm-runtime, + python3Packages, + buildDocs ? false, # Nothing seems to be generated, so not making the output + buildTests ? false, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "roctracer"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals buildDocs [ + "doc" + ] + ++ lib.optionals buildTests [ + "test" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "roctracer"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-Dwk5cBZLysmsVA2kwpQM0FQt2KXOGcaZcAw/d8VUaXw="; + }; + + nativeBuildInputs = + [ + cmake + clr + ] + ++ lib.optionals buildDocs [ + doxygen + graphviz + ]; + + buildInputs = [ + libxml2 + libbacktrace + python3Packages.python + python3Packages.cppheaderparser + ]; + + cmakeFlags = [ + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + env.NIX_CFLAGS_COMPILE = toString [ + # Needed with GCC 12 + "-Wno-error=array-bounds" + ]; + + postPatch = + '' + export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode + '' + + lib.optionalString (!buildTests) '' + substituteInPlace CMakeLists.txt \ + --replace "add_subdirectory(test)" "" + ''; + + # Tests always fail, probably need GPU + # doCheck = buildTests; + + postInstall = + lib.optionalString buildDocs '' + mkdir -p $doc + '' + + lib.optionalString buildTests '' + mkdir -p $test/bin + # Not sure why this is an install target + find $out/test -executable -type f -exec mv {} $test/bin \; + rm $test/bin/{*.sh,*.py} + patchelf --set-rpath $out/lib:${ + 
lib.makeLibraryPath ( + finalAttrs.buildInputs + ++ [ + clr + gcc-unwrapped.lib + rocm-runtime + ] + ) + } $test/bin/* + rm -rf $out/test + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Tracer callback/activity library"; + homepage = "https://github.com/ROCm/roctracer"; + license = with licenses; [ mit ]; # mitx11 + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch b/pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch new file mode 100644 index 0000000..fa47a3c --- /dev/null +++ b/pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch @@ -0,0 +1,35 @@ +diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt +index 0d00883..86ce282 100644 +--- a/test/CMakeLists.txt ++++ b/test/CMakeLists.txt +@@ -30,30 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests" + cmake_dependent_option( ROCWMMA_BUILD_BENCHMARK_TESTS "Build benchmarking tests" OFF "ROCWMMA_BUILD_TESTS" OFF ) + cmake_dependent_option( ROCWMMA_BUILD_EXTENDED_TESTS "Build extended test parameter coverage" OFF "ROCWMMA_BUILD_TESTS" OFF ) + +-# Test/benchmark requires additional dependencies +-include( FetchContent ) +- +-FetchContent_Declare( +- googletest +- GIT_REPOSITORY https://github.com/google/googletest.git +- GIT_TAG release-1.12.1 +-) +-FetchContent_GetProperties(googletest) +-if(NOT googletest_POPULATED) +- +- # Fetch the content using default details +- FetchContent_Populate(googletest) +- # Save the shared libs setting, then force to static libs +- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) +- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE) +- +- # Add gtest targets as static libs +- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) +- +- # Restore shared libs setting +- 
set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE) +-endif() +- + set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}) + set(ROCWMMA_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/rocwmma_gtest_main.cpp) diff --git a/pkgs/rocm-modules/rocwmma/default.nix b/pkgs/rocm-modules/rocwmma/default.nix new file mode 100644 index 0000000..2098de9 --- /dev/null +++ b/pkgs/rocm-modules/rocwmma/default.nix @@ -0,0 +1,121 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocm-smi, + clr, + openmp, + gtest, + rocblas, + buildTests ? false, # Will likely fail building because wavefront shifts are not supported for certain archs + buildExtendedTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ? [ ], +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocwmma"; + version = "6.4.1"; + + outputs = + [ + "out" + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocWMMA"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-fjyxMrzt74rE7Gf4v4WawYltuw1fvahwZUpauMIE3qc="; + }; + + patches = lib.optionals (buildTests || buildBenchmarks) [ + ./0000-dont-fetch-googletest.patch + ]; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + ]; + + buildInputs = + [ + openmp + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + rocm-smi + gtest + rocblas + ]; + + cmakeFlags = + [ + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" + "-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}" + "-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + 
"-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildExtendedTests [ + "-DROCWMMA_BUILD_EXTENDED_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DROCWMMA_BUILD_BENCHMARK_TESTS=ON" + "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=ON" + ]; + + postInstall = + lib.optionalString (buildTests || buildBenchmarks) '' + mkdir -p $test/bin + mv $out/bin/{*_test,*-validate} $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/*-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/sgemmv $sample/bin + mv $out/bin/simple_gemm $sample/bin + mv $out/bin/simple_dlrm $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rm -rf $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Mixed precision matrix multiplication and accumulation"; + homepage = "https://github.com/ROCm/rocWMMA"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/rocm-modules/rpp/default.nix b/pkgs/rocm-modules/rpp/default.nix new file mode 100644 index 0000000..2455d9f --- /dev/null +++ b/pkgs/rocm-modules/rpp/default.nix @@ -0,0 +1,104 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocm-docs-core, + half, + clr, + openmp, + boost, + python3Packages, + buildDocs ? false, # Needs internet + useOpenCL ? false, + useCPU ? false, + gpuTargets ? 
[ ], +}: + +stdenv.mkDerivation (finalAttrs: { + pname = + "rpp-" + + ( + if (!useOpenCL && !useCPU) then + "hip" + else if (useOpenCL && !useCPU) then + "opencl" + else + "cpu" + ); + + version = "6.4.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rpp"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-rccVjSrOVIe4ZDtloCoCCI3u9UIcUqdirHIzS7ffAas="; + }; + + nativeBuildInputs = + [ + cmake + rocm-cmake + clr + ] + ++ lib.optionals buildDocs [ + rocm-docs-core + python3Packages.python + ]; + + buildInputs = [ + half + openmp + boost + ]; + + CFLAGS = "-I${openmp.dev}/include"; + CXXFLAGS = "-I${openmp.dev}/include"; + cmakeFlags = + [ + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" + "-DROCM_PATH=${clr}" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals (!useOpenCL && !useCPU) [ + "-DBACKEND=HIP" + ] + ++ lib.optionals (useOpenCL && !useCPU) [ + "-DBACKEND=OCL" + ] + ++ lib.optionals useCPU [ + "-DBACKEND=CPU" + ]; + + postPatch = lib.optionalString (!useOpenCL && !useCPU) '' + # Bad path + substituteInPlace CMakeLists.txt \ + --replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clr}/bin/hipcc" + ''; + + postBuild = lib.optionalString buildDocs '' + python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Comprehensive high-performance computer vision library for AMD processors"; + homepage = "https://github.com/ROCm/rpp"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + broken = useOpenCL; + }; +}) diff --git a/pkgs/rocm-modules/tensile/default.nix b/pkgs/rocm-modules/tensile/default.nix 
new file mode 100644 index 0000000..7b55699 --- /dev/null +++ b/pkgs/rocm-modules/tensile/default.nix @@ -0,0 +1,115 @@ +{ + lib, + fetchFromGitHub, + fetchpatch, + rocmUpdateScript, + buildPythonPackage, + pytestCheckHook, + setuptools, + pyyaml, + msgpack, + simplejson, + ujson, + distro, + orjson, + pandas, + joblib, + filelock, + clr, + rich, + isTensileLite ? false, +}: + +buildPythonPackage rec { + pname = if isTensileLite then "tensilelite" else "tensile"; + # Using a specific commit which has code object compression support from after the 6.3 release + # Without compression packages are too large for hydra + version = "6.4.1"; + format = "pyproject"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "Tensile"; + rev = "1752af518190500891a865379a4569b8abf6ba01"; + hash = "sha256-Wvz4PVs//3Ox7ykZHpjPzOVwlyATyc+MmVVenfTzWK4="; + }; + + # TODO: It should be possible to run asm caps test ONCE for all supported arches + # We currently disable the test because it's slow and runs each time tensile launches + + postPatch = + lib.optionalString (!isTensileLite) '' + if grep -F .SafeLoader Tensile/LibraryIO.py; then + substituteInPlace Tensile/LibraryIO.py \ + --replace-fail "yaml.SafeLoader" "yaml.CSafeLoader" + fi + # See TODO above about asm caps test + substituteInPlace Tensile/Common.py \ + --replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:" + '' + + '' + # Add an assert that the fallback 9,0,0 is supported before setting the kernel to it + # If it's not detected as supported we have an issue with compiler paths or the compiler is broken + # and it's better to stop immediately + substituteInPlace Tensile/KernelWriter.py \ + --replace-fail '= (9,0,0)' '= (9,0,0);assert(globalParameters["AsmCaps"][(9,0,0)]["SupportedISA"])' + find . 
-type f -iname "*.sh" -exec chmod +x {} \; + patchShebangs Tensile + ''; + + buildInputs = [ setuptools ]; + + propagatedBuildInputs = + [ + pyyaml + msgpack + pandas + joblib + distro + ] + ++ lib.optionals (!isTensileLite) [ + rich + ] + ++ lib.optionals isTensileLite [ + simplejson + ujson + orjson + ]; + + patches = + lib.optional (!isTensileLite) ./tensile-solutionstructs-perf-fix.diff + ++ lib.optional (!isTensileLite) ./tensile-create-library-dont-copy-twice.diff + ++ lib.optional (!isTensileLite) (fetchpatch { + # [PATCH] Extend Tensile HIP ISA compatibility + sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8="; + url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch"; + }) + ++ lib.optional isTensileLite ./tensilelite-create-library-dont-copy-twice.diff + ++ lib.optional isTensileLite ./tensilelite-gen_assembly-venv-err-handling.diff; + # ++ lib.optional isTensileLite ./tensilelite-compression.diff; + + doCheck = false; # Too many errors, not sure how to set this up properly + + nativeCheckInputs = [ + pytestCheckHook + filelock + clr + ]; + + env.ROCM_PATH = "${clr}"; + + pythonImportsCheck = [ "Tensile" ]; + + passthru.updateScript = rocmUpdateScript { + name = pname; + inherit (src) owner repo; + }; + + meta = with lib; { + description = "GEMMs and tensor contractions"; + homepage = "https://github.com/ROCm/Tensile"; + license = with licenses; [ mit ]; + teams = [ teams.rocm ]; + platforms = platforms.linux; + }; +} diff --git a/pkgs/rocm-modules/tensile/tensile-create-library-dont-copy-twice.diff b/pkgs/rocm-modules/tensile/tensile-create-library-dont-copy-twice.diff new file mode 100644 index 0000000..2b68024 --- /dev/null +++ b/pkgs/rocm-modules/tensile/tensile-create-library-dont-copy-twice.diff @@ -0,0 +1,20 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index a1644606..c6ca2882 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ 
b/Tensile/TensileCreateLibrary.py +@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None): + "KernelHeader.h", + ] + ++ import filecmp + for fileName in libraryStaticFiles: + # copy file +- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath) ++ # no need to copy twice if it has already been copied ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + return libraryStaticFiles + diff --git a/pkgs/rocm-modules/tensile/tensile-solutionstructs-perf-fix.diff b/pkgs/rocm-modules/tensile/tensile-solutionstructs-perf-fix.diff new file mode 100644 index 0000000..7157238 --- /dev/null +++ b/pkgs/rocm-modules/tensile/tensile-solutionstructs-perf-fix.diff @@ -0,0 +1,48 @@ +diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py +index f663c6f1..17bcf897 100644 +--- a/Tensile/SolutionStructs.py ++++ b/Tensile/SolutionStructs.py +@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping): + # create a dictionary of lists of parameter values + @staticmethod + def getSerialNaming(objs): ++ valid_params = sorted(validParameters.keys()) + data = {} +- for objIdx in range(0, len(objs)): +- obj = objs[objIdx] +- for paramName in sorted(obj.keys()): +- if paramName in list(validParameters.keys()): +- paramValue = obj[paramName] +- if paramName in data: +- if paramValue not in data[paramName]: +- data[paramName].append(paramValue) +- else: +- data[paramName] = [ paramValue ] +- maxObjs = 1 +- for paramName in data: +- if not isinstance(data[paramName][0],dict): +- data[paramName] = sorted(data[paramName]) +- maxObjs *= len(data[paramName]) +- numDigits = len(str(maxObjs)) +- return [ data, numDigits ] ++ ++ objs = [getattr(obj, "_state", obj) for obj in objs] ++ ++ for param in valid_params: ++ d = [] ++ for obj in objs: ++ if param in obj: ++ v = obj[param] ++ if v not in d: ++ 
d.append(v) ++ if len(d): ++ if not isinstance(d[0], dict): d.sort() ++ data[param] = d ++ ++ # Calculate max objects using prod() from math module ++ max_objs = math.prod(len(values) for values in data.values()) ++ num_digits = len(str(max_objs)) ++ return data, num_digits + + ######################################## + # Get Name Serial \ No newline at end of file diff --git a/pkgs/rocm-modules/tensile/tensilelite-compression.diff b/pkgs/rocm-modules/tensile/tensilelite-compression.diff new file mode 100644 index 0000000..bfc0146 --- /dev/null +++ b/pkgs/rocm-modules/tensile/tensilelite-compression.diff @@ -0,0 +1,345 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index b8cea84558..1bc24bd1dd 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -41,6 +41,7 @@ + from .SolutionLibrary import MasterSolutionLibrary + from .SolutionStructs import Solution + from .CustomYamlLoader import load_logic_gfx_arch ++from .Utilities.Profile import profile + + import argparse + import collections +@@ -1233,7 +1234,7 @@ def validateLibrary(masterLibraries: MasterSolutionLibrary, + ################################################################################ + # Tensile Create Library + ################################################################################ +-@timing ++@profile + def TensileCreateLibrary(): + print1("") + print1(HR) +@@ -1558,7 +1559,6 @@ def param(key, value): + + print1("# Check if generated files exists.") + +- @timing + def checkFileExistence(files): + for filePath in files: + if not os.path.exists(filePath): +diff --git a/Tensile/Utilities/Profile.py b/Tensile/Utilities/Profile.py +new file mode 100644 +index 0000000000..cc3c7eb44c +--- /dev/null ++++ b/Tensile/Utilities/Profile.py +@@ -0,0 +1,77 @@ ++################################################################################ ++# ++# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. 
++# ++# Permission is hereby granted, free of charge, to any person obtaining a copy ++# of this software and associated documentation files (the "Software"), to deal ++# in the Software without restriction, including without limitation the rights ++# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++# copies of the Software, and to permit persons to whom the Software is ++# furnished to do so, subject to the following conditions: ++# ++# The above copyright notice and this permission notice shall be included in ++# all copies or substantial portions of the Software. ++# ++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++# SOFTWARE. ++# ++################################################################################ ++ ++import cProfile ++import pstats ++import os ++ ++from pathlib import Path ++from datetime import datetime, timezone ++from typing import Callable, Tuple ++ ++PROFILE_ENV_VAR = "TENSILE_PROFILE" ++ ++def profile(func: Callable) -> Callable: ++ """Profiling decorator. ++ ++ Add ``@profile`` to mark a function for profiling; set the environment variable ++ TENSILE_PROFILE=ON to enable profiling decorated functions. 
++ """ ++ if not envVariableIsSet(PROFILE_ENV_VAR): ++ return func ++ def wrapper(*args, **kwargs): ++ path, filename = initProfileArtifacts(func.__name__) ++ ++ prof = cProfile.Profile() ++ output = prof.runcall(func, *args, **kwargs) ++ result = pstats.Stats(prof) ++ result.sort_stats(pstats.SortKey.TIME) ++ result.dump_stats(path/filename) ++ ++ return output ++ return wrapper ++ ++def envVariableIsSet(varName: str) -> bool: ++ """Checks if the provided environment variable is set to "ON", "TRUE", or "1" ++ Args: ++ varName: Environment variable name. ++ Returns: ++ True if the environment variable is set, otherwise False. ++ """ ++ value = os.environ.get(varName, "").upper() ++ return True if value in ["ON", "TRUE", "1"] else False ++ ++def initProfileArtifacts(funcName: str) -> Tuple[Path, str]: ++ """Initializes filenames and paths for profiling artifacts based on the current datetime ++ Args: ++ funcName: The name of the function being profiled, nominally passed via func.__name__ ++ Returns: ++ A tuple (path, filename) where the path is the artifact directory and filename is ++ a .prof file with the profiling results. 
++ """ ++ dt = datetime.now(timezone.utc) ++ filename = f"{funcName}-{dt.strftime('%Y-%m-%dT%H-%M-%SZ')}.prof" ++ path = Path().cwd()/f"profiling-results-{dt.strftime('%Y-%m-%d')}" ++ path.mkdir(exist_ok=True) ++ return path, filename + +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index e62b0072df..2c843ba936 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -56,7 +56,7 @@ + import sys + from timeit import default_timer as timer + from pathlib import Path +-from typing import Sequence, List ++from typing import Sequence, List, Union + + def timing(func): + def wrapper(*args, **kwargs): +@@ -90,87 +90,142 @@ def processKernelSource(kernel, kernelWriterAssembly, ti): + + return (err, src, header, kernelName, filename) + +-def getAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath): +- destDir = ensurePath(os.path.join(outputPath, 'library')) +- asmDir = kernelWriterAssembly.getAssemblyDirectory() +- archs = collections.defaultdict(list) ++def linkIntoCodeObject( ++ objFiles: List[str], coPathDest: Union[Path, str], kernelWriterAssembly: KernelWriterAssembly ++): ++ """Links object files into a code object file. + +- for k in filter(lambda k: k['KernelLanguage'] == 'Assembly', kernels): +- archs[tuple(k['ISA'])].append(k) ++ Args: ++ objectFiles: A list of object files to be linked. ++ coPathDest: The destination path for the code object file. ++ kernelWriterAssembly: An instance of KernelWriterAssembly to get link arguments. + +- coFiles = [] ++ Raises: ++ RuntimeError: If linker invocation fails. 
++ """ ++ if os.name == "nt": ++ # On Windows, the objectFiles list command line (including spaces) ++ # exceeds the limit of 8191 characters, so using response file ++ ++ responseFile = os.path.join('/tmp', 'clangArgs.txt') ++ with open(responseFile, 'wt') as file: ++ file.write(" ".join(objFiles)) ++ file.flush() ++ ++ args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFileRaw, '@clangArgs.txt'] ++ subprocess.check_call(args, cwd=asmDir) ++ else: ++ numObjFiles = len(objFiles) ++ maxObjFiles = 10000 ++ ++ if numObjFiles > maxObjFiles: ++ batchedObjFiles = [objFiles[i:i+maxObjFiles] for i in range(0, numObjFiles, maxObjFiles)] ++ batchSize = int(math.ceil(numObjFiles / maxObjFiles)) ++ ++ newObjFiles = [str(coPathDest) + "." + str(i) for i in range(0, batchSize)] ++ newObjFilesOutput = [] ++ ++ for batch, filename in zip(batchedObjFiles, newObjFiles): ++ if len(batch) > 1: ++ args = [globalParameters["ROCmLdPath"], "-r"] + batch + [ "-o", filename] ++ print2(f"Linking object files into fewer object files: {' '.join(args)}") ++ subprocess.check_call(args) ++ newObjFilesOutput.append(filename) ++ else: ++ newObjFilesOutput.append(batchedObjFiles[0]) ++ ++ args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjFilesOutput, str(coPathDest)) ++ print2(f"Linking object files into code object: {' '.join(args)}") ++ subprocess.check_call(args) ++ else: ++ args = kernelWriterAssembly.getLinkCodeObjectArgs(objFiles, str(coPathDest)) ++ print2(f"Linking object files into code object: {' '.join(args)}") ++ subprocess.check_call(args) ++ ++ ++def compressCodeObject( ++ coPathSrc: Union[Path, str], coPathDest: Union[Path, str], gfx: str, bundler: str ++): ++ """Compresses a code object file using the provided bundler. ++ ++ Args: ++ coPathSrc: The source path of the code object file to be compressed. ++ coPathDest: The destination path for the compressed code object file. ++ gfx: The target GPU architecture. 
++ bundler: The path to the Clang Offload Bundler executable. ++ ++ Raises: ++ RuntimeError: If compressing the code object file fails. ++ """ ++ args = [ ++ bundler, ++ "--compress", ++ "--type=o", ++ "--bundle-align=4096", ++ f"--targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--{gfx}", ++ "--input=/dev/null", ++ f"--input={str(coPathSrc)}", ++ f"--output={str(coPathDest)}", ++ ] ++ ++ print2(f"Bundling/compressing code objects: {' '.join(args)}") ++ try: ++ out = subprocess.check_output(args, stderr=subprocess.STDOUT) ++ print2(f"Output: {out}") ++ except subprocess.CalledProcessError as err: ++ raise RuntimeError( ++ f"Error compressing code object via bundling: {err.output}\nFailed command: {' '.join(args)}" ++ ) ++ ++def buildAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath): ++ ++ isAsm = lambda k: k["KernelLanguage"] == "Assembly" ++ ++ extObj = ".o" ++ extCo = ".co" ++ extCoRaw = ".co.raw" + +- for arch, archKernels in archs.items(): ++ destDir = Path(ensurePath(os.path.join(outputPath, 'library'))) ++ asmDir = Path(kernelWriterAssembly.getAssemblyDirectory()) ++ ++ archKernelMap = collections.defaultdict(list) ++ for k in filter(isAsm, kernels): ++ archKernelMap[tuple(k['ISA'])].append(k) ++ ++ coFiles = [] ++ for arch, archKernels in archKernelMap.items(): + if len(archKernels) == 0: + continue + +- archName = getGfxName(arch) ++ gfx = getGfxName(arch) + + if globalParameters["MergeFiles"] or globalParameters["NumMergedFiles"] > 1 or globalParameters["LazyLibraryLoading"]: +- objectFiles = [kernelWriterAssembly.getKernelFileBase(k) + '.o' for k in archKernels if 'codeObjectFile' not in k] ++ objectFiles = [str(asmDir / (kernelWriterAssembly.getKernelFileBase(k) + extObj)) for k in archKernels if 'codeObjectFile' not in k] + +- #Group kernels from placeholder libraries + coFileMap = collections.defaultdict(list) ++ + if len(objectFiles): +- coFileMap[os.path.join(destDir, "TensileLibrary_"+archName+".co")] = objectFiles ++ 
coFileMap[asmDir / ("TensileLibrary_"+ gfx + extCoRaw)] = objectFiles + + for kernel in archKernels: + coName = kernel.get("codeObjectFile", None) + if coName: +- coFileMap[os.path.join(destDir, coName+".co")] += [kernelWriterAssembly.getKernelFileBase(kernel) + '.o'] ++ coFileMap[asmDir / (coName + extCoRaw)].append(str(asmDir / (kernelWriterAssembly.getKernelFileBase(kernel) + extObj))) + +- for coFile, objectFiles in coFileMap.items(): +- if os.name == "nt": +- # On Windows, the objectFiles list command line (including spaces) +- # exceeds the limit of 8191 characters, so using response file ++ for coFileRaw, objFiles in coFileMap.items(): + +- responseArgs = objectFiles +- responseFile = os.path.join(asmDir, 'clangArgs.txt') +- with open(responseFile, 'wt') as file: +- file.write( " ".join(responseArgs) ) +- file.flush() +- +- args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFile, '@clangArgs.txt'] +- subprocess.check_call(args, cwd=asmDir) +- else: +- numOfObjectFiles = len(objectFiles) +- splitFiles = 10000 +- if numOfObjectFiles > splitFiles: +- slicedObjectFilesList = [objectFiles[x:x+splitFiles] for x in range(0, numOfObjectFiles, splitFiles)] +- objectFileBasename = os.path.split(coFile)[-1].split('.')[0] +- numOfOneSliceOfObjectFiles = int(math.ceil(numOfObjectFiles / splitFiles)) +- newObjectFiles = [ objectFileBasename + "_" + str(i) + ".o" for i in range(0, numOfOneSliceOfObjectFiles)] +- newObjectFilesOutput = [] +- for slicedObjectFiles, objectFile in zip(slicedObjectFilesList, newObjectFiles): +- if len(slicedObjectFiles) > 1: +- args = [globalParameters["ROCmLdPath"], "-r"] + slicedObjectFiles + [ "-o", objectFile ] +- if globalParameters["PrintCodeCommands"]: +- print(asmDir) +- print(' '.join(args)) +- subprocess.check_call(args, cwd=asmDir) +- newObjectFilesOutput.append(objectFile) +- else: +- newObjectFilesOutput.append(slicedObjectFiles[0]) +- args = 
kernelWriterAssembly.getLinkCodeObjectArgs(newObjectFilesOutput, coFile) +- if globalParameters["PrintCodeCommands"]: +- print(asmDir) +- print(' '.join(args)) +- subprocess.check_call(args, cwd=asmDir) +- else: +- args = kernelWriterAssembly.getLinkCodeObjectArgs(objectFiles, coFile) +- if globalParameters["PrintCodeCommands"]: +- print(asmDir) +- print(' '.join(args)) +- subprocess.check_call(args, cwd=asmDir) ++ linkIntoCodeObject(objFiles, coFileRaw, kernelWriterAssembly) ++ coFile = destDir / coFileRaw.name.replace(extCoRaw, extCo) ++ compressCodeObject(coFileRaw, coFile, gfx, globalParameters["ClangOffloadBundlerPath"]) + + coFiles.append(coFile) + else: + # no mergefiles + def newCoFileName(kName): + if globalParameters["PackageLibrary"]: +- return os.path.join(destDir, archName, kName + '.co') ++ return os.path.join(destDir, gfx, kName + '.co') + else: +- return os.path.join(destDir, kName + '_' + archName + '.co') ++ return os.path.join(destDir, kName + '_' + gfx + '.co') + + def orgCoFileName(kName): + return os.path.join(asmDir, kName + '.co') +@@ -179,6 +234,8 @@ def orgCoFileName(kName): + map(lambda k: kernelWriterAssembly.getKernelFileBase(k), archKernels)), "Copying code objects"): + shutil.copyfile(src, dst) + coFiles.append(dst) ++ printWarning("Code object files are not compressed in `--no-merge-files` build mode.") ++ + return coFiles + + def which(p): +@@ -645,7 +702,7 @@ def success(kernel): + + if not globalParameters["GenerateSourcesAndExit"]: + codeObjectFiles += buildSourceCodeObjectFiles(CxxCompiler, kernelFiles, outputPath) +- codeObjectFiles += getAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath) ++ codeObjectFiles += buildAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath) + + Common.popWorkingPath() # build_tmp + Common.popWorkingPath() # workingDir + diff --git a/pkgs/rocm-modules/tensile/tensilelite-create-library-dont-copy-twice.diff 
b/pkgs/rocm-modules/tensile/tensilelite-create-library-dont-copy-twice.diff new file mode 100644 index 0000000..c630803 --- /dev/null +++ b/pkgs/rocm-modules/tensile/tensilelite-create-library-dont-copy-twice.diff @@ -0,0 +1,37 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index 2b9da394..b001fa7c 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -808,10 +808,13 @@ def copyStaticFiles(outputPath=None): + "ReductionTemplate.h", + "memory_gfx.h" ] + ++ import filecmp + for fileName in libraryStaticFiles: +- # copy file +- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \ +- outputPath ) ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ # no need to copy twice if it has already been copied ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + return libraryStaticFiles + +@@ -1417,9 +1420,13 @@ def TensileCreateLibrary(): + writeCMake(outputPath, solutionFiles, sourceKernelFiles, staticFiles, masterLibraries) + + # Make sure to copy the library static files. 
++ import filecmp + for fileName in staticFiles: +- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \ +- outputPath ) ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ # no need to copy twice if it has already been copied ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + # write solutions and kernels + codeObjectFiles = writeSolutionsAndKernels(outputPath, CxxCompiler, None, solutions, diff --git a/pkgs/rocm-modules/tensile/tensilelite-gen_assembly-venv-err-handling.diff b/pkgs/rocm-modules/tensile/tensilelite-gen_assembly-venv-err-handling.diff new file mode 100644 index 0000000..0667599 --- /dev/null +++ b/pkgs/rocm-modules/tensile/tensilelite-gen_assembly-venv-err-handling.diff @@ -0,0 +1,36 @@ +diff --git a/Tensile/Ops/gen_assembly.sh b/Tensile/Ops/gen_assembly.sh +index 0b21b6c6..609f1dd1 100755 +--- a/Tensile/Ops/gen_assembly.sh ++++ b/Tensile/Ops/gen_assembly.sh +@@ -23,6 +23,8 @@ + # + ################################################################################ + ++set -x ++ + archStr=$1 + dst=$2 + venv=$3 +@@ -35,7 +37,13 @@ fi + + toolchain=${rocm_path}/llvm/bin/clang++ + +-. ${venv}/bin/activate ++if ! [ -z ${TENSILE_GEN_ASSEMBLY_TOOLCHAIN+x} ]; then ++ toolchain="${TENSILE_GEN_ASSEMBLY_TOOLCHAIN}" ++fi ++ ++if [ -f ${venv}/bin/activate ]; then ++ . 
${venv}/bin/activate ++fi + + IFS=';' read -r -a archs <<< "$archStr" + +@@ -77,4 +85,6 @@ for arch in "${archs[@]}"; do + python3 ./ExtOpCreateLibrary.py --src=$dst --co=$dst/extop_$arch.co --output=$dst --arch=$arch + done + +-deactivate ++if [ -f ${venv}/bin/activate ]; then ++ deactivate ++fi diff --git a/pkgs/rocm-modules/triton/default.nix b/pkgs/rocm-modules/triton/default.nix new file mode 100644 index 0000000..8dc2062 --- /dev/null +++ b/pkgs/rocm-modules/triton/default.nix @@ -0,0 +1,56 @@ +{ + triton-no-cuda, + rocmPackages, + fetchFromGitHub, +}: +(triton-no-cuda.override (_old: { + inherit rocmPackages; + rocmSupport = true; + stdenv = rocmPackages.llvm.rocmClangStdenv; + llvm = rocmPackages.triton-llvm; +})).overridePythonAttrs + (old: { + doCheck = false; + stdenv = rocmPackages.llvm.rocmClangStdenv; + version = "3.2.0"; + src = fetchFromGitHub { + owner = "triton-lang"; + repo = "triton"; + rev = "9641643da6c52000c807b5eeed05edaec4402a67"; # "release/3.2.x"; + hash = "sha256-V1lpARwOLn28ZHfjiWR/JJWGw3MB34c+gz6Tq1GOVfo="; + }; + buildInputs = old.buildInputs ++ [ + rocmPackages.clr + ]; + dontStrip = true; + env = old.env // { + CXXFLAGS = "-O3 -I${rocmPackages.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include"; + TRITON_OFFLINE_BUILD = 1; + }; + patches = [ ]; + postPatch = '' + # Remove nvidia backend so we don't depend on unfree nvidia headers + # when we only want to target ROCm + rm -rf third_party/nvidia + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(test)" "" + sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h + sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt + substituteInPlace python/setup.py \ + --replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \ + 'backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]' + find . 
-type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + + find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + + # remove any downloads + substituteInPlace python/setup.py \ + --replace-fail "[get_json_package_info()]" "[]"\ + --replace-fail "[get_llvm_package_info()]" "[]"\ + --replace-fail "curr_version != version" "False" + # Don't fetch googletest + substituteInPlace cmake/AddTritonUnitTest.cmake \ + --replace-fail 'include(''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)' "" \ + --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" + substituteInPlace third_party/amd/backend/compiler.py \ + --replace-fail '"/opt/rocm/llvm/bin/ld.lld"' "os.environ['ROCM_PATH']"' + "/llvm/bin/ld.lld"' + ''; + }) diff --git a/pkgs/rocm-modules/update.nix b/pkgs/rocm-modules/update.nix new file mode 100644 index 0000000..69a45f9 --- /dev/null +++ b/pkgs/rocm-modules/update.nix @@ -0,0 +1,62 @@ +{ + lib, + writeScript, +}: + +{ + name ? "", + owner ? "", + repo ? "", + page ? "releases", + # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip + # output: first tag_name/name that's a proper version if any + filter ?
"map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")", +}: + +let + pname = + if lib.hasPrefix "rocm-llvm-" name then "llvm.${lib.removePrefix "rocm-llvm-" name}" else name; + + updateScript = writeScript "update.sh" '' + #!/usr/bin/env nix-shell + #!nix-shell -i bash -p curl jq common-updater-scripts + set -euo pipefail + + fetch_releases() { + local api_url="https://api.github.com/repos/${owner}/${repo}/${page}" + if [ "${page}" = "releases" ]; then + api_url="$api_url?per_page=4" + fi + >&2 echo $api_url + curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -sL "$api_url" + } + + find_valid_version() { + local releases="$1" + >&2 echo "$releases" + # Wrap in array if not already an array to make handling specific release or tags page the same + >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}' + echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}' + } + + releases="$(fetch_releases)" + version="$(find_valid_version "$releases")" + + if [ -z "$version" ]; then + echo "No valid version found in the fetched release(s)." >&2 + exit 1 + fi + + IFS='.' read -ra version_arr <<< "$version" + + >&2 echo parsed version "$version_arr" from "$version" + + if (( ''${version_arr[0]} > 6 )); then + echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." >&2 + exit 1 + fi + + update-source-version rocmPackages_6.${pname} "$version" --ignore-same-hash + ''; +in +[ updateScript ]