Initial commit

This commit is contained in:
2025-05-30 21:52:25 +04:00
commit 14f2056227
91 changed files with 9605 additions and 0 deletions

61
flake.lock generated Normal file
View File

@@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1748370509,
"narHash": "sha256-QlL8slIgc16W5UaI3w7xHQEP+Qmv/6vSNTpoZrrSlbk=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "4faa5f5321320e49a78ae7848582f684d64783e9",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

45
flake.nix Normal file
View File

@@ -0,0 +1,45 @@
{
  description = "A very basic flake";

  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  # Flake outputs:
  #   packages.<system>  - the package set produced by ./pkgs/rocm-modules
  #   overlays.default   - overlay exposing that set as rocmPackages / rocmPackages_6
  outputs =
    {
      self,
      nixpkgs,
      flake-utils,
      ...
    }:
    # Per-system outputs. Everything returned from this function is nested
    # under the system name by flake-utils (e.g. packages.x86_64-linux).
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs {
          inherit system;
          # overlays = [
          #   (final: prev: {
          #     rocmPackages_6 = final.callPackage ./pkgs/rocm-modules { };
          #   })
          # ];
        };
      in
      {
        packages = pkgs.callPackage ./pkgs/rocm-modules { };
        # build all pkgs as check
        # defaultPackage = self.packages.${system}.rocm-modules;
        #
        # devShells.default = pkgs.mkShell {
        #   buildInputs = [ self.packages.${system}.rocm-modules ];
        # };
      }
    )
    # System-independent outputs. Overlays are plain functions of
    # (final: prev:) and must not be nested under a system name, so they are
    # merged in outside of eachDefaultSystem; otherwise the overlay would be
    # exported as overlays.<system>.default instead of overlays.default.
    // {
      overlays.default = final: prev: {
        rocmPackages = final.callPackage ./pkgs/rocm-modules { };
        rocmPackages_6 = final.rocmPackages;
      };
    };
}

View File

@@ -0,0 +1,104 @@
From 4a0584f7c05641143151ebdc1be1163bebf9d35d Mon Sep 17 00:00:00 2001
From: Las <las@protonmail.ch>
Date: Sun, 3 Jan 2021 18:35:37 +0000
Subject: [PATCH] Compile transupp.c as part of the library
The exported symbols are made weak to not conflict with users
of the library that already vendor this functionality.
---
CMakeLists.txt | 4 ++--
transupp.c | 14 +++++++-------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ca6f98..a9a0fae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -533,7 +533,7 @@ set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c
jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c
jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c
- jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
+ jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c transupp.c)
if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
@@ -1489,7 +1489,7 @@ install(EXPORT ${CMAKE_PROJECT_NAME}Targets
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h
${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h
- ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h ${CMAKE_CURRENT_SOURCE_DIR}/transupp.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
include(cmakescripts/BuildPackages.cmake)
diff --git a/transupp.c b/transupp.c
index 6e86077..2da49a7 100644
--- a/transupp.c
+++ b/transupp.c
@@ -1386,7 +1386,7 @@ jt_read_integer(const char **strptr, JDIMENSION *result)
* This code is loosely based on XParseGeometry from the X11 distribution.
*/
-GLOBAL(boolean)
+GLOBAL(boolean) __attribute__((weak))
jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec)
{
info->crop = FALSE;
@@ -1486,7 +1486,7 @@ trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height)
* and transformation is not perfect. Otherwise returns TRUE.
*/
-GLOBAL(boolean)
+GLOBAL(boolean) __attribute__((weak))
jtransform_request_workspace(j_decompress_ptr srcinfo,
jpeg_transform_info *info)
{
@@ -2033,7 +2033,7 @@ adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width,
* to jpeg_write_coefficients().
*/
-GLOBAL(jvirt_barray_ptr *)
+GLOBAL(jvirt_barray_ptr *) __attribute__((weak))
jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
jvirt_barray_ptr *src_coef_arrays,
jpeg_transform_info *info)
@@ -2152,7 +2152,7 @@ jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* Note that some transformations will modify the source data arrays!
*/
-GLOBAL(void)
+GLOBAL(void) __attribute__((weak))
jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
jvirt_barray_ptr *src_coef_arrays,
jpeg_transform_info *info)
@@ -2264,7 +2264,7 @@ jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* (may use custom action then)
*/
-GLOBAL(boolean)
+GLOBAL(boolean) __attribute__((weak))
jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
int MCU_width, int MCU_height,
JXFORM_CODE transform)
@@ -2303,7 +2303,7 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
* This must be called before jpeg_read_header() to have the desired effect.
*/
-GLOBAL(void)
+GLOBAL(void) __attribute__((weak))
jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
{
#ifdef SAVE_MARKERS_SUPPORTED
@@ -2331,7 +2331,7 @@ jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
* JFIF APP0 or Adobe APP14 markers if selected.
*/
-GLOBAL(void)
+GLOBAL(void) __attribute__((weak))
jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
JCOPY_OPTION option)
{
--
2.29.2

View File

@@ -0,0 +1,90 @@
# AMD SMI: system management library and `amd-smi` CLI for AMD GPUs.
#
# The upstream build wants to `git clone` esmi_ib_library at configure time;
# that is replaced here by a pinned fetch that is copied into the source tree
# in postPatch.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  pkg-config,
  libdrm,
  wrapPython,
  autoPatchelfHook,
}:
let
  # Pinned copy of the E-SMI in-band library that is vendored into the build.
  esmi_ib_src = fetchFromGitHub {
    owner = "amd";
    repo = "esmi_ib_library";
    rev = "esmi_pkg_ver-4.1.2";
    hash = "sha256-wj3krY/6AdmnoNOSqN9EE/Yxbx++0AW2vu7dovQrQ9I=";
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "amdsmi";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "rocm";
    repo = "amdsmi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-K6FVvieJnOCfbyNNwOWs3t836ihtvELJvcCjXQoHuRY=";
  };

  postPatch = ''
    substituteInPlace goamdsmi_shim/CMakeLists.txt \
      --replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \
      --replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#'

    # Manually unpack esmi_ib_src and add amd_hsmp.h so execute-process git clone doesn't run
    cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library
    mkdir -p ./esmi_ib_library/include/asm
    cp ./include/amd_smi/impl/amd_hsmp.h ./esmi_ib_library/include/asm/amd_hsmp.h
  '';

  patches = [
    # Fix ld.lld undefined reference: drmGetVersion
    # (fetchpatch {
    #   url = "https://github.com/ROCm/amdsmi/commit/c3864bf6171970d86dc50fd23f06377736823997.patch";
    #   hash = "sha256-zRG1tBD8sIQCWdKfCbXC/Z/6d6NTrRYvRpddPWdM4j8=";
    # })
  ];

  nativeBuildInputs = [
    cmake
    pkg-config
    wrapPython
    autoPatchelfHook
  ];

  buildInputs = [
    libdrm
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Wrap the Python programs, then replace the installed bin/amd-smi with a
  # symlink to the CLI entry point under libexec.
  postInstall = ''
    wrapPythonProgramsIn $out
    rm $out/bin/amd-smi
    ln -sf $out/libexec/amdsmi_cli/amdsmi_cli.py $out/bin/amd-smi
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "System management interface for AMD GPUs supported by ROCm";
    # Points at the repository this derivation actually builds (see src above),
    # not at the separate rocm_smi_lib project.
    homepage = "https://github.com/ROCm/amdsmi";
    license = with licenses; [ mit ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = [ "x86_64-linux" ];
  };
})

View File

@@ -0,0 +1,230 @@
# AOTriton: ahead-of-time compiled Triton kernels for ROCm.
#
# Arguments of note:
#   buildTests / buildBenchmarks / buildSamples - add the matching extra output
#     and CMake switch.
#   gpuTargets - list of gfx architectures, passed to CMake as a ';'-joined list.
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  rocblas,
  rocsolver,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  hipblas,
  nlohmann_json,
  triton-llvm,
  rocmlir,
  lapack-reference,
  ninja,
  ncurses,
  libffi,
  zlib,
  zstd,
  xz,
  pkg-config,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [
    # aotriton GPU support list:
    # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
    "gfx90a"
    "gfx942"
    "gfx1100"
    "gfx1101"
  ],
}:
stdenv.mkDerivation (
  finalAttrs:
  let
    # Python interpreter handed to the build as VIRTUALENV_PYTHON_EXENAME.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.distutils
      ps.setuptools
      ps.packaging
      ps.numpy
      ps.wheel
      ps.filelock
      ps.iniconfig
      ps.pluggy
      ps.pybind11
    ]);
    gpuTargets' = lib.concatStringsSep ";" gpuTargets;
    compiler = "amdclang++";
    cFlags = "-O3 -DNDEBUG";
    cxxFlags = "${cFlags} -Wno-c++11-narrowing";
  in
  {
    pname = "aotriton";
    version = "0.9.2b";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "aotriton";
      rev = finalAttrs.version;
      hash = "sha256-1Cf0olD3zRg9JESD6s/WaGifm3kfD12VUvjTZHpmGAE=";
      fetchSubmodules = true;
    };

    env.CXX = compiler;
    env.ROCM_PATH = "${clr}";

    requiredSystemFeatures = [ "big-parallel" ];

    outputs =
      [
        "out"
      ]
      ++ lib.optionals buildTests [
        "test"
      ]
      ++ lib.optionals buildBenchmarks [
        "benchmark"
      ]
      ++ lib.optionals buildSamples [
        "sample"
      ];

    # Need an empty cuda.h for this to compile
    # Better than pulling in unfree cuda headers
    postPatch = ''
      touch third_party/triton/third_party/nvidia/include/cuda.h
    '';

    doCheck = false;
    doInstallCheck = false;

    nativeBuildInputs = [
      cmake
      rocm-cmake
      pkg-config
      py
      clr
      ninja
    ];

    buildInputs =
      [
        rocblas
        rocsolver
        hipblas-common
        hipblas
        openmp
        libffi
        ncurses
        xz
        nlohmann_json
        rocmlir
        msgpack
        libxml2
        python3Packages.msgpack
        zlib
        zstd
      ]
      ++ lib.optionals buildTests [
        gtest
      ]
      ++ lib.optionals (buildTests || buildBenchmarks) [
        lapack-reference
      ];

    env.TRITON_OFFLINE_BUILD = 1;
    env.LLVM_SYSPATH = "${triton-llvm}";
    env.JSON_SYSPATH = nlohmann_json;
    env.MLIR_DIR = "${triton-llvm}/lib/cmake/mlir";

    # Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files
    preConfigure = ''
      mkdir -p third_party/triton/third_party/nvidia/backend/include/
      touch third_party/triton/third_party/nvidia/backend/include/cuda.h

      # Make the stub header directory visible to the compiler. The path is
      # derived from the current source directory instead of hard-coding
      # /build/source, so the build does not depend on where Nix places the
      # build directory.
      export CXXFLAGS="''${CXXFLAGS:+$CXXFLAGS }-I$PWD/third_party/triton/third_party/nvidia/backend/include"

      find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
      find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +

      sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt
      sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt
      sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
      sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt

      substituteInPlace third_party/triton/python/setup.py \
        --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
        --replace-fail 'system == "Linux"' 'False'

      # Fix 'ld: error: unable to insert .comment after .comment'
      substituteInPlace v2python/ld_script.py \
        --replace-fail 'INSERT AFTER .comment;' ""

      cmakeFlagsArray+=(
        '-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
        '-DCMAKE_CXX_FLAGS_RELEASE=${cxxFlags}'
      )
      prependToVar cmakeFlags "-GNinja"

      # Writable HOME inside the build directory (again without assuming /build).
      export HOME="$NIX_BUILD_TOP/tmp-home"
      mkdir -p "$HOME"
    '';

    # Excerpt from README:
    # Note: do not run ninja separately, due to the limit of the current build system,
    # ninja install will run the whole build process unconditionally.
    dontBuild = true;

    installPhase = ''
      runHook preInstall
      ninja -v install
      runHook postInstall
    '';

    cmakeFlags =
      [
        "-Wno-dev"
        "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing
        "-DCMAKE_BUILD_TYPE=Release"
        "-DCMAKE_VERBOSE_MAKEFILE=ON"
        "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
        "-DCMAKE_CXX_COMPILER=${compiler}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
        "-DAMDGPU_TARGETS=${gpuTargets'}"
        "-DGPU_TARGETS=${gpuTargets'}"
      ]
      ++ lib.optionals buildTests [
        "-DBUILD_CLIENTS_TESTS=ON"
      ]
      ++ lib.optionals buildBenchmarks [
        "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ]
      ++ lib.optionals buildSamples [
        "-DBUILD_CLIENTS_SAMPLES=ON"
      ];

    # Move optional client binaries into their own outputs.
    # NOTE(review): the hipblas-test / hipblas-bench names look like they were
    # carried over from the hipblas package; confirm which binaries aotriton
    # actually installs before enabling buildTests or buildBenchmarks.
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblas-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblas-bench $benchmark/bin
      ''
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';

    meta = with lib; {
      description = "ROCm Ahead of Time (AOT) Triton Math Library";
      homepage = "https://github.com/ROCm/aotriton";
      license = with licenses; [ mit ];
      teams = [ teams.rocm ];
      platforms = platforms.linux;
    };
  }
)

View File

@@ -0,0 +1,40 @@
diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake
index 3f233b72f..67bdc62ee 100644
--- a/rocclr/cmake/ROCclr.cmake
+++ b/rocclr/cmake/ROCclr.cmake
@@ -44,6 +44,19 @@ find_package(Threads REQUIRED)
find_package(AMD_OPENCL)
+# Find X11 package
+find_package(X11 REQUIRED)
+if(NOT X11_FOUND)
+ message(FATAL_ERROR "X11 libraries not found")
+endif()
+
+# Find OpenGL package
+find_package(OpenGL REQUIRED)
+if(NOT OpenGL_FOUND)
+ message(FATAL_ERROR "OpenGL not found")
+endif()
+
+
add_library(rocclr STATIC)
include(ROCclrCompilerOptions)
@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC
${ROCCLR_SRC_DIR}/device
${ROCCLR_SRC_DIR}/elf
${ROCCLR_SRC_DIR}/include
+ ${X11_INCLUDE_DIR}
+ ${OPENGL_INCLUDE_DIR}
${AMD_OPENCL_INCLUDE_DIRS})
-target_link_libraries(rocclr PUBLIC Threads::Threads)
+target_link_libraries(rocclr PUBLIC
+ Threads::Threads
+ ${X11_LIBRARIES}
+ ${OPENGL_LIBRARIES})
# IPC on Windows is not supported
if(UNIX)
target_link_libraries(rocclr PUBLIC rt)

View File

@@ -0,0 +1,287 @@
# AMD Common Language Runtime (HIP + OpenCL) built from ROCm/clr.
#
# Outputs:
#   out - runtime, wrapped hipcc/hipconfig, amdclang shims, setup hook
#   icd - OpenCL ICD vendor file pointing at $out/lib/libamdocl64.so
#
# localGpuTargets, when non-null, is exposed through passthru and narrows
# what passthru.selectGpuTargets returns.
{
  lib,
  stdenv,
  callPackage,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  makeWrapper,
  cmake,
  perl,
  hip-common,
  hipcc,
  rocm-device-libs,
  rocm-comgr,
  rocm-runtime,
  rocm-core,
  roctracer,
  rocminfo,
  rocm-smi,
  numactl,
  libffi,
  zstd,
  zlib,
  libGL,
  libxml2,
  libX11,
  python3Packages,
  rocm-merged-llvm,
  khronos-ocl-icd-loader,
  gcc-unwrapped,
  writeShellScriptBin,
  localGpuTargets ? null,
}:
let
  libpatchVersion = rocm-core.ROCM_LIBPATCH_VERSION;

  toolchain = rocm-merged-llvm;
  clangBinDir = "${toolchain}/bin";

  # Arguments handed to wrapProgram for every hipcc/hipconfig entry point.
  hipWrapperFlags = [
    "--prefix PATH : $out/bin"
    "--prefix LD_LIBRARY_PATH : ${rocm-runtime}"
    "--set HIP_PLATFORM amd"
    "--set HIP_PATH $out"
    "--set HIP_CLANG_PATH ${clangBinDir}"
    "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode"
    "--set HSA_PATH ${rocm-runtime}"
    "--set ROCM_PATH $out"
  ];

  # Thin shims so tools asking for amdclang / amdclang++ get clang / clang++.
  amdclangShim = writeShellScriptBin "amdclang" ''
    exec clang "$@"
  '';
  amdclangxxShim = writeShellScriptBin "amdclang++" ''
    exec clang++ "$@"
  '';

  # Architecture numbers that are turned into "gfx<N>" for passthru.gpuTargets.
  knownGfxNumbers = [
    # "9-generic"
    "900" # MI25, Vega 56/64
    "906" # MI50/60, Radeon VII
    "908" # MI100
    "90a" # MI210 / MI250
    # "9-4-generic"
    # 940/1 - never released publicly, maybe HPE cray specific MI3xx?
    "942" # MI300
    # "10-1-generic"
    "1010"
    "1012"
    # "10-3-generic"
    "1030" # W6800, various Radeon cards
    # "11-generic"
    "1100"
    "1101"
    "1102"
    "1201"
  ];
in
stdenv.mkDerivation (finalAttrs: {
  pname = "clr";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "clr";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-MA9MS/HF3j5iqpFuatHQJZ+nNkcGgzCvpkiNO6CjoPg=";
  };

  outputs = [
    "out"
    "icd"
  ];

  patches = [
    ./cmake-find-x11-libgl.patch
    # (fetchpatch {
    #   # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99
    #   sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA=";
    #   url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch";
    # })
    # (fetchpatch {
    #   # improve rocclr isa compatibility check
    #   sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs=";
    #   url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch";
    # })
    # (fetchpatch {
    #   # [PATCH] Improve hipamd compat check
    #   sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8=";
    #   url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch";
    # })
    # (fetchpatch {
    #   # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition
    #   # Fixes crash due to UB in KernelBlitManager::setArgument
    #   sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I=";
    #   url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch";
    # })
    # (fetchpatch {
    #   # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues
    #   sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4=";
    #   url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch";
    # })
  ];

  nativeBuildInputs = [
    makeWrapper
    cmake
    perl
    python3Packages.python
    python3Packages.cppheaderparser
    amdclangShim
    amdclangxxShim
  ];

  buildInputs = [
    numactl
    libGL
    libxml2
    libX11
    khronos-ocl-icd-loader
    toolchain
    libffi
    zstd
    zlib
  ];

  propagatedBuildInputs = [
    rocm-core
    rocm-device-libs
    rocm-comgr
    rocm-runtime
    rocminfo
  ];

  env.LLVM_DIR = "";

  cmakeFlags = [
    "-DCMAKE_BUILD_TYPE=Release"
    "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries
    "-DCLR_BUILD_HIP=ON"
    "-DCLR_BUILD_OCL=ON"
    "-DHIP_COMMON_DIR=${hip-common}"
    "-DHIPCC_BIN_DIR=${hipcc}/bin"
    "-DHIP_PLATFORM=amd"
    "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext"
    "-DROCM_PATH=${rocminfo}"
    "-DBUILD_ICD=ON"
    "-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds?
    "-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}"
    # Temporarily set variables to work around upstream CMakeLists issue
    # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DCMAKE_INSTALL_LIBDIR=lib"
  ];

  # Source fix-ups: shebangs, Windows-only install rules, the clang path used
  # by the PCH script, the ICD vendor directory, and the unbundler default.
  postPatch = ''
    patchShebangs hipamd/*.sh
    patchShebangs hipamd/src
    # We're not on Windows so these are never installed to hipcc...
    substituteInPlace hipamd/CMakeLists.txt \
    --replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \
    --replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""
    substituteInPlace hipamd/src/hip_embed_pch.sh \
    --replace-fail "\''$LLVM_DIR/bin/clang" "${clangBinDir}/clang"
    substituteInPlace opencl/khronos/icd/loader/icd_platform.h \
    --replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \
    '#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";'
    # new unbundler has better error messages, defaulting it on
    substituteInPlace rocclr/utils/flags.hpp \
    --replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true"
  '';

  # Post-install assembly: copy the amdclang shims, wrap the HIP drivers,
  # write the setup hook, link in rocminfo / rocm-core pieces and emit the
  # ICD vendor file into the icd output.
  postInstall = ''
    chmod +x $out/bin/*
    patchShebangs $out/bin
    cp ${amdclangShim}/bin/* $out/bin/
    cp ${amdclangxxShim}/bin/* $out/bin/
    for prog in hip{cc,config}{,.pl}; do
    wrapProgram $out/bin/$prog ${lib.concatStringsSep " " hipWrapperFlags}
    done
    mkdir -p $out/nix-support/
    echo '
    export HIP_PATH="${placeholder "out"}"
    export HIP_PLATFORM=amd
    export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
    export NIX_CC_USE_RESPONSE_FILE=0
    export HIP_CLANG_PATH="${clangBinDir}"
    export ROCM_LIBPATCH_VERSION="${libpatchVersion}"
    export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook
    # Just link rocminfo, it's easier
    ln -s ${rocminfo}/bin/* $out/bin
    ln -s ${rocm-core}/include/* $out/include/
    # Replace rocm-opencl-icd functionality
    mkdir -p $icd/etc/OpenCL/vendors
    echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd
    # add version info to output (downstream rocmPackages look for this)
    ln -s ${rocm-core}/.info/ $out/.info
    ln -s ${toolchain} $out/llvm
  '';

  disallowedRequisites = [
    gcc-unwrapped
  ];

  passthru =
    {
      # All known and valid general GPU targets
      # We cannot use this for each ROCm library, as each defines their own supported targets
      # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
      # Generic targets are not yet available in rocm-6.3.1 llvm
      gpuTargets = map (gfxNumber: "gfx${gfxNumber}") knownGfxNumbers;

      hipClangPath = clangBinDir;

      updateScript = rocmUpdateScript {
        name = finalAttrs.pname;
        inherit (finalAttrs.src) owner repo;
        page = "tags?per_page=4";
      };

      impureTests = {
        rocm-smi = callPackage ./test-rocm-smi.nix {
          inherit rocm-smi;
          clr = finalAttrs.finalPackage;
        };
        opencl-example = callPackage ./test-opencl-example.nix {
          clr = finalAttrs.finalPackage;
        };
      };

      # Without localGpuTargets the caller's supported list is returned as-is.
      selectGpuTargets =
        {
          supported ? [ ],
        }:
        supported;
      gpuArchSuffix = "";
    }
    // lib.optionalAttrs (localGpuTargets != null) {
      inherit localGpuTargets;
      gpuArchSuffix = "-${builtins.concatStringsSep "-" localGpuTargets}";
      # With localGpuTargets: an empty supported list yields the local targets,
      # otherwise the intersection of the two lists.
      selectGpuTargets =
        {
          supported ? [ ],
        }:
        if supported == [ ] then localGpuTargets else lib.intersectLists localGpuTargets supported;
    };

  meta = {
    description = "AMD Common Language Runtime for hipamd, opencl, and rocclr";
    homepage = "https://github.com/ROCm/clr";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,88 @@
# Impure test: builds the AMD APP OpenCL samples and runs HelloWorld with the
# ICD vendor directory taken from clr's icd output.
{
  lib,
  stdenv,
  makeImpureTest,
  fetchFromGitHub,
  clr,
  cmake,
  pkg-config,
  glew,
  libglut,
  opencl-headers,
  ocl-icd,
}:
let
  sampleSrc = fetchFromGitHub {
    owner = "OpenCL";
    repo = "AMD_APP_samples";
    rev = "54da6ca465634e78fc51fc25edf5840467ee2411";
    hash = "sha256-qARQpUiYsamHbko/I1gPZE9pUGJ+3396Vk2n7ERSftA=";
  };

  # Sample binaries, flattened into $out/bin.
  samples = stdenv.mkDerivation {
    pname = "amd-app-samples";
    version = "2018-06-10";
    src = sampleSrc;

    nativeBuildInputs = [
      cmake
      pkg-config
    ];

    buildInputs = [
      glew
      libglut
      opencl-headers
      ocl-icd
    ];

    cmakeFlags = [ "-DBUILD_CPP_CL=OFF" ];

    installPhase = ''
      runHook preInstall
      mkdir -p $out/bin
      # Example path is bin/x86_64/Release/cl/Reduction/Reduction
      cp -r bin/*/*/*/*/* $out/bin/
      runHook postInstall
    '';

    meta = {
      description = "Samples from the AMD APP SDK (with OpenCRun support)";
      homepage = "https://github.com/OpenCL/AMD_APP_samples";
      license = lib.licenses.bsd2;
      platforms = lib.platforms.linux;
      teams = [ lib.teams.rocm ];
    };
  };
in
makeImpureTest {
  name = "opencl-example";
  testedPackage = "rocmPackages_6.clr";

  # Paths listed for the test sandbox.
  sandboxPaths = [
    "/sys"
    "/dev/dri"
    "/dev/kfd"
  ];

  nativeBuildInputs = [ samples ];

  OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors";

  testScript = ''
    # Examples load resources from current directory
    cd ${samples}/bin
    echo OCL_ICD_VENDORS=$OCL_ICD_VENDORS
    pwd
    HelloWorld | grep HelloWorld
  '';

  meta = {
    teams = [ lib.teams.rocm ];
  };
}

View File

@@ -0,0 +1,27 @@
# Impure test: runs clinfo and rocm-smi with the ICD vendor directory taken
# from clr's icd output.
{
  lib,
  makeImpureTest,
  clinfo,
  clr,
  rocm-smi,
}:
let
  icdVendorDir = "${clr.icd}/etc/OpenCL/vendors";
in
makeImpureTest {
  name = "rocm-smi";
  testedPackage = "rocmPackages_6.clr";

  nativeBuildInputs = [
    clinfo
    rocm-smi
  ];

  OCL_ICD_VENDORS = icdVendorDir;

  testScript = ''
    # Test fails if the number of platforms is 0
    clinfo | grep -E 'Number of platforms * [1-9]'
    rocm-smi | grep -A1 GPU
  '';

  meta = {
    teams = [ lib.teams.rocm ];
  };
}

View File

@@ -0,0 +1,167 @@
# Base derivation for composable_kernel.
#
# Not buildable on its own: preBuild exits with an error and meta.broken is
# set. It exists to be overridden and built in chunks by other derivations.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-merged-llvm,
  clr,
  rocminfo,
  hipify,
  gitMinimal,
  gtest,
  zstd,
  buildTests ? false,
  buildExamples ? false,
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx1010"
      "gfx1012"
      "gfx1030"
      "gfx1100"
      "gfx1101"
      "gfx1102"
      "gfx1201"
    ]
  ),
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "composable_kernel_base";
  # Picked this version over 6.3 because much easier to get to build
  # and it matches the version torch 2.6 wants
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "composable_kernel";
    rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
    hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
  };

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildExamples "example";

  strictDeps = true;
  enableParallelBuilding = true;

  nativeBuildInputs = [
    # Deliberately not using ninja
    # because we're jankily composing build outputs from multiple drvs
    # ninja won't believe they're up to date
    gitMinimal
    cmake
    rocminfo
    clr
    hipify
    zstd
  ];

  buildInputs = [
    rocm-cmake
    clr
    zstd
  ];

  env.ROCM_PATH = clr;
  env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";

  cmakeFlags =
    [
      "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
      # "-DDL_KERNELS=ON" # Not needed, slow to build
      # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
      # but migraphx requires an incompatible fork of CK and fails anyway
      # "-DCK_USE_CODEGEN=ON"
      # It might be worth skipping fp64 in future with this:
      # "-DDTYPES=fp32;fp16;fp8;bf16;int8"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DBUILD_DEV=OFF"
      "-DROCM_PATH=${clr}"
      "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
      # FP8 can build for 908/90a but very slow build
      # and produces unusably slow kernels that are huge
      "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
    ]
    # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
    # per readme this is required if archs are dissimilar
    # In rocm-6.3.x not setting any arch flag worked
    # but setting dissimilar arches always failed
    ++ lib.optional (gpuTargets != [ ]) "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
    # Custom linker names
    ++ lib.optional buildTests "-DGOOGLETEST_DIR=${gtest.src}";

  # No flags to build selectively it seems...
  postPatch =
    # Reduce configure time by preventing thousands of clang-tidy targets being added
    # We will never call them
    # Never build profiler
    ''
      substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
      --replace-fail clang_tidy_check '#clang_tidy_check'
      substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(profiler)" ""
    ''
    # Optionally remove tests
    + lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(test)" ""
      substituteInPlace codegen/CMakeLists.txt \
      --replace-fail "include(ROCMTest)" ""
    ''
    # Optionally remove examples
    + lib.optionalString (!buildExamples) ''
      substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(example)" ""
    '';

  # Guard: fail as soon as anybody tries to build this derivation as-is.
  preBuild = ''
    echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
    exit 1
  '';

  # Move test / example binaries into their dedicated outputs when enabled.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    ''
    + lib.optionalString buildExamples ''
      mkdir -p $example/bin
      mv $out/bin/example_* $example/bin
    '';

  passthru = {
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    # True when at least one requested target name starts with "gfx9".
    anyGfx9Target = lib.any (lib.hasPrefix "gfx9") gpuTargets;
  };

  meta = {
    description = "Performance portable programming model for machine learning tensor operators";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    broken = true;
  };
})

View File

@@ -0,0 +1,161 @@
# Base derivation for composable_kernel; the default gpuTargets is gfx1201
# only unless clr.localGpuTargets is set.
#
# Not buildable on its own: preBuild exits with an error and meta.broken is
# set. It exists to be overridden and built in chunks by other derivations.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-merged-llvm,
  clr,
  rocminfo,
  hipify,
  gitMinimal,
  gtest,
  zstd,
  buildTests ? false,
  buildExamples ? false,
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx1201"
    ]
  ),
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "composable_kernel_base";
  # Picked this version over 6.3 because much easier to get to build
  # and it matches the version torch 2.6 wants
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "composable_kernel";
    rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
    hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
  };

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildExamples "example";

  strictDeps = true;
  enableParallelBuilding = true;

  nativeBuildInputs = [
    # Deliberately not using ninja
    # because we're jankily composing build outputs from multiple drvs
    # ninja won't believe they're up to date
    gitMinimal
    cmake
    rocminfo
    clr
    hipify
    zstd
  ];

  buildInputs = [
    rocm-cmake
    clr
    zstd
  ];

  env.ROCM_PATH = clr;
  env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";

  cmakeFlags =
    [
      "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
      # "-DDL_KERNELS=ON" # Not needed, slow to build
      # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
      # but migraphx requires an incompatible fork of CK and fails anyway
      # "-DCK_USE_CODEGEN=ON"
      # It might be worth skipping fp64 in future with this:
      # "-DDTYPES=fp32;fp16;fp8;bf16;int8"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DBUILD_DEV=OFF"
      "-DROCM_PATH=${clr}"
      "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
      # FP8 can build for 908/90a but very slow build
      # and produces unusably slow kernels that are huge
      "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
    ]
    # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
    # per readme this is required if archs are dissimilar
    # In rocm-6.3.x not setting any arch flag worked
    # but setting dissimilar arches always failed
    ++ lib.optional (gpuTargets != [ ]) "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
    # Custom linker names
    ++ lib.optional buildTests "-DGOOGLETEST_DIR=${gtest.src}";

  # No flags to build selectively it seems...
  postPatch =
    # Reduce configure time by preventing thousands of clang-tidy targets being added
    # We will never call them
    # Never build profiler
    ''
      substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
      --replace-fail clang_tidy_check '#clang_tidy_check'
      substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(profiler)" ""
    ''
    # Optionally remove tests
    + lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(test)" ""
      substituteInPlace codegen/CMakeLists.txt \
      --replace-fail "include(ROCMTest)" ""
    ''
    # Optionally remove examples
    + lib.optionalString (!buildExamples) ''
      substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(example)" ""
    '';

  # Guard: fail as soon as anybody tries to build this derivation as-is.
  preBuild = ''
    echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
    exit 1
  '';

  # Move test / example binaries into their dedicated outputs when enabled.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    ''
    + lib.optionalString buildExamples ''
      mkdir -p $example/bin
      mv $out/bin/example_* $example/bin
    '';

  passthru = {
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    # True when at least one requested target name starts with "gfx9".
    anyGfx9Target = lib.any (lib.hasPrefix "gfx9") gpuTargets;
  };

  meta = {
    description = "Performance portable programming model for machine learning tensor operators";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    broken = true;
  };
})

View File

@@ -0,0 +1,43 @@
# ck4inductor: Python package built from the composable_kernel source tree.
{
  buildPythonPackage,
  python,
  composable_kernel,
  lib,
  setuptools,
  setuptools-scm,
  rocm-merged-llvm,
}:
buildPythonPackage {
  pyproject = true;
  pname = "ck4inductor";
  build-system = [
    setuptools
    setuptools-scm
  ];
  # NOTE(review): src is inherited from composable_kernel while the version is
  # pinned separately here; confirm the two are meant to differ.
  version = "6.4.0";
  inherit (composable_kernel) src;

  # Each module is listed once; the previous list repeated gen_instances.
  pythonImportsCheck = [
    "ck4inductor"
    "ck4inductor.universal_gemm.gen_instances"
    "ck4inductor.universal_gemm.op"
  ];

  propagatedBuildInputs = [
    # At runtime will fail to compile anything with ck4inductor without this
    # can't easily use in checks phase because most of the compiler machinery is in torch
    rocm-merged-llvm
  ];

  # Only verifies that the package directory landed in site-packages.
  checkPhase = ''
    if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then
      echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor"
      exit 1
    fi
  '';

  meta = with lib; {
    description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}

View File

@@ -0,0 +1,249 @@
{
lib,
clr,
composable_kernel_base,
}:
let
# Table of build "parts": each entry names a group of make targets that is
# compiled as its own derivation by tensorOpBuilder. Optional keys per entry:
# requiredSystemFeatures and extraCmakeFlags.
parts =
  let
    # Values repeated by most entries, named once.
    bigParallel = [ "big-parallel" ];
    twoHipJobs = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
  in
  {
    _mha = {
      # mha takes ~3hrs on 64 cores on an EPYC milan system at ~2.5GHz
      # big-parallel builders are one gen newer and clocked ~30% higher but only 24 cores
      # Should be <10h timeout but might be cutting it close
      # TODO: work out how to split this into smaller chunks instead of all 3k mha instances together
      # mha_0,1,2, search ninja target file for the individual instances, split by the index?
      # TODO: can we prune the generated instances down to only what in practice are used with popular models
      # when using flash-attention + MHA kernels?
      targets = [ "device_mha_instance" ];
      requiredSystemFeatures = bigParallel;
      extraCmakeFlags = twoHipJobs;
    };
    gemm_multiply_multiply = {
      targets = [ "device_gemm_multiply_multiply_instance" ];
      requiredSystemFeatures = bigParallel;
      extraCmakeFlags = twoHipJobs;
    };
    grouped_conv = {
      targets = [
        "device_grouped_conv1d_bwd_weight_instance"
        "device_grouped_conv2d_bwd_data_instance"
        "device_grouped_conv2d_bwd_weight_instance"
        "device_grouped_conv1d_fwd_instance"
        "device_grouped_conv2d_fwd_instance"
        "device_grouped_conv2d_fwd_dynamic_op_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    grouped_conv_bwd_3d = {
      targets = [
        "device_grouped_conv3d_bwd_data_instance"
        "device_grouped_conv3d_bwd_data_bilinear_instance"
        "device_grouped_conv3d_bwd_data_scale_instance"
        "device_grouped_conv3d_bwd_weight_instance"
        "device_grouped_conv3d_bwd_weight_bilinear_instance"
        "device_grouped_conv3d_bwd_weight_scale_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    grouped_conv_fwd_3d = {
      targets = [
        "device_grouped_conv3d_fwd_instance"
        "device_grouped_conv3d_fwd_bilinear_instance"
        "device_grouped_conv3d_fwd_convinvscale_instance"
        "device_grouped_conv3d_fwd_convscale_instance"
        "device_grouped_conv3d_fwd_convscale_add_instance"
        "device_grouped_conv3d_fwd_convscale_relu_instance"
        "device_grouped_conv3d_fwd_dynamic_op_instance"
        "device_grouped_conv3d_fwd_scale_instance"
        "device_grouped_conv3d_fwd_scaleadd_ab_instance"
        "device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    batched_gemm = {
      targets = [
        "device_batched_gemm_instance"
        "device_batched_gemm_add_relu_gemm_add_instance"
        "device_batched_gemm_bias_permute_instance"
        "device_batched_gemm_gemm_instance"
        "device_batched_gemm_reduce_instance"
        "device_batched_gemm_softmax_gemm_instance"
        "device_batched_gemm_softmax_gemm_permute_instance"
        "device_grouped_gemm_instance"
        "device_grouped_gemm_bias_instance"
        "device_grouped_gemm_fastgelu_instance"
        "device_grouped_gemm_fixed_nk_instance"
        "device_grouped_gemm_fixed_nk_multi_abd_instance"
        "device_grouped_gemm_tile_loop_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    gemm_universal = {
      targets = [
        "device_gemm_universal_instance"
        "device_gemm_universal_batched_instance"
        "device_gemm_universal_reduce_instance"
        "device_gemm_universal_streamk_instance"
      ];
      requiredSystemFeatures = bigParallel;
      extraCmakeFlags = twoHipJobs;
    };
    gemm_other = {
      targets = [
        "device_gemm_instance"
        "device_gemm_ab_scale_instance"
        "device_gemm_add_instance"
        "device_gemm_add_add_fastgelu_instance"
        "device_gemm_add_fastgelu_instance"
        "device_gemm_add_multiply_instance"
        "device_gemm_add_relu_instance"
        "device_gemm_add_relu_add_layernorm_instance"
        "device_gemm_add_silu_instance"
        "device_gemm_bias_add_reduce_instance"
        "device_gemm_bilinear_instance"
        "device_gemm_fastgelu_instance"
        "device_gemm_multi_abd_instance"
        "device_gemm_multiply_add_instance"
        "device_gemm_reduce_instance"
        "device_gemm_splitk_instance"
        "device_gemm_streamk_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    conv = {
      targets = [
        "device_conv1d_bwd_data_instance"
        "device_conv2d_bwd_data_instance"
        "device_conv2d_fwd_instance"
        "device_conv2d_fwd_bias_relu_instance"
        "device_conv2d_fwd_bias_relu_add_instance"
        "device_conv3d_bwd_data_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    # The only part that does not request a big-parallel builder.
    pool = {
      targets = [
        "device_avg_pool2d_bwd_instance"
        "device_avg_pool3d_bwd_instance"
        "device_pool2d_fwd_instance"
        "device_pool3d_fwd_instance"
        "device_max_pool_bwd_instance"
      ];
    };
    other1 = {
      targets = [
        "device_batchnorm_instance"
        "device_contraction_bilinear_instance"
        "device_contraction_scale_instance"
        "device_elementwise_instance"
        "device_elementwise_normalization_instance"
        "device_normalization_bwd_data_instance"
        "device_normalization_bwd_gamma_beta_instance"
        "device_normalization_fwd_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
    other2 = {
      targets = [
        "device_column_to_image_instance"
        "device_image_to_column_instance"
        "device_permute_scale_instance"
        "device_quantization_instance"
        "device_reduce_instance"
        "device_softmax_instance"
        "device_transpose_instance"
      ];
      requiredSystemFeatures = bigParallel;
    };
  };
# Builds one "part" of composable_kernel as its own derivation.
#
# Arguments:
#   part                   - name of the part; used in pname and the build log.
#   targets                - make targets to build (exported as $makeTargets).
#   extraCmakeFlags        - appended to the base derivation's cmakeFlags.
#   requiredSystemFeatures - passed through to the derivation unchanged.
#
# The result overrides composable_kernel_base so that only `targets` are built;
# postBuild then copies every *.o file into $out and exits the builder early.
tensorOpBuilder =
{
part,
targets,
extraCmakeFlags ? [ ],
requiredSystemFeatures ? [ ],
}:
composable_kernel_base.overrideAttrs (old: {
inherit requiredSystemFeatures;
pname = "composable_kernel${clr.gpuArchSuffix}-${part}";
makeTargets = targets;
# Restrict make to this part's targets and strip the .NOTPARALLEL: marker
# from the generated Makefile.
preBuild = ''
echo "Building ${part}"
makeFlagsArray+=($makeTargets)
substituteInPlace Makefile \
--replace-fail '.NOTPARALLEL:' ""
'';
# Compile parallelism adjusted based on available RAM
# Budget is one job per 3 GB of min(MemTotal, MemAvailable), plus one.
# Never uses less than NIX_BUILD_CORES/3, never uses more than NIX_BUILD_CORES
# CK uses an unusually high amount of memory per core in the build step
# Nix/nixpkgs doesn't really have any infra to tell it that this build is unusually memory hungry
# So, bodge. Otherwise you end up having to build all of ROCm with a low core limit when
# it's only this package that has trouble.
preConfigure =
old.preConfigure or ""
+ ''
MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE))
MAX_CORES=$((1 + APPX_GB/3))
MAX_CORES=$((MAX_CORES < NIX_BUILD_CORES/3 ? NIX_BUILD_CORES/3 : MAX_CORES))
export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? MAX_CORES : NIX_BUILD_CORES))"
echo "Picked new core limit NIX_BUILD_CORES=$NIX_BUILD_CORES based on available mem: $APPX_GB GB"
cmakeFlagsArray+=(
"-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES"
)
'';
cmakeFlags = old.cmakeFlags ++ extraCmakeFlags;
# Early exit after build phase with success, skips fixups etc
# Will get copied back into /build of the final CK
# The copy keeps each object file's path relative to the build directory.
postBuild = ''
find . -name "*.o" -type f | while read -r file; do
mkdir -p "$out/$(dirname "$file")"
cp --reflink=auto "$file" "$out/$file"
done
exit 0
'';
# Override the base derivation's `broken` flag for part builds.
meta = old.meta // {
broken = false;
};
});
# One derivation per entry of `parts`; the attribute name is handed to
# tensorOpBuilder as its `part` argument alongside the entry's own fields.
composable_kernel_parts = builtins.mapAttrs (
  name: spec: tensorOpBuilder (spec // { part = name; })
) parts;
in
# Final composable_kernel package: before building, it copies the object files
# produced by every part derivation back into the build tree, then runs the
# base build.
composable_kernel_base.overrideAttrs (
  finalAttrs: old:
  let
    # All part derivations, in attribute-name order.
    partDrvs = builtins.attrValues composable_kernel_parts;
  in
  {
    pname = "composable_kernel${clr.gpuArchSuffix}";
    passthru = old.passthru // {
      parts = composable_kernel_parts;
    };
    meta = old.meta // {
      # Builds which don't target any gfx9 cause cmake errors in dependent projects
      broken = !finalAttrs.passthru.anyGfx9Target;
    };
    # Exported to the builder as $parts_dirs for the loop in preBuild.
    parts_dirs = partDrvs;
    # The part outputs are build-time inputs only; the result must not refer to them.
    disallowedReferences = partDrvs;
    preBuild = ''
      for dir in $parts_dirs; do
      find "$dir" -type f -name "*.o" | while read -r file; do
      # Extract the relative path by removing the output directory prefix
      rel_path="''${file#"$dir/"}"
      # Create parent directory if it doesn't exist
      mkdir -p "$(dirname "$rel_path")"
      # Copy the file back to its original location, give it a future timestamp
      # so make treats it as up to date
      cp --reflink=auto --no-preserve=all "$file" "$rel_path"
      touch -d "now +10 hours" "$rel_path"
      done
      done
    '';
  }
)
)

View File

@@ -0,0 +1,513 @@
{
lib,
config,
callPackage,
newScope,
recurseIntoAttrs,
symlinkJoin,
fetchFromGitHub,
ffmpeg_4,
boost179,
opencv,
libjpeg_turbo,
python3Packages,
triton-llvm,
openmpi,
rocmGpuArches ? [ ],
}:
let
# The ROCm package scope. Built with lib.makeScope so that entries created via
# `self.callPackage` receive their arguments from this same scope.
# When config.allowAliases is set, the scope is extended with aliases that
# throw an explanatory error when evaluated.
outer = lib.makeScope newScope (
self:
let
inherit (self) llvm;
pyPackages = python3Packages;
# Handle on the openmpi passed in from outside; the scope defines its own
# `openmpi` attribute further down by overriding this one.
openmpi-orig = openmpi;
in
{
inherit rocmGpuArches;
# Scope-level values for the buildTests / buildBenchmarks arguments;
# individual entries below override them where needed.
buildTests = false;
buildBenchmarks = false;
stdenv = llvm.rocmClangStdenv;
rocmPath = self.callPackage ./rocm-path { };
rocmUpdateScript = self.callPackage ./update.nix { };
## ROCm ##
# Note: llvm is built with the outer callPackage, not self.callPackage, and
# only rocm-device-libs and rocm-runtime are passed in from this scope.
llvm = recurseIntoAttrs (
callPackage ./llvm/default.nix {
inherit (self) rocm-device-libs rocm-runtime;
}
);
inherit (self.llvm) rocm-merged-llvm clang openmp;
rocm-core = self.callPackage ./rocm-core { };
amdsmi = pyPackages.callPackage ./amdsmi {
inherit (self) rocmUpdateScript;
};
rocm-cmake = self.callPackage ./rocm-cmake { };
rocm-smi = pyPackages.callPackage ./rocm-smi {
inherit (self) rocmUpdateScript;
};
rocm-device-libs = self.callPackage ./rocm-device-libs {
inherit (llvm) rocm-merged-llvm;
};
rocm-runtime = self.callPackage ./rocm-runtime {
inherit (llvm) rocm-merged-llvm;
};
rocm-comgr = self.callPackage ./rocm-comgr {
inherit (llvm) rocm-merged-llvm;
};
rocminfo = self.callPackage ./rocminfo { };
# Unfree
hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { };
rdc = self.callPackage ./rdc { };
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { };
hip-common = self.callPackage ./hip-common { };
# Eventually will be in the LLVM repo
hipcc = self.callPackage ./hipcc {
inherit (llvm) rocm-merged-llvm;
};
# Replaces hip, opencl-runtime, and rocclr
clr = self.callPackage ./clr { };
aotriton = self.callPackage ./aotriton { };
hipify = self.callPackage ./hipify {
inherit (llvm)
clang
rocm-merged-llvm
;
};
# hsakmt was merged into rocm-runtime
hsakmt = self.rocm-runtime;
rocprofiler = self.callPackage ./rocprofiler {
inherit (llvm) clang;
};
rocprofiler-register = self.callPackage ./rocprofiler-register {
inherit (llvm) clang;
};
# Needs GCC
roctracer = self.callPackage ./roctracer { };
rocgdb = self.callPackage ./rocgdb { };
rocdbgapi = self.callPackage ./rocdbgapi { };
rocr-debug-agent = self.callPackage ./rocr-debug-agent { };
rocprim = self.callPackage ./rocprim { };
rocsparse = self.callPackage ./rocsparse { };
rocthrust = self.callPackage ./rocthrust { };
rocrand = self.callPackage ./rocrand { };
hiprand = self.callPackage ./hiprand { };
rocfft = self.callPackage ./rocfft { };
mscclpp = self.callPackage ./mscclpp { };
rccl = self.callPackage ./rccl { };
# RCCL with sanitizers and tests
# Can't have with sanitizer build as dep of other packages without
# runtime crashes due to ASAN not loading first
rccl-tests = self.callPackage ./rccl {
buildTests = true;
};
hipcub = self.callPackage ./hipcub { };
hipsparse = self.callPackage ./hipsparse { };
hipfort = self.callPackage ./hipfort { };
hipfft = self.callPackage ./hipfft { };
hiprt = self.callPackage ./hiprt { };
tensile = pyPackages.callPackage ./tensile {
inherit (self)
rocmUpdateScript
clr
;
};
# rocblas is the one library here built with tests and benchmarks enabled.
rocblas = self.callPackage ./rocblas {
buildTests = true;
buildBenchmarks = true;
inherit (self) roctracer;
};
rocsolver = self.callPackage ./rocsolver { };
rocwmma = self.callPackage ./rocwmma { };
rocalution = self.callPackage ./rocalution { };
rocmlir-rock = self.callPackage ./rocmlir {
buildRockCompiler = true;
};
rocmlir = self.rocmlir-rock;
hipsolver = self.callPackage ./hipsolver { };
hipblas-common = self.callPackage ./hipblas-common { };
hipblas = self.callPackage ./hipblas { };
hipblaslt = self.callPackage ./hipblaslt { };
# hipTensor - Only supports GFX9
composable_kernel_base = self.callPackage ./composable_kernel/base.nix { };
composable_kernel = self.callPackage ./composable_kernel { };
ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix {
inherit (self) composable_kernel;
inherit (llvm) rocm-merged-llvm;
};
half = self.callPackage ./half { };
miopen = self.callPackage ./miopen {
boost = boost179.override { enableStatic = true; };
};
miopen-hip = self.miopen;
migraphx = self.callPackage ./migraphx { };
# rpp and its three backend variants, selected via useOpenCL / useCPU.
rpp = self.callPackage ./rpp { };
rpp-hip = self.rpp.override {
useOpenCL = false;
useCPU = false;
};
rpp-opencl = self.rpp.override {
useOpenCL = true;
useCPU = false;
};
rpp-cpu = self.rpp.override {
useOpenCL = false;
useCPU = true;
};
mivisionx = self.callPackage ./mivisionx {
opencv = opencv.override { enablePython = true; };
# TODO: Remove this pin in ROCm 6.4+
# FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460
ffmpeg = ffmpeg_4;
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
# See: https://github.com/ROCm/MIVisionX/issues/1051
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
version = "2.0.6.1";
src = fetchFromGitHub {
owner = "rrawther";
repo = "libjpeg-turbo";
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
};
# overwrite all patches, since patches for newer version do not apply
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
};
};
mivisionx-hip = self.mivisionx.override {
rpp = self.rpp-hip;
useOpenCL = false;
useCPU = false;
};
mivisionx-cpu = self.mivisionx.override {
rpp = self.rpp-cpu;
useOpenCL = false;
useCPU = true;
};
# Even if config.rocmSupport is false we need rocmSupport true
# version of ucc/ucx in openmpi in this package set
# The same overridden ucx is given to both openmpi and ucc.
openmpi = openmpi-orig.override (
prev:
let
ucx = prev.ucx.override {
enableCuda = false;
enableRocm = true;
};
in
{
inherit ucx;
ucc = prev.ucc.override {
enableCuda = false;
inherit ucx;
};
}
);
mpi = self.openmpi;
# Here `triton-llvm` on the right-hand side is the package passed in from
# outside; the result replaces it inside this scope.
triton-llvm = triton-llvm.overrideAttrs {
src = fetchFromGitHub {
owner = "llvm";
repo = "llvm-project";
# make sure this matches triton llvm rel branch hash for now
# https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt
rev = "86b69c31642e98f8357df62c09d118ad1da4e16a";
hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE=";
};
pname = "triton-llvm-rocm";
patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase
};
triton = pyPackages.callPackage ./triton { rocmPackages = self; };
## Meta ##
# Emulate common ROCm meta layout
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
# Each entry is a symlinkJoin; `rec` lets the entries reference one another.
meta = with self; rec {
rocm-developer-tools = symlinkJoin {
name = "rocm-developer-tools-meta";
paths = [
hsa-amd-aqlprofile-bin
rocm-core
rocr-debug-agent
roctracer
rocdbgapi
rocprofiler
rocgdb
rocm-language-runtime
];
};
rocm-ml-sdk = symlinkJoin {
name = "rocm-ml-sdk-meta";
paths = [
rocm-core
miopen-hip
rocm-hip-sdk
rocm-ml-libraries
];
};
rocm-ml-libraries = symlinkJoin {
name = "rocm-ml-libraries-meta";
paths = [
llvm.clang
llvm.mlir
llvm.openmp
rocm-core
miopen-hip
rocm-hip-libraries
];
};
rocm-hip-sdk = symlinkJoin {
name = "rocm-hip-sdk-meta";
paths = [
rocprim
rocalution
hipfft
rocm-core
hipcub
hipblas
hipblaslt
rocrand
rocfft
hiprt
rocsparse
rccl
rocthrust
rocblas
hipsparse
hipfort
rocwmma
hipsolver
rocsolver
rocm-hip-libraries
rocm-hip-runtime-devel
];
};
rocm-hip-libraries = symlinkJoin {
name = "rocm-hip-libraries-meta";
paths = [
rocblas
hipfort
rocm-core
rocsolver
rocalution
rocrand
hipblas
hipblaslt
rocfft
hipfft
hiprt
rccl
rocsparse
hipsparse
hipsolver
rocm-hip-runtime
];
};
rocm-openmp-sdk = symlinkJoin {
name = "rocm-openmp-sdk-meta";
paths = [
rocm-core
llvm.clang
llvm.mlir
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
rocm-language-runtime
];
};
rocm-opencl-sdk = symlinkJoin {
name = "rocm-opencl-sdk-meta";
paths = [
rocm-core
rocm-runtime
clr
clr.icd
rocm-opencl-runtime
];
};
rocm-opencl-runtime = symlinkJoin {
name = "rocm-opencl-runtime-meta";
paths = [
rocm-core
clr
clr.icd
rocm-language-runtime
];
};
rocm-hip-runtime-devel = symlinkJoin {
name = "rocm-hip-runtime-devel-meta";
paths = [
clr
rocm-core
hipify
rocm-cmake
llvm.clang
llvm.mlir
llvm.openmp
rocm-runtime
rocm-hip-runtime
];
};
rocm-hip-runtime = symlinkJoin {
name = "rocm-hip-runtime-meta";
paths = [
rocm-core
rocminfo
clr
rocm-language-runtime
];
};
rocm-language-runtime = symlinkJoin {
name = "rocm-language-runtime-meta";
paths = [
rocm-runtime
rocm-core
rocm-comgr
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
];
};
# Union of every meta package defined above.
rocm-all = symlinkJoin {
name = "rocm-all-meta";
paths = [
rocm-developer-tools
rocm-ml-sdk
rocm-ml-libraries
rocm-hip-sdk
rocm-hip-libraries
rocm-openmp-sdk
rocm-opencl-sdk
rocm-opencl-runtime
rocm-hip-runtime-devel
rocm-hip-runtime
rocm-language-runtime
];
};
};
rocm-tests = self.callPackage ./rocm-tests {
rocmPackages = self;
};
}
# Aliases for removed or deprecated packages; each one throws when evaluated.
// lib.optionalAttrs config.allowAliases {
rocm-thunk = throw ''
'rocm-thunk' has been removed. It's now part of the ROCm runtime.
''; # Added 2025-3-16
clang-ocl = throw ''
'clang-ocl' has been deprecated upstream. Use ROCm's clang directly.
''; # Added 2025-3-16
miopengemm = throw ''
'miopengemm' has been deprecated.
''; # Added 2024-3-3
miopen-opencl = throw ''
'miopen-opencl' has been deprecated.
''; # Added 2024-3-3
mivisionx-opencl = throw ''
'mivisionx-opencl' has been deprecated.
Other versions of mivisionx are still available.
''; # Added 2024-3-24
}
);
# Returns a copy of the `outer` scope in which `clr` is overridden with
# `localGpuTargets = arches`; every other entry is left as defined in `outer`.
scopeForArches =
  arches:
  outer.overrideScope (
    _: super: {
      clr = super.clr.override { localGpuTargets = arches; };
    }
  );
in
# Result of the file: the default scope, plus one nested scope per entry of
# outer.clr.gpuTargets (keyed by the arch name), plus one nested scope per
# GPU family listed below. Later operands of `//` win on name clashes.
outer
// lib.genAttrs outer.clr.gpuTargets (arch: scopeForArches [ arch ])
// lib.mapAttrs (_family: scopeForArches) {
  gfx9 = [
    "gfx906"
    "gfx908"
    "gfx90a"
    "gfx942"
  ];
  gfx10 = [
    "gfx1010"
    "gfx1030"
  ];
  gfx11 = [
    "gfx1100"
    "gfx1101"
    "gfx1102"
  ];
  gfx12 = [ "gfx1201" ];
}

View File

@@ -0,0 +1,39 @@
# half: fetched from ROCm/half at the tag matching `version` and built with
# cmake and rocm-cmake.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "half";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "half";
    # Tag name is derived from the version above.
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "C++ library for half precision floating point arithmetics";
    homepage = "https://github.com/ROCm/half";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.unix;
  };
})

View File

@@ -0,0 +1,45 @@
# hip-common: the ROCm/HIP source tree installed as-is. Nothing is configured
# or compiled; the install phase moves the unpacked sources into $out.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "hip-common";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "HIP";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-2Iekju0t12c6iiqb104j+Lh53FvZwyqYtST12RkkuKc=";
  };

  # Source-only package: skip the configure and build phases.
  dontBuild = true;
  dontConfigure = true;

  installPhase = ''
    runHook preInstall
    mkdir -p $out
    mv * $out
    runHook postInstall
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "C++ Heterogeneous-Compute Interface for Portability";
    homepage = "https://github.com/ROCm/HIP";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,42 @@
# hipblas-common: files shared by hipBLAS and hipBLASLt, fetched from
# ROCm/hipBLAS-common at the tag matching `version` and built with cmake.
{
  lib,
  stdenv,
  cmake,
  fetchFromGitHub,
  rocm-cmake,
  rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblas-common";
  version = "6.4.1";
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipBLAS-common";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-eTwoAXH2HGdSAOLTZHJUFHF+c2wWHixqeMqr60KxJrc=";
  };
  nativeBuildInputs = [
    cmake
  ];
  buildInputs = [
    rocm-cmake
  ];
  strictDeps = true;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };
  meta = with lib; {
    description = "Common files shared by hipBLAS and hipBLASLt";
    # Points at the repository this package is fetched from (see `src`);
    # the previous value linked to the hipBLASLt repository instead.
    homepage = "https://github.com/ROCm/hipBLAS-common";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,140 @@
# hipBLAS derivation.
#
# Optional client programs are controlled by three flags; each enabled flag
# adds an extra output and the matching binaries are moved there after install:
#   buildTests      -> output "test"
#   buildBenchmarks -> output "benchmark"
#   buildSamples    -> output "sample"
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocprim,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
}:
let
  inherit (lib) optional optionalString;
  # True when at least one kind of client program is being built.
  anyClients = buildTests || buildBenchmarks || buildSamples;
in
# Can also use cuBLAS
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblas";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ optional buildTests "test"
    ++ optional buildBenchmarks "benchmark"
    ++ optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-lQv8Ik6+0ldqyeJ05CSGB0309nIpzlRL3CRYeQxVfd0=";
  };

  # Currently empty; the entry below is kept commented out for reference.
  patches = [
    # https://github.com/ROCm/hipBLAS/pull/952
    # (fetchpatch {
    # name = "transitively-depend-hipblas-common.patch";
    # url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch";
    # hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ=";
    # })
  ];

  # Drop the hard requirement on Git from the library's CMake file.
  postPatch = ''
    substituteInPlace library/CMakeLists.txt \
    --replace-fail "find_package(Git REQUIRED)" ""
  '';

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  propagatedBuildInputs = [ hipblas-common ];

  buildInputs =
    [
      rocblas
      rocprim
      rocsparse
      rocsolver
    ]
    ++ optional buildTests gtest
    ++ optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags =
    [
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}"
      # Upstream is migrating to amdclang++, it is likely this will be correct in next version bump
      #"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}"
    ]
    ++ optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ++ optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Move client binaries into their own outputs, then remove the bin
  # directory from $out (rmdir only succeeds if it is empty by then).
  postInstall =
    optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipblas-test $test/bin
    ''
    + optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblas-bench $benchmark/bin
    ''
    + optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/example-* $sample/bin
    ''
    + optionalString anyClients ''
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm BLAS marshalling library";
    homepage = "https://github.com/ROCm/hipBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,235 @@
# hipBLASLt derivation.
#
# Arguments of note:
#   gpuTargets      - GPU architectures to build kernels for. Defaults to
#                     clr.localGpuTargets when clr provides it, otherwise to
#                     the short list below.
#   buildTests / buildBenchmarks / buildSamples
#                   - each enabled flag adds an extra output ("test",
#                     "benchmark", "sample") and moves the matching client
#                     binaries there in postInstall.
#
# When no target starts with "gfx9" or "gfx11", the package is built with
# Tensile disabled and two extra patches applied (see `patches`).
{
  lib,
  stdenv,
  fetchpatch,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  tensile,
  lapack-reference,
  ncurses,
  libffi,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  # WMMA on gfx1100 may be broken
  # MFMA on MI100 may be broken
  # MI200/MI300 known to work
  gpuTargets ? (
    clr.localGpuTargets or [
      # "gfx908" FIXME: confirm MFMA on MI100 works
      "gfx90a"
      "gfx942"
      # "gfx1100" FIXME: confirm WMMA targets work
    ]
  ),
}:
stdenv.mkDerivation (
  finalAttrs:
  let
    # True when at least one requested target is a gfx9* or gfx11* arch.
    supportsTargetArches =
      (builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
      || (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);
    # Tensile built from the tensilelite directory of this package's source.
    tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
      inherit (finalAttrs) src;
      sourceRoot = "${finalAttrs.src.name}/tensilelite";
    };
    # Python interpreter with the modules needed during the build.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.setuptools
      ps.packaging
    ]);
    # Semicolon-separated target list, or "" when no target is supported.
    gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);
    compiler = "amdclang++";
    cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly
  in
  {
    pname = "hipblaslt${clr.gpuArchSuffix}";
    version = "6.4.1";
    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "hipBLASLt";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-ojNa3jt5285gsPwo4icATJD9JdxmbJBjfCF4A1ttCQ4=";
    };
    env.CXX = compiler;
    env.CFLAGS = cFlags;
    env.CXXFLAGS = cFlags;
    env.ROCM_PATH = "${clr}";
    env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
    env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
    # Some tensile scripts look for this as an env var rather than a cmake flag
    env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++";
    requiredSystemFeatures = [ "big-parallel" ];
    outputs =
      [
        "out"
      ]
      ++ lib.optionals buildTests [
        "test"
      ]
      ++ lib.optionals buildBenchmarks [
        "benchmark"
      ]
      ++ lib.optionals buildSamples [
        "sample"
      ];
    postPatch = ''
      mkdir -p build/Tensile/library
      # git isn't needed and we have no .git
      substituteInPlace cmake/Dependencies.cmake \
      --replace-fail "find_package(Git REQUIRED)" ""
      substituteInPlace CMakeLists.txt \
      --replace-fail "include(virtualenv)" "" \
      --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
      --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
      --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
      # FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
      # Working around for now by disabling the ExtOp libs
      substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
      --replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
      substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
      --replace-fail '${"\${rocm_path}"}/bin/' ""
    '';
    # Apply patches to allow building without a target arch if we need to do that
    patches = lib.optionals (!supportsTargetArches) [
      # Add ability to build without specifying any arch.
      (fetchpatch {
        sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
      })
      # Followup to above patch for 6.3.x
      (fetchpatch {
        sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
      })
    ];
    doCheck = false;
    doInstallCheck = false;
    nativeBuildInputs = [
      cmake
      rocm-cmake
      py
      clr
      gfortran
      # need make to get streaming console output so nix knows build is still running
      # so deliberately not using ninja
    ];
    buildInputs =
      [
        hipblas-common
        tensile'
        openmp
        libffi
        ncurses
        # Tensile deps - not optional, building without tensile isn't actually supported
        msgpack # FIXME: not included in cmake!
        libxml2
        python3Packages.msgpack
        python3Packages.joblib
        zlib
        zstd
      ]
      ++ lib.optionals buildTests [
        gtest
      ]
      ++ lib.optionals (buildTests || buildBenchmarks) [
        lapack-reference
      ];
    cmakeFlags =
      [
        "-Wno-dev"
        "-DCMAKE_BUILD_TYPE=Release"
        "-DCMAKE_VERBOSE_MAKEFILE=ON"
        "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
        "-DTENSILE_USE_HIP=ON"
        "-DTENSILE_BUILD_CLIENT=OFF"
        "-DTENSILE_USE_FLOAT16_BUILTIN=ON"
        "-DCMAKE_CXX_COMPILER=${compiler}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
        "-DHIPBLASLT_ENABLE_MARKER=Off"
        # FIXME what are the implications of hardcoding this?
        "-DTensile_CODE_OBJECT_VERSION=V5"
        "-DTensile_COMPILER=${compiler}"
        "-DAMDGPU_TARGETS=${gpuTargets'}"
        "-DGPU_TARGETS=${gpuTargets'}"
        "-DTensile_LIBRARY_FORMAT=msgpack"
      ]
      ++ lib.optionals (!supportsTargetArches) [
        "-DBUILD_WITH_TENSILE=OFF"
      ]
      ++ lib.optionals buildTests [
        "-DBUILD_CLIENTS_TESTS=ON"
      ]
      ++ lib.optionals buildBenchmarks [
        "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ]
      ++ lib.optionals buildSamples [
        "-DBUILD_CLIENTS_SAMPLES=ON"
      ];
    # Move client binaries into their own outputs.
    # The test and benchmark names are hipBLASLt's own (hipblaslt-*); the
    # previous hipblas-* names belonged to the hipBLAS package and made `mv`
    # fail whenever buildTests or buildBenchmarks was enabled.
    # NOTE(review): the sample glob is unchanged from the hipBLAS recipe;
    # confirm it matches the sample binaries hipBLASLt actually installs.
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblaslt-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblaslt-bench $benchmark/bin
      ''
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';
    # If this is false there are no kernels in the output lib
    # and it's useless at runtime
    # so if it's an optional dep it's best to not depend on it
    # Some packages like torch need hipblaslt to compile
    # and are fine ignoring it at runtime if it's not supported
    # so we have to support building an empty hipblaslt
    passthru.supportsTargetArches = supportsTargetArches;
    passthru.updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    passthru.tensilelite = tensile';
    meta = with lib; {
      description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API";
      homepage = "https://github.com/ROCm/hipBLASlt";
      license = with licenses; [ mit ];
      teams = [ teams.rocm ];
      platforms = platforms.linux;
    };
  }
)

View File

@@ -0,0 +1,39 @@
From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Fri, 11 Oct 2024 02:56:22 -0700
Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived
platforms
Fixes UB when hipBinUtilPtr_ is used.
---
amd/hipcc/src/hipBin_amd.h | 1 -
amd/hipcc/src/hipBin_nvidia.h | 1 -
2 files changed, 2 deletions(-)
diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h
index 0a782d1beab9..36cd625ae8bc 100644
--- a/src/hipBin_amd.h
+++ b/src/hipBin_amd.h
@@ -42,7 +42,6 @@ THE SOFTWARE.
class HipBinAmd : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string hipClangPath_ = "";
string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_;
PlatformInfo platformInfoAMD_;
diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h
index ff142cc1cea2..09b7b80979c7 100644
--- a/src/hipBin_nvidia.h
+++ b/src/hipBin_nvidia.h
@@ -31,7 +31,6 @@ THE SOFTWARE.
class HipBinNvidia : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string cudaPath_ = "";
PlatformInfo platformInfoNV_;
string hipCFlags_, hipCXXFlags_, hipLdFlags_;
--
2.46.0

View File

@@ -0,0 +1,47 @@
# hipcc: built from the amd/hipcc subdirectory of the ROCm LLVM source, so its
# version and source both come from rocm-merged-llvm.
{
  lib,
  stdenv,
  rocm-merged-llvm,
  cmake,
  lsb-release,
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "hipcc";

  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;
  sourceRoot = "${finalAttrs.src.name}/amd/hipcc";

  buildInputs = [ rocm-merged-llvm ];
  nativeBuildInputs = [ cmake ];

  cmakeFlags = [ "-DCMAKE_BUILD_TYPE=Release" ];

  patches = [
    # https://github.com/ROCm/llvm-project/pull/183
    # Fixes always-invoked UB in hipcc
    ./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch
  ];

  # Point the hard-coded lsb_release path at the one from the Nix store.
  postPatch = ''
    substituteInPlace src/hipBin_amd.h \
    --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
  '';

  # Replace $out/hip/bin with a symlink to $out/bin.
  postInstall = ''
    rm -r $out/hip/bin
    ln -s $out/bin $out/hip/bin
  '';

  meta = {
    description = "Compiler driver utility that calls clang or nvcc";
    homepage = "https://github.com/ROCm/HIPCC";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,102 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

# CUB can also be used as a backend instead of rocPRIM.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipcub";
  version = "6.4.1";

  # Tests and benchmarks get their own outputs, but only when they are built.
  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipCUB";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pwCAsRx5XyuCvppTmZ4VG83iYl9ilAQCZds4oKINhSI=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [ rocprim ] ++ lib.optional buildTests gtest ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Move test/benchmark executables into their own outputs; the final rmdir
  # fails the build if anything unexpected is left behind in $out/bin.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Thin wrapper library on top of rocPRIM or CUB";
    homepage = "https://github.com/ROCm/hipCUB";
    license = [ lib.licenses.bsd3 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,125 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  git,
  rocfft,
  gtest,
  boost,
  fftw,
  fftwFloat,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

# Can also use cuFFT
stdenv.mkDerivation (finalAttrs: {
  pname = "hipfft";
  version = "6.4.1";

  # Optional client programs are split into dedicated outputs.
  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipFFT";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-VA9OC/TvvQyFCVVox/9ihuE2W0Ia87O0R3YsLM4Jzuk=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    clr
    git
    cmake
    rocm-cmake
  ];

  # The client dependencies are only pulled in when at least one client is built.
  buildInputs =
    [ rocfft ]
    ++ lib.optionals (buildTests || buildBenchmarks || buildSamples) [
      gtest
      boost
      fftw
      fftwFloat
      openmp
    ];

  cmakeFlags =
    [
      "-DCMAKE_C_COMPILER=hipcc"
      "-DCMAKE_CXX_COMPILER=hipcc"
      "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
      "-DHIP_ROOT_DIR=${clr}"
      "-DHIP_PATH=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_RIDER=ON"
    ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Tests and the rider benchmark are taken from $out/bin; samples are picked
  # up from the build tree's clients/staging directory and get their rpath
  # shrunk to store paths only.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipfft-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipfft-rider $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/hipfft_* $sample/bin
      patchelf $sample/bin/hipfft_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "FFT marshalling library";
    homepage = "https://github.com/ROCm/hipFFT";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,66 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  gfortran,
}:

# hipfort: Fortran interfaces for the ROCm libraries, built with gfortran.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipfort";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipfort";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-a2YPGAWP+gF2EykpKmkG/fEIW6blm2ChOybmLAHQQdw=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    gfortran
  ];

  cmakeFlags = [
    "-DHIPFORT_COMPILER=${gfortran}/bin/gfortran"
    "-DHIPFORT_AR=${gfortran.cc}/bin/gcc-ar"
    "-DHIPFORT_RANLIB=${gfortran.cc}/bin/gcc-ranlib"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Replace absolute /bin/<tool> references with bare tool names resolved via PATH.
  # `--replace-warn` keeps the semantics of the deprecated bare `--replace`
  # (warn, do not fail, when a pattern is absent) without the deprecation notice.
  postPatch = ''
    patchShebangs bin

    substituteInPlace bin/hipfc bin/mymcpu \
      --replace-warn "/bin/cat" "cat"

    substituteInPlace bin/CMakeLists.txt \
      --replace-warn "/bin/mkdir" "mkdir" \
      --replace-warn "/bin/cp" "cp" \
      --replace-warn "/bin/sed" "sed" \
      --replace-warn "/bin/chmod" "chmod" \
      --replace-warn "/bin/ln" "ln"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Fortran interfaces for ROCm libraries";
    homepage = "https://github.com/ROCm/hipfort";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,64 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  libxml2,
  rocm-merged-llvm,
  zlib,
  zstd,
  perl,
}:

# HIPIFY: tools that translate CUDA sources to HIP, built against ROCm LLVM.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipify";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "HIPIFY";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-uj25WmGCpwouS1yzW9Oil5Vyrbyj5yRITvWF9WaGozM=";
  };

  nativeBuildInputs = [
    cmake
  ];

  buildInputs = [
    libxml2
    rocm-merged-llvm
    zlib
    zstd
    perl
  ];

  # Point the build at the `clang` package's binary instead of the one under
  # LLVM_TOOLS_BINARY_DIR, and make the shipped scripts executable.
  # `--replace-warn` keeps the semantics of the deprecated bare `--replace`
  # (warn, do not fail, when the pattern is absent) without the deprecation notice.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang"
    chmod +x bin/*
  '';

  # Installed scripts need the executable bit before patchShebangs will rewrite them.
  postInstall = ''
    chmod +x $out/bin/*
    chmod +x $out/libexec/*
    patchShebangs $out/bin/
    patchShebangs $out/libexec/
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Convert CUDA to Portable C++ Code";
    homepage = "https://github.com/ROCm/HIPIFY";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,79 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  rocrand,
  gtest,
  buildTests ? false,
  gpuTargets ? [ ],
}:

# hipRAND: HIP-level random number API layered on rocRAND.
stdenv.mkDerivation (finalAttrs: {
  pname = "hiprand";
  version = "6.4.1";

  # The test binaries get a separate output when they are built.
  outputs = [ "out" ] ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-ISl4bVW/JvT81gJ/10JlKliv7ds5WtP2f/Dnc9qvh9Q=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [ rocrand ] ++ lib.optional buildTests gtest;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON";

  postInstall = lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/test_* $test/bin
    rm -r $out/bin/hipRAND
    # Fail if bin/ isn't actually empty
    rmdir $out/bin
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "HIP wrapper for rocRAND and cuRAND";
    homepage = "https://github.com/ROCm/hipRAND";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,60 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  clr,
  gcc,
}:

# HIPRT from GPUOpen, built against the HIP installation provided by `clr`.
stdenv.mkDerivation (finalAttrs: {
  pname = "hiprt";
  version = "2.5.a21e075.3";

  src = fetchFromGitHub {
    owner = "GPUOpen-LibrariesAndSDKs";
    repo = "HIPRT";
    tag = finalAttrs.version;
    # SRI hashes belong in `hash`; `sha256` is the legacy attribute name.
    hash = "sha256-3yGhwIsFHlFMCEzuYnXuXNzs99m7f2LTkYaTGs0GEcI=";
  };

  # Rebuild the bundled easy-encryption helper from source so the prebuilt
  # binary shipped in the repository is not used.
  postPatch = ''
    g++ contrib/easy-encryption/cl.cpp -o contrib/easy-encryption/bin/linux/ee64 #replacing prebuilt binary
  '';

  nativeBuildInputs = [
    gcc # required for replacing easy-encryption binary
    cmake
    clr
  ];

  buildInputs = [
    # TODO: do we need anything here?
  ];

  # Flags are written as single `-DNAME=VALUE` tokens so they never depend on
  # word-splitting of the flag list to become valid cmake arguments.
  cmakeFlags = [
    #TODO: mostly copied from the Arch package, verify these:
    "-DCMAKE_BUILD_TYPE=Release"
    "-DHIP_PATH=${clr}"
    "-DBAKE_KERNEL=OFF"
    "-DBAKE_COMPILED_KERNEL=OFF"
    "-DBITCODE=ON"
    "-DPRECOMPILE=ON"
    "-DNO_UNITTEST=ON"
    "-DFORCE_DISABLE_CUDA=ON"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  meta = {
    homepage = "https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT";
    description = "Ray tracing library for HIP";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      mksafavi
    ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,119 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  rocblas,
  rocsolver,
  rocsparse,
  suitesparse,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
}:

# Can also use cuSOLVER
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsolver";
  version = "6.4.1";

  # Optional client programs are split into dedicated outputs.
  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-4ig8/P7JQCx3WB+PRHlhSlRhzdbnDo8QrFnWxsxJdwk=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs =
    [
      rocblas
      rocsolver
      rocsparse
      suitesparse
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags =
    [
      "-DCMAKE_CXX_COMPILER=hipcc"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master
    ]
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Tests and benchmarks are taken from $out/bin; samples are picked up from
  # the build tree's clients/staging directory and get their rpath shrunk to
  # store paths only.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsolver-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipsolver-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example-* $sample/bin
      patchelf $sample/bin/example-* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm SOLVER marshalling library";
    homepage = "https://github.com/ROCm/hipSOLVER";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,152 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocsparse,
  clr,
  gfortran,
  git,
  gtest,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

# This can also use cuSPARSE as a backend instead of rocSPARSE
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsparse";
  version = "6.4.1";

  # NOTE(review): there is no "benchmark" output and postInstall does not
  # relocate benchmark binaries, although buildBenchmarks is a supported
  # argument - confirm whether that is intentional.
  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildTests [
      "test"
    ]
    ++ lib.optionals buildSamples [
      "sample"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pRR/3t7YXgtPQwGFb5lA6DI2OTF6AnDcfkydRIEod2Q=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs =
    [
      rocsparse
      git
    ]
    ++ lib.optionals (buildTests || buildBenchmarks) [
      gtest
    ]
    ++ lib.optionals (buildTests || buildSamples) [
      openmp
    ];

  cmakeFlags =
    [
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
      (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
      (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples)
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ];

  # We have to manually generate the matrices
  # CMAKE_MATRICES_DIR seems to be reset in clients/tests/CMakeLists.txt
  #
  # The matrices directory is addressed as $NIX_BUILD_TOP/$sourceRoot/matrices
  # rather than a literal /build/source/matrices, so the path is also correct
  # when the build directory is not /build (e.g. with the sandbox disabled).
  postPatch = lib.optionalString buildTests ''
    mkdir -p matrices

    ln -s ${rocsparse.passthru.matrices.matrix-01}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-02}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-03}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-04}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-05}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-06}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-07}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-08}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-09}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-10}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-11}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-12}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-13}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-14}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-15}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-16}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-17}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-18}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-19}/*.mtx matrices

    # Not used by the original cmake, causes an error
    rm matrices/*_b.mtx

    echo "deps/convert.cpp -> deps/mtx2csr"
    hipcc deps/convert.cpp -O3 -o deps/mtx2csr

    for mat in $(ls -1 matrices | cut -d "." -f 1); do
      echo "mtx2csr: $mat.mtx -> $mat.bin"
      deps/mtx2csr matrices/$mat.mtx matrices/$mat.bin
      unlink matrices/$mat.mtx
    done

    substituteInPlace clients/tests/CMakeLists.txt \
      --replace-warn "\''${PROJECT_BINARY_DIR}/matrices" "$NIX_BUILD_TOP/$sourceRoot/matrices"
  '';

  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsparse-test $test/bin
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
      rmdir $out/bin
    ''
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example_* $sample/bin
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            gfortran.cc
          ]
        )
      } $sample/bin/example_*
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm SPARSE marshalling library";
    homepage = "https://github.com/ROCm/hipSPARSE";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,53 @@
{
  lib,
  stdenv,
  fetchurl,
  callPackage,
  dpkg,
  rocm-core,
}:

# Repackages the prebuilt hsa-amd-aqlprofile .deb from repo.radeon.com;
# nothing is patched, configured or compiled.
stdenv.mkDerivation (finalAttrs: {
  pname = "hsa-amd-aqlprofile-bin";
  version = "6.3.3";

  src =
    let
      release = finalAttrs.version;
      shortRelease = lib.versions.majorMinor release;
      # An "X.Y.0" release is published under the "X.Y" pool directory.
      poolDir = if shortRelease + ".0" == release then shortRelease else release;
      # NOTE(review): the libpatch component of the file name comes from
      # rocm-core while the pool directory comes from this package's own
      # version - confirm both refer to the same ROCm release.
      libpatch = rocm-core.ROCM_LIBPATCH_VERSION;
      debBuild = "74";
      ubuntuRelease = "22.04";
    in
    fetchurl {
      url = "https://repo.radeon.com/rocm/apt/${poolDir}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${libpatch}-${debBuild}~${ubuntuRelease}_amd64.deb";
      hash = "sha256-Lo6gU9ywkujtsKvnOAwL3L8qQNPwjjm0Pm4OyzoUYao=";
    };

  nativeBuildInputs = [ dpkg ];

  dontPatch = true;
  dontConfigure = true;
  dontBuild = true;

  # Copy the unpacked opt/rocm-<version>* tree into $out and mark the shared
  # objects executable.
  installPhase = ''
    runHook preInstall

    mkdir -p $out
    cp -a opt/rocm-${finalAttrs.version}*/* $out
    chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.*
    chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so

    runHook postInstall
  '';

  passthru.updateScript = (callPackage ./update.nix { }) { inherit (finalAttrs) version; };

  meta = {
    description = "AQLPROFILE library for AMD HSA runtime API extension support";
    homepage = "https://rocm.docs.amd.com/en/latest/";
    license = [ lib.licenses.unfree ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,61 @@
# Builds the update script for the hsa-amd-aqlprofile binary package.
#
# Called in two steps: first with `lib` and `writeScript`, then with the
# package's current `version`. The result is a one-element list holding the
# generated script.
{
lib,
writeScript,
}:
{ version }:
let
# Package name as it appears in the apt pool path and in the .deb file names.
prefix = "hsa-amd-aqlprofile";
# Version components joined with "0" separators, e.g. "6.3.3" -> "60303".
# The script compares this against the number it extracts from .deb names.
extVersion = lib.strings.concatStrings (
lib.strings.intersperse "0" (lib.versions.splitVersion version)
);
major = lib.versions.major version;
minor = lib.versions.minor version;
patch = lib.versions.patch version;
# The script reads the pool listing under .../apt/latest/, then probes up to
# ten later patch releases of the current major.minor, derives a dotted
# version from the newest .deb name it found, refuses anything with a major
# version above 6, and finally calls update-source-version for
# rocmPackages_6.<prefix>-bin.
updateScript = writeScript "update.sh" ''
#!/usr/bin/env nix-shell
#!nix-shell -i bash -p curl common-updater-scripts
apt="https://repo.radeon.com/rocm/apt"
pool="pool/main/h/${prefix}/"
url="$apt/latest/$pool"
res="$(curl -sL "$url")"
deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)"
patch="${patch}"
# Try up to 10 patch versions
for i in {1..10}; do
((patch++))
extVersion="$(echo "$deb" | grep -o -P "(?<=\.....).*(?=\..*-)")"
if (( ''${#extVersion} == 6 )) && (( $extVersion <= ${extVersion} )); then
url="https://repo.radeon.com/rocm/apt/${major}.${minor}.$patch/pool/main/h/${prefix}/"
res="$(curl -sL "$url")"
deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)"
else
break
fi
done
extVersion="$(echo $deb | grep -o -P "(?<=\.....).*(?=\..*-)")"
version="$(echo $extVersion | sed "s/0/./1" | sed "s/0/./1")"
IFS='.' read -a version_arr <<< "$version"
if (( ''${version_arr[0]} > 6 )); then
echo "'rocmPackages_6.${prefix}-bin' is already at it's maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${prefix}-bin'." 1>&2
exit 1
fi
if (( ''${#extVersion} == 6 )); then
repoVersion="$version"
if (( ''${version:4:1} == 0 )); then
repoVersion=''${version:0:3}
fi
update-source-version rocmPackages_6.${prefix}-bin "$version" "" "$apt/$repoVersion/$pool$deb" --ignore-same-hash
fi
'';
in
[ updateScript ]

View File

@@ -0,0 +1,70 @@
diff --git a/cmake/modules/AddClang.cmake b/cmake/modules/AddClang.cmake
index 75b0080f6..c895b884c 100644
--- a/cmake/modules/AddClang.cmake
+++ b/cmake/modules/AddClang.cmake
@@ -119,8 +119,8 @@ macro(add_clang_library name)
install(TARGETS ${lib}
COMPONENT ${lib}
${export_to_clangtargets}
- LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
- ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}"
+ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}"
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
if (NOT LLVM_ENABLE_IDE)
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index e6ae4e19e..5ef01aea2 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -337,6 +337,7 @@ set(llvm_libc_wrapper_files
include(GetClangResourceDir)
get_clang_resource_dir(output_dir PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include)
+set(header_install_dir ${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION_MAJOR}/include)
set(out_files)
set(generated_files)
diff --git a/tools/libclang/CMakeLists.txt b/tools/libclang/CMakeLists.txt
index b5b6d2807..6b592d255 100644
--- a/tools/libclang/CMakeLists.txt
+++ b/tools/libclang/CMakeLists.txt
@@ -246,7 +246,7 @@ foreach(PythonVersion ${CLANG_PYTHON_BINDINGS_VERSIONS})
COMPONENT
libclang-python-bindings
DESTINATION
- "lib${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages")
+ "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages")
endforeach()
if(NOT LLVM_ENABLE_IDE)
add_custom_target(libclang-python-bindings)
diff --git a/tools/scan-build-py/CMakeLists.txt b/tools/scan-build-py/CMakeLists.txt
index 3aca22c0b..3115353e3 100644
--- a/tools/scan-build-py/CMakeLists.txt
+++ b/tools/scan-build-py/CMakeLists.txt
@@ -88,7 +88,7 @@ foreach(lib ${LibScanbuild})
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib})
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib})
install(FILES lib/libscanbuild/${lib}
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild"
COMPONENT scan-build-py)
endforeach()
@@ -106,7 +106,7 @@ foreach(resource ${LibScanbuildResources})
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource})
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource})
install(FILES lib/libscanbuild/resources/${resource}
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild/resources
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild/resources"
COMPONENT scan-build-py)
endforeach()
@@ -122,7 +122,7 @@ foreach(lib ${LibEar})
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib})
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib})
install(FILES lib/libear/${lib}
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libear
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libear"
COMPONENT scan-build-py)
endforeach()

View File

@@ -0,0 +1,23 @@
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
index 57368104c914..71c57f72078e 100644
--- a/lib/Driver/ToolChains/Linux.cpp
+++ b/lib/Driver/ToolChains/Linux.cpp
@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
return;
// LOCAL_INCLUDE_DIR
+ if (!SysRoot.empty())
addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include"));
// TOOL_INCLUDE_DIR
AddMultilibIncludeArgs(DriverArgs, CC1Args);
@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
// Add an include of '/include' directly. This isn't provided by default by
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
// add even when Clang is acting as-if it were a system compiler.
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include"));
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include"));
if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl())

View File

@@ -0,0 +1,40 @@
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 06f5e7e7e335..8407d664886a 100644
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -340,6 +340,9 @@ private:
void Compilation::ExecuteJobs(const JobList &Jobs,
FailingCommandList &FailingCommands,
bool LogOnly) const {
+ // If >1 job, log as each job finishes so can see progress while building many offloads
+ const bool logJobs = Jobs.size() > 1;
+ auto start_time = std::chrono::steady_clock::now();
// According to UNIX standard, driver need to continue compiling all the
// inputs on the command line even one of them failed.
// In all but CLMode, execute all the jobs unless the necessary inputs for the
@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs,
JS.setJobState(Next, JobScheduler::JS_RUN);
auto Work = [&, Next]() {
+ auto job_start_time = std::chrono::steady_clock::now();
const Command *FailingCommand = nullptr;
if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) {
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
JS.setJobState(Next, JobScheduler::JS_FAIL);
} else {
+ if (logJobs && Next) {
+ auto now = std::chrono::steady_clock::now();
+ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count();
+ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
+ if (duration > 10 && job_duration > 0) {
+ if (Next->getOutputFilenames().empty())
+ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n";
+ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true);
+ else
+ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n";
+ }
+ }
+
JS.setJobState(Next, JobScheduler::JS_DONE);
}
};

View File

@@ -0,0 +1,570 @@
{
lib,
stdenv,
llvmPackages_19,
overrideCC,
rocm-device-libs,
rocm-runtime,
fetchFromGitHub,
runCommand,
symlinkJoin,
rdfind,
wrapBintoolsWith,
emptyDirectory,
zstd,
zlib,
gcc-unwrapped,
glibc,
replaceVars,
libffi,
libxml2,
removeReferencesTo,
fetchpatch,
writeShellScript,
makeWrapper,
# Build compilers and stdenv suitable for profiling
# compressed line tables (-g1 -gz) and
# frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer)
# TODO: Should also apply to downstream packages which use rocmClangStdenv
profilableStdenv ? false,
}:
let
# llvmPackages_19 with both bootstrap-bintools arguments overridden to null.
llvmPackagesNoBintools = llvmPackages_19.override {
bootBintools = null;
bootBintoolsNoLibc = null;
};
useLibcxx = false; # whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++
# libc++ flavour: libcxxStdenv with clangUseLLVM as its compiler.
llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM;
# libstdc++ flavour: plain stdenv with libstdcxxClang, using the bintools of
# the unmodified llvmPackages_19.
llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv (
llvmPackagesNoBintools.libstdcxxClang.override {
inherit (llvmPackages_19) bintools;
}
);
# The stdenv used to build the ROCm LLVM packages, chosen by useLibcxx.
stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv;
# Directory holding symlinks to gcc-unwrapped's include/ and lib/.
gcc-include = runCommand "gcc-include" { } ''
mkdir -p $out
ln -s ${gcc-unwrapped}/include/ $out/
ln -s ${gcc-unwrapped}/lib/ $out/
'';
# Store paths the finished toolchain must not reference: the compilers and
# bintools that were only used to bootstrap this LLVM build.
disallowedRefsForToolchain = [
  stdenv.cc
  stdenv.cc.cc
  stdenv.cc.bintools
  gcc-unwrapped
  stdenvToBuildRocmLlvm
];
# A prefix for use as the GCC prefix when building rocmcxx
gcc-prefix =
  let
    gccPrefixPaths = [
      gcc-unwrapped
      gcc-unwrapped.lib
      glibc.dev
    ];
    # GNU triple naming gcc's lib/gcc/<triple> directory. Derived from the
    # platform instead of hard-coding x86_64 so aarch64 (which
    # llvmTargetsFlag supports) resolves to the right directory.
    gccTriple = stdenv.targetPlatform.config;
  in
  symlinkJoin {
    name = "gcc-prefix";
    paths = gccPrefixPaths ++ [
      glibc
    ];
    disallowedRequisites = gccPrefixPaths;
    # Turn the symlink tree into real copies (so the inputs are no longer
    # requisites), flatten the versioned C++ include directory, and scrub
    # remaining references to gcc-unwrapped.lib.
    # The scratch directory lives under $NIX_BUILD_TOP rather than a literal
    # /build so this also works when the build directory is elsewhere.
    postBuild = ''
      rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
      rm $out/lib/gcc/${gccTriple}/*/plugin/include/auto-host.h
      mkdir "$NIX_BUILD_TOP/tmpout"
      mv $out/* "$NIX_BUILD_TOP/tmpout"
      cp -Lr --no-preserve=mode "$NIX_BUILD_TOP"/tmpout/* $out/
      set -x
      versionedIncludePath="$(echo $out/include/c++/*/)"
      mv $versionedIncludePath/* $out/include/c++/
      rm -rf $versionedIncludePath/
      find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} +
      ln -s $out $out/${gccTriple}
    '';
  };
# ROCm release; selects the llvm-project tag fetched by llvmSrc.
version = "6.4.1";
# major version of this should be the clang version ROCm forked from
rocmLlvmVersion = "19.0.0-${llvmSrc.rev}";
# For one derivation, returns the derivation itself plus its `lib` and `dev`
# outputs where those attributes exist.
usefulOutputs =
drv:
builtins.filter (x: x != null) [
drv
(drv.lib or null)
(drv.dev or null)
];
# Applies usefulOutputs to every element of a list and flattens the result.
listUsefulOutputs = builtins.concatMap usefulOutputs;
# Alternative source kept for reference (disabled):
# llvmSrc = fetchFromGitHub {
# # Performance improvements cherry-picked on top of rocm-6.3.x
# # most importantly, amdgpu-early-alwaysinline memory usage fix
# owner = "LunNova";
# repo = "llvm-project-rocm";
# rev = "4182046534deb851753f0d962146e5176f648893";
# hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c=";
# };
# ROCm's llvm-project fork at the tag matching `version`.
# Only the uncommented `rev` and `hash` below are in effect; the commented
# values are earlier attempts left in place.
llvmSrc = fetchFromGitHub {
owner = "ROCm";
repo = "llvm-project";
# rev = "873e9660026931bbd2cbce41475090039f81f8c7";
# hash = "sha256-7B4NQ1LBN3btHjrh9Ht2S+BYYqhKNPAwMoP18qYJw4E=";
# hash = "sha256-cIkKG5rB34G+AqonOS76acnhynmI29PIW7TuE0SQzO4=";
# hash = "sha256-DtbPZ75KqfPqPsGwmT1sUyeI1HyICDDs3SxwQQl72BM=";
rev = "rocm-${version}";
# hash = "sha256-h4mD6gu0Gt4zYme7qtlm9QpsqWZ6XoH+XKd3hsQly1I=";
# hash = "sha256-jJsmPainHOd4BJ0bQbf1M3Kd4+aLbx3ENxtuzJ9+lLY=";
# hash = "sha256-5n3EQby17JEgr3kh1pUNuo/La4hUxMf10O7CckVMS5U=";
hash = "sha256-84+ZsKjIhXip2yLU5jpoV53+ejxy2dzgamVU6AcAngU=";
# hash = "sha256-4b1d9a2c7f0e8c3f5b6d8c1e4f0b2c5f3a6b7c8d9e0f1a2b3c4d5e6f7g8h9i0j";
};
# Currently a plain alias of llvmSrc; it is what llvmPackagesRocm consumes.
llvmSrcFixed = llvmSrc;
llvmMajorVersion = lib.versions.major rocmLlvmVersion;
# An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree
# optionally using LLVM libcxx
#
# Overrides llvmPackages_19 with the ROCm source, the ROCm version string and
# the stdenv selected by useLibcxx; checks are disabled.
llvmPackagesRocm = llvmPackages_19.override (_old: {
stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv;
# not setting gitRelease = because that causes patch selection logic to use git patches
# ROCm LLVM is closer to 18 official
# NOTE(review): the line above says "18" while rocmLlvmVersion and the base
# package set are 19 - confirm which is meant.
# gitRelease = {
# rev-version = rocmLlvmVersion;
# };
# gitRelease = null;
# officialRelease = null;
officialRelease = { }; # Set but empty because we're overriding everything from it.
version = rocmLlvmVersion;
src = llvmSrcFixed;
monorepoSrc = llvmSrcFixed;
doCheck = false;
});
# Builds a self-contained "sysroot" copy of a compiler.
#
# Arguments:
#   cc    - compiler derivation; its `out` and `dev` store paths are rewritten
#           to point at the new output
#   name  - name of the resulting derivation
#   paths - store paths merged (via symlinkJoin) into the sysroot
#
# The merged tree is copied with symlinks dereferenced, $out/usr/local is
# linked back to $out, a setup hook exporting CC=clang and CXX=clang++ is
# written, the libclang_rt.* files are linked under
# lib/clang/<llvmMajorVersion>/lib/linux, and every file has the paths of
# cc.out and cc.dev replaced by $out with sed.
# The `set -x` and `file` calls only produce diagnostic output in the build log.
sysrootCompiler =
cc: name: paths:
let
linked = symlinkJoin { inherit name paths; };
in
runCommand name { } ''
set -x
mkdir -p $out/
cp --reflink=auto -rL ${linked}/* $out/
chmod -R +rw $out
mkdir -p $out/usr
ln -s $out/ $out/usr/local
mkdir -p $out/nix-support/
rm -rf $out/lib64 # we don't need mixed 32 bit
echo 'export CC=clang' >> $out/nix-support/setup-hook
echo 'export CXX=clang++' >> $out/nix-support/setup-hook
mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/
ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/
file $out/bin/.clang-wrapped
file $out/bin/.clang++-wrapped
find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} +
find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} +
file $out/bin/.clang-wrapped
file $out/bin/.clang++-wrapped
# our /include now has more than clang expects, so this specific dir still needs to point to cc.dev
# FIXME: could copy into a different subdir?
sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake
# ${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space
'';
# Predicate over a patch path: true when its base name ends in
# "add-nostdlibinc-flag.patch" or "clang-at-least-16-LLVMgold-path.patch".
findClangNostdlibincPatch =
x:
(
(lib.strings.hasSuffix "add-nostdlibinc-flag.patch" (builtins.baseNameOf x))
|| (lib.strings.hasSuffix "clang-at-least-16-LLVMgold-path.patch" (builtins.baseNameOf x))
);
# LLVM_TARGETS_TO_BUILD: always AMDGPU, plus the backend matching the target
# CPU. Only x86_64 and aarch64 are mapped; any other CPU name fails
# evaluation with a missing-attribute error.
llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${
{
"x86_64" = "X86";
"aarch64" = "AArch64";
}
.${llvmStdenv.targetPlatform.parsed.cpu.name}
}";
# Predicate for filtering inherited cmakeFlags: keeps every flag that does
# NOT mention llvm-tblgen.
tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x);
# True when the stdenv building ROCm LLVM uses a non-clang (GCC) compiler.
addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang;
# Extra C/C++ flags for the Release configuration of the ROCm LLVM build.
#
# -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too
# FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code,
# instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to
# use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time.
#
# The -march/-mtune values name x86 microarchitectures, so they are only
# added on x86_64; on aarch64 (also accepted by llvmTargetsFlag) they would
# be rejected by the compiler.
llvmExtraCflags =
  "-O3 -DNDEBUG"
  + (lib.optionalString llvmStdenv.targetPlatform.isx86_64 " -march=skylake -mtune=znver3")
  + (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto")
  + (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects")
  + (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1");
in
rec {
inherit llvmSrc;
inherit (llvmPackagesRocm) libunwind;
inherit (llvmPackagesRocm) libcxx;
# Pass through original attrs for debugging where non-overridden llvm/clang is getting used
# llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig
# clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig
# ROCm LLVM proper: llvmPackagesRocm.llvm with an extra gold-plugin patch,
# zstd/zlib compression forced on, ThinLTO linked with lld, and references to
# the bootstrap toolchain scrubbed from the `lib` output.
llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: {
  patches = old.patches ++ [
    ./rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch
  ];
  # patches = builtins.filter (
  # x:
  # (
  # !(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x))
  # && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x))
  # )
  # ) old.patches;
  # Keep debug info when building the profilable variant.
  dontStrip = profilableStdenv;
  nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ];
  buildInputs = old.buildInputs ++ [
    zstd
    zlib
  ];
  env.NIX_BUILD_ID_STYLE = "fast";
  postPatch = ''
    ${old.postPatch or ""}
    patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
  '';
  LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
  # Inherited flags that mention llvm-tblgen are dropped (see tablegenUsage).
  # LTO, linker and libc++ selection are set once here; the previous trailing
  # `lib.optionals useLibcxx` list only repeated these same flags.
  cmakeFlags =
    (builtins.filter tablegenUsage old.cmakeFlags)
    ++ [
      llvmTargetsFlag
      "-DCMAKE_BUILD_TYPE=Release"
      "-DLLVM_ENABLE_ZSTD=FORCE_ON"
      "-DLLVM_ENABLE_ZLIB=FORCE_ON"
      "-DLLVM_ENABLE_THREADS=ON"
      "-DLLVM_ENABLE_LTO=Thin"
      "-DLLVM_USE_LINKER=lld"
      (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
      "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
    ]
    ++ lib.optionals addGccLtoCmakeFlags [
      "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
      "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
      "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
    ];
  # The release flag strings contain spaces, so they are appended through
  # cmakeFlagsArray to reach cmake as single arguments.
  preConfigure = ''
    ${old.preConfigure or ""}
    cmakeFlagsArray+=(
      '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
      '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
    )
  '';
  # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
  disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
  postFixup = ''
    ${old.postFixup or ""}
    remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a"
    find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
    find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
    find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
  '';
  doCheck = false;
});
# ROCm lld: the nixpkgs lld derivation built against the ROCm `llvm` above,
# using the same Release/ThinLTO configuration and bootstrap-reference scrubbing.
lld =
  (llvmPackagesRocm.lld.override {
    libllvm = llvm;
    ninja = emptyDirectory;
  }).overrideAttrs
    (old: {
      # Drop the inherited patch whose file name ends in
      # "more-openbsd-program-headers.patch"; all other patches are kept.
      patches = builtins.filter (
        x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x))
      ) old.patches;
      dontStrip = profilableStdenv;
      nativeBuildInputs = old.nativeBuildInputs ++ [
        llvmPackagesNoBintools.lld
        removeReferencesTo
      ];
      buildInputs = old.buildInputs ++ [
        zstd
        zlib
      ];
      # Extend the inherited env rather than replacing it (an `env.X = ...`
      # attribute returned from overrideAttrs discards the base derivation's env).
      env = (old.env or { }) // {
        NIX_BUILD_ID_STYLE = "fast";
      };
      LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
      # LLVM_ENABLE_LIBCXX is already set for both values of useLibcxx via
      # lib.cmakeBool, so no separate libc++-only flag list is needed.
      cmakeFlags =
        (builtins.filter tablegenUsage old.cmakeFlags)
        ++ [
          llvmTargetsFlag
          "-DCMAKE_BUILD_TYPE=Release"
          "-DLLVM_ENABLE_ZSTD=FORCE_ON"
          "-DLLVM_ENABLE_ZLIB=FORCE_ON"
          "-DLLVM_ENABLE_THREADS=ON"
          "-DLLVM_ENABLE_LTO=Thin"
          "-DLLVM_USE_LINKER=lld"
          (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
          "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
        ]
        ++ lib.optionals addGccLtoCmakeFlags [
          "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
          "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
          "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
        ];
      # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
      disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
      postFixup = ''
        ${old.postFixup or ""}
        find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
        find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
      '';
      # The release flags contain spaces, so they go through cmakeFlagsArray.
      preConfigure = ''
        ${old.preConfigure or ""}
        cmakeFlagsArray+=(
          '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
          '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
        )
      '';
    });
# ROCm clang (unwrapped): the nixpkgs clang-unwrapped derivation built against
# the ROCm `llvm`, with a replaced gnu-install-dirs patch, extra ROCm patches,
# and clang.cfg / clang++.cfg files that pass --gcc-toolchain.
clang-unwrapped =
  (
    (llvmPackagesRocm.clang-unwrapped.override {
      libllvm = llvm;
      ninja = emptyDirectory;
    }).overrideAttrs
      (
        old:
        let
          # filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches;
          # Inherited patches minus gnu-install-dirs.patch (a local copy is added
          # below) and whatever findClangNostdlibincPatch matches.
          filteredPatches = builtins.filter (
            x:
            (
              !(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x))
              # && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x))
              && !(findClangNostdlibincPatch x)
            )
          ) old.patches;
        in
        {
          # Only narrow the platforms; writing `meta.platforms = ...` here would
          # replace the whole inherited meta (description, license, ...).
          meta = (old.meta or { }) // {
            platforms = [
              "x86_64-linux"
            ];
          };
          pname = "${old.pname}-rocm";
          patches = filteredPatches ++ [
            ./96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch
            # Never add FHS include paths
            ./clang-bodge-ignore-systemwide-incls.diff
            # Prevents builds timing out if a single compiler invocation is very slow but
            # per-arch jobs are completing by ensuring there's terminal output
            ./clang-log-jobs.diff
            (fetchpatch {
              # [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler
              sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8=";
              url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch";
              relative = "clang";
            })
            # FIXME: Needed due to https://github.com/NixOS/nixpkgs/issues/375431
            # Once we can switch to overrideScope this can be removed
            # (replaceVars ./../../../compilers/llvm/common/clang/clang-at-least-16-LLVMgold-path.patch {
            # libllvmLibdir = "${llvm.lib}/lib";
            # })
          ];
          nativeBuildInputs = old.nativeBuildInputs ++ [
            llvmPackagesNoBintools.lld
            removeReferencesTo
          ];
          buildInputs = old.buildInputs ++ [
            zstd
            zlib
          ];
          dontStrip = profilableStdenv;
          LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
          env = (old.env or { }) // {
            NIX_BUILD_ID_STYLE = "fast";
          };
          # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
          disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
          requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ];
          # https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11
          # LLVM_ENABLE_LTO, LLVM_USE_LINKER and LLVM_ENABLE_LIBCXX are set for
          # both values of useLibcxx; only the runtime-library default is
          # specific to the libc++ configuration.
          cmakeFlags =
            (builtins.filter tablegenUsage old.cmakeFlags)
            ++ [
              llvmTargetsFlag
              "-DCMAKE_BUILD_TYPE=Release"
              "-DLLVM_ENABLE_ZSTD=FORCE_ON"
              "-DLLVM_ENABLE_ZLIB=FORCE_ON"
              "-DLLVM_ENABLE_THREADS=ON"
              "-DLLVM_ENABLE_LTO=Thin"
              "-DLLVM_USE_LINKER=lld"
              (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
              "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
            ]
            ++ lib.optionals addGccLtoCmakeFlags [
              "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
              "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
              "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
            ]
            ++ lib.optionals useLibcxx [
              "-DCLANG_DEFAULT_RTLIB=compiler-rt"
            ];
          # ++ lib.optionals (!useLibcxx) [
          # # FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX?
          # "-DGCC_INSTALL_PREFIX=${gcc-prefix}"
          # ];
          postFixup =
            (old.postFixup or "")
            + ''
              find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
              find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
            '';
          # The release flags contain spaces, so they go through cmakeFlagsArray.
          preConfigure =
            (old.preConfigure or "")
            + ''
              cmakeFlagsArray+=(
                '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
                '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
              )
            '';
          # Write config files next to the clang binaries that pass
          # --gcc-toolchain=<gcc-prefix>.
          postInstall =
            (old.postInstall or "")
            + ''
              echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang.cfg
              echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang++.cfg
            '';
        }
      )
  )
  // {
    libllvm = llvm;
  };
# A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path
# in the right order
# and expects its libc to be in the sysroot
rocmcxx =
  let
    # C++ standard library pieces for the sysroot: libc++ when useLibcxx is
    # set, otherwise the GCC headers together with glibc.
    stdlibPieces =
      if useLibcxx then
        [ libcxx ]
      else
        [
          gcc-include
          glibc
          glibc.dev
        ];
    sysrootContents = listUsefulOutputs (
      [
        clang-unwrapped
        bintools
        compiler-rt
      ]
      ++ stdlibPieces
    );
  in
  (sysrootCompiler clang-unwrapped "rocmcxx" sysrootContents)
  // {
    # Attributes attached on top of the sysrootCompiler result.
    version = llvmMajorVersion;
    cc = rocmcxx;
    libllvm = llvm;
    isClang = true;
    isGNU = false;
  };
# clang-tools from the base LLVM package set, rebuilt against this set's
# clang-unwrapped and clang.
clang-tools = llvmPackagesRocm.clang-tools.override {
inherit clang-unwrapped clang;
};
# compiler-rt-libc from the base LLVM package set with one extra patch fetched
# from ROCm/llvm-project; `relative` limits it to the compiler-rt subdirectory.
compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: {
patches = old.patches ++ [
(fetchpatch {
name = "avoid-overload-ambiguity-for-interceptors.patch";
url = "https://github.com/ROCm/llvm-project/commit/155b7a12820ec45095988b6aa6e057afaf2bc892.patch";
hash = "sha256-pgpN1q1vIQrPXHPxNSZ6zfgV2EflHO5Amzl+2BDjXbs=";
relative = "compiler-rt";
})
];
});
# compiler-rt is an alias for the patched compiler-rt-libc above.
compiler-rt = compiler-rt-libc;
# Wrapped bintools whose unwrapped part is built with this set's lld and llvm.
bintools = wrapBintoolsWith {
bintools = llvmPackagesRocm.bintools-unwrapped.override {
inherit lld llvm;
};
};
# clang is an alias for the rocmcxx sysroot compiler.
clang = rocmcxx;
# Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects
# Implemented as a symlinkJoin over the llvm, lld, libunwind, compiler-rt and
# rocmcxx outputs listed below, plus the libcxx outputs when useLibcxx is set.
rocm-merged-llvm = symlinkJoin {
name = "rocm-llvm-merge";
paths =
[
llvm
llvm.dev
lld
lld.lib
lld.dev
libunwind
libunwind.dev
compiler-rt
compiler-rt.dev
rocmcxx
]
++ lib.optionals useLibcxx [
libcxx
libcxx.out
libcxx.dev
];
# The script fails the build when the merged tree contains no *.cmake files.
# When one of those files mentions a clang-unwrapped output path it only prints
# a warning, because the `exit 1` is commented out.
# NOTE(review): unsafeDiscardStringContext presumably keeps the interpolated
# clang-unwrapped paths from becoming dependencies of this derivation; confirm.
postBuild = builtins.unsafeDiscardStringContext ''
found_files=$(find $out -name '*.cmake')
if [ -z "$found_files" ]; then
>&2 echo "Error: No CMake files found in $out"
exit 1
fi
for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do
if grep "$target" $found_files; then
>&2 echo "Unexpected ref to $target (clang-unwrapped) found"
# exit 1
# # FIXME: enable this to reduce closure size
fi
done
'';
# Extra attributes passed alongside the symlinkJoin arguments: the version
# from the enclosing scope and the LLVM source under the name llvm-src.
inherit version;
llvm-src = llvmSrc;
};
# stdenv whose C compiler is the ROCm clang; the base is the libc++ stdenv when
# useLibcxx is set and the regular LLVM stdenv otherwise.
rocmClangStdenv =
  let
    baseStdenv = if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv;
  in
  overrideCC baseStdenv clang;
# Projects
# OpenMP runtime built with the ROCm clang stdenv against the merged LLVM tree.
openmp =
  (llvmPackagesRocm.openmp.override {
    stdenv = rocmClangStdenv;
    llvm = rocm-merged-llvm;
    targetLlvm = rocm-merged-llvm;
    clang-unwrapped = clang;
  }).overrideAttrs
    (old: {
      disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
      nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ];
      cmakeFlags =
        old.cmakeFlags
        ++ [
          "-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
          # OMPD support is broken in ROCm 6.3. Haven't investigated why.
          "-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE"
          "-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE"
        ]
        ++ lib.optionals addGccLtoCmakeFlags [
          "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
          "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
        ];
      # Extend the inherited env rather than replacing it (an `env.X = ...`
      # attribute returned from overrideAttrs discards the base derivation's
      # env); both variables point at the merged LLVM tree.
      env = (old.env or { }) // {
        LLVM = "${rocm-merged-llvm}";
        LLVM_DIR = "${rocm-merged-llvm}";
      };
      buildInputs = old.buildInputs ++ [
        rocm-device-libs
        rocm-runtime
        zlib
        zstd
        libxml2
        libffi
      ];
    });
}

View File

@@ -0,0 +1,28 @@
From 6c2872afcd9ae8e313621eb6cb7f407e89097304 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Sun, 13 Apr 2025 07:41:34 -0700
Subject: [PATCH] rocm-llvm: gold-plugin: fix ModuleName
---
llvm/tools/gold/gold-plugin.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
index 0d4ca5299689..dd577206408c 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -1100,10 +1100,8 @@ static std::vector<std::pair<SmallString<128>, bool>> runLTO() {
};
auto AddBuffer = [&](size_t Task, const Twine &moduleName,
- std::unique_ptr<MemoryBuffer> MB) {
- auto Stream = *AddStream(Task, ModuleName);
- Stream->OS << MB->getBuffer();
- check(Stream->commit(), "Failed to commit cache");
+ std::unique_ptr<MemoryBuffer> MB) {
+ *AddStream(Task, moduleName)->OS << MB->getBuffer();
};
FileCache Cache;
--
2.48.1

View File

@@ -0,0 +1,194 @@
# MIGraphX (AMD's graph optimization engine) built against the ROCm stack.
#
# Arguments of note:
#   buildDocs  - also build the documentation and declare a "doc" output
#   buildTests - keep the test subdirectory and move test_* binaries to a "test" output
#   gpuTargets - GPU architectures passed to CMake as GPU_TARGETS (default: clr.gpuTargets)
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  pkg-config,
  cmake,
  rocm-cmake,
  clr,
  openmp,
  rocblas,
  hipblas-common,
  hipblas,
  hipblaslt,
  rocmlir,
  miopen,
  protobuf,
  abseil-cpp,
  half,
  nlohmann_json,
  msgpack,
  sqlite,
  oneDNN_2,
  blaze,
  texliveSmall,
  doxygen,
  sphinx,
  docutils,
  ghostscript,
  python3Packages,
  buildDocs ? false,
  buildTests ? false,
  gpuTargets ? clr.gpuTargets,
}:
let
  # LaTeX distribution for the PDF docs; an empty set when docs are disabled
  # (it is only referenced from the buildDocs branch of nativeBuildInputs).
  latex = lib.optionalAttrs buildDocs (
    texliveSmall.withPackages (
      ps: with ps; [
        latexmk
        tex-gyre
        fncychap
        wrapfig
        capt-of
        framed
        needspace
        tabulary
        varwidth
        titlesec
        epstopdf
      ]
    )
  );
in
stdenv.mkDerivation (finalAttrs: {
  pname = "migraphx";
  version = "6.4.1";
  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildDocs [
      "doc"
    ]
    ++ lib.optionals buildTests [
      "test"
    ];
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "AMDMIGraphX";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-PytBEnLBHyp0JzkSLbLRHslqGBk4mabKC62JZoXwHxE=";
  };
  nativeBuildInputs =
    [
      pkg-config
      cmake
      rocm-cmake
      clr
      python3Packages.python
    ]
    ++ lib.optionals buildDocs [
      latex
      doxygen
      sphinx
      docutils
      ghostscript
      python3Packages.sphinx-rtd-theme
      python3Packages.breathe
    ];
  buildInputs = [
    openmp
    rocblas
    hipblas-common
    hipblas
    hipblaslt
    rocmlir
    miopen
    protobuf
    half
    nlohmann_json
    msgpack
    sqlite
    oneDNN_2
    blaze
    python3Packages.pybind11
    python3Packages.onnx
  ];
  LDFLAGS = "-Wl,--allow-shlib-undefined";
  cmakeFlags = [
    "-DMIGRAPHX_ENABLE_GPU=ON"
    "-DMIGRAPHX_ENABLE_CPU=ON"
    "-DMIGRAPHX_ENABLE_FPGA=ON"
    "-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch?
    "-DCMAKE_C_COMPILER=amdclang"
    "-DCMAKE_CXX_COMPILER=amdclang++"
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DEMBED_USE=CArrays" # Fixes error with lld
    # Was "-DDMIGRAPHX_ENABLE_PYTHON=ON": the doubled "D" defined a variable
    # named DMIGRAPHX_ENABLE_PYTHON instead of setting the intended option.
    "-DMIGRAPHX_ENABLE_PYTHON=ON"
    "-DROCM_PATH=${clr}"
    "-DHIP_ROOT_DIR=${clr}"
    # migraphx relies on an incompatible fork of composable_kernel
    # migraphxs relies on miopen which relies on current composable_kernel
    # impossible to build with this ON; we can't link both of them even if we package both
    "-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF"
    "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
    "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
    "-DOpenMP_omp_LIBRARY=${openmp}/lib"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ];
  # `--replace-warn` keeps the lenient behaviour of the deprecated `--replace`
  # (a missing pattern is reported but does not fail the build).
  postPatch =
    ''
      export CXXFLAGS+=" -w -isystem${rocmlir}/include/rocmlir -I${half}/include -I${abseil-cpp}/include -I${hipblas-common}/include"
      patchShebangs tools
      # `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]`
      substituteInPlace CMakeLists.txt \
        --replace-warn "set(MIGRAPHX_TIDY_ERRORS ALL)" ""
    ''
    + lib.optionalString (!buildDocs) ''
      substituteInPlace CMakeLists.txt \
        --replace-warn "add_subdirectory(doc)" ""
    ''
    + lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
        --replace-warn "add_subdirectory(test)" ""
    '';
  # Unfortunately, it seems like we have to call make on this manually
  preInstall = lib.optionalString buildDocs ''
    export HOME=$(mktemp -d)
    make -j$NIX_BUILD_CORES doc
    cd ../doc/pdf
    make -j$NIX_BUILD_CORES
    cd -
  '';
  postInstall =
    lib.optionalString buildDocs ''
      mv ../doc/html $out/share/doc/migraphx
      mv ../doc/pdf/MIGraphX.pdf $out/share/doc/migraphx
    ''
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv bin/test_* $test/bin
      patchelf $test/bin/test_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '';
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };
  meta = with lib; {
    description = "AMD's graph optimization engine";
    homepage = "https://github.com/ROCm/AMDMIGraphX";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,324 @@
# MIOpen (machine intelligence library for ROCm) with the pre-built kernel
# databases shipped in the source tree unpacked and linked into the build and
# install trees.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  runCommand,
  pkg-config,
  cmake,
  rocm-cmake,
  rocblas,
  rocmlir,
  rocrand,
  rocm-runtime,
  rocm-merged-llvm,
  hipblas-common,
  hipblas,
  hipblaslt,
  clr,
  composable_kernel,
  frugally-deep,
  rocm-docs-core,
  half,
  boost,
  sqlite,
  bzip2,
  lbzip2,
  nlohmann_json,
  texliveSmall,
  doxygen,
  sphinx,
  zlib,
  gtest,
  rocm-comgr,
  roctracer,
  python3Packages,
  # FIXME: should be able to use all clr targets
  gpuTargets ? [
    "gfx900"
    "gfx906"
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx1030"
    "gfx1100"
    "gfx1101"
    "gfx1102"
    "gfx1201"
  ], # clr.gpuTargets
  buildDocs ? false, # Needs internet because of rocm-docs-core
  buildTests ? false,
  withComposableKernel ? composable_kernel.anyGfx9Target,
}:
let
  version = "6.4.1";
  # Upstream tag fetched for this version.
  tag = "rocm-${version}";
  # FIXME: cmake files need patched to include this properly
  cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
  # The same target list is handed to CMake under three variable names.
  targetList = lib.concatStringsSep ";" gpuTargets;
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "MIOpen";
    rev = tag;
    hash = "sha256-DEcVj2vOwIYYyNKEKFqZ0fb9o+/QRpwiSksxwnmgEMc=";
    fetchLFS = true;
    fetchSubmodules = true;
    # WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
    leaveDotGit = true;
    # Re-fetch the tag, lift the LFS exclusion, pull only src/kernels/** from
    # LFS, then delete .git again.
    postFetch = ''
      export HOME=$(mktemp -d)
      cd $out
      set -x
      git remote add origin $url
      git fetch origin +refs/tags/${tag}:refs/tags/${tag}
      git clean -fdx
      git switch -c ${tag} refs/tags/${tag}
      git config lfs.fetchexclude "none"
      rm .lfsconfig
      git lfs install
      git lfs track "*.kdb.bz2"
      GIT_TRACE=1 git lfs fetch --include="src/kernels/**"
      GIT_TRACE=1 git lfs pull --include="src/kernels/**"
      git lfs checkout
      rm -rf .git
    '';
  };
  # LaTeX distribution for the docs; an empty set when docs are disabled.
  latex = lib.optionalAttrs buildDocs (
    texliveSmall.withPackages (
      ps: with ps; [
        latexmk
        tex-gyre
        fncychap
        wrapfig
        capt-of
        framed
        needspace
        tabulary
        varwidth
        titlesec
      ]
    )
  );
  # Architectures for which a compressed kernel database is unpacked from src.
  kdbArchs = [
    "gfx900"
    "gfx906"
    "gfx908"
    "gfx90a"
    "gfx1030"
    "gfx1201"
  ];
  # One small derivation per architecture holding the decompressed .kdb file.
  unpackKdb =
    arch:
    runCommand "miopen-${arch}.kdb" { preferLocalBuild = true; } ''
      ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/${arch}.kdb.bz2 > $out
    '';
  kdbs = lib.genAttrs kdbArchs unpackKdb;
  # Shell fragment: one `ln -sf <kdb> <dir>/<arch>.kdb` line per architecture.
  linkKdbsInto =
    dir: lib.concatMapStrings (arch: "ln -sf ${kdbs.${arch}} ${dir}/${arch}.kdb\n") kdbArchs;
in
stdenv.mkDerivation (finalAttrs: {
  inherit version src;
  pname = "miopen";
  env = {
    CFLAGS = cFlags;
    CXXFLAGS = cFlags;
    ROCM_PATH = clr;
    LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
    HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
  };
  # Find zstd and add to target. Mainly for torch.
  patches = [
    ./skip-preexisting-dbs.patch
    ./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448
    (fetchpatch {
      url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
      hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
    })
    (fetchpatch {
      url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
      hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
    })
    # FIXME: We need to rebase or drop this arch compat patch
    # https://github.com/ROCm/MIOpen/issues/3540 suggests that
    # arch compat patching doesn't work correctly for gfx1031
    # (fetchpatch {
    # name = "Extend-MIOpen-ISA-compatibility.patch";
    # url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
    # hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
    # })
  ];
  outputs = [ "out" ] ++ lib.optionals buildDocs [ "doc" ] ++ lib.optionals buildTests [ "test" ];
  enableParallelBuilding = true;
  nativeBuildInputs = [
    pkg-config
    cmake
    rocm-cmake
    clr
  ];
  buildInputs =
    [
      hipblas
      hipblas-common
      rocblas
      rocmlir
      half
      boost
      sqlite
      bzip2
      nlohmann_json
      frugally-deep
      roctracer
      rocrand
      hipblaslt
    ]
    ++ lib.optionals withComposableKernel [
      composable_kernel
    ]
    ++ lib.optionals buildDocs [
      latex
      doxygen
      sphinx
      rocm-docs-core
      python3Packages.sphinx-rtd-theme
      python3Packages.breathe
      python3Packages.myst-parser
    ]
    ++ lib.optionals buildTests [
      gtest
      zlib
    ];
  cmakeFlags =
    [
      "-DAMDGPU_TARGETS=${targetList}"
      "-DGPU_TARGETS=${targetList}"
      "-DGPU_ARCHS=${targetList}"
      "-DMIOPEN_USE_SQLITE_PERFDB=ON"
      "-DCMAKE_VERBOSE_MAKEFILE=ON"
      "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
      "-DCMAKE_BUILD_TYPE=Release"
      # needs to stream to stdout so bzcat rather than bunzip2
      "-DUNZIPPER=${bzip2}/bin/bzcat"
      "-DCMAKE_C_COMPILER=amdclang"
      "-DCMAKE_CXX_COMPILER=amdclang++"
      "-DROCM_PATH=${clr}"
      "-DHIP_ROOT_DIR=${clr}"
      (lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
      (lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
      (lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
      (lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
      (lib.cmakeBool "MIOPEN_USE_COMGR" true)
      "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DMIOPEN_BACKEND=HIP"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_TESTS=ON"
      "-DMIOPEN_TEST_ALL=ON"
    ];
  # Turn the clang-tidy macro and check function into no-ops, fix shebangs,
  # then link the unpacked kernel databases into the source tree and into
  # build/share/miopen/db.
  postPatch =
    ''
      substituteInPlace cmake/ClangTidy.cmake \
      --replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy)
      endmacro()
      macro(enable_clang_tidy_unused)' \
      --replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET)
      return()'
      patchShebangs test src/composable_kernel fin utils install_deps.cmake
    ''
    + linkKdbsInto "src/kernels"
    + ''
      mkdir -p build/share/miopen/db/
    ''
    + linkKdbsInto "build/share/miopen/db";
  # Unfortunately, it seems like we have to call make on these manually
  postBuild =
    lib.optionalString buildDocs ''
      python -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
    ''
    + lib.optionalString buildTests ''
      make -j$NIX_BUILD_CORES check
    '';
  postInstall =
    ''
      rm $out/bin/install_precompiled_kernels.sh
    ''
    + linkKdbsInto "$out/share/miopen/db"
    + lib.optionalString buildDocs ''
      mv ../doc/html $out/share/doc/miopen-hip
    ''
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv bin/test_* $test/bin
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            rocm-comgr
          ]
        )
      } $test/bin/*
    '';
  requiredSystemFeatures = [ "big-parallel" ];
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  meta = {
    description = "Machine intelligence library for ROCm";
    homepage = "https://github.com/ROCm/MIOpen";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,31 @@
From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001
From: "Sv. Lockal" <lockalsash@gmail.com>
Date: Tue, 24 Dec 2024 14:43:10 +0000
Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems
Closes #3441
---
driver/reducecalculation_driver.hpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp
index 8226b3c953..2001969509 100644
--- a/driver/reducecalculation_driver.hpp
+++ b/driver/reducecalculation_driver.hpp
@@ -33,6 +33,7 @@
#include "random.hpp"
#include <algorithm>
#include <cfloat>
+#include <cmath>
#include <cstdlib>
#include <memory>
#include <miopen/miopen.h>
@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc,
for(size_t i = 0; i < reduce_size; ++i)
{
Tcheck val = static_cast<Tcheck>(input[input_idx]);
- if(nanPropagation && isnan(val))
+ if(nanPropagation && std::isnan(val))
{
val = 0.0f;
}

View File

@@ -0,0 +1,22 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0ffaf983..0b9ed0952 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -554,7 +554,7 @@ endif()
function(unpack_db db_bzip2_file)
get_filename_component(__fname ${db_bzip2_file} NAME_WLE)
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}
- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
string(REPLACE "." "_" __tname ${__fname})
add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname})
@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file)
if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db")
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt
DEPENDS sqlite2txt generate_${__tname}
- COMMAND $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
)
add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt)
add_dependencies(generate_kernels generate_${__tname}_txt)

View File

@@ -0,0 +1,25 @@
From f0e66bd446d44df1d30faaad520613f5fb7f5916 Mon Sep 17 00:00:00 2001
From: Martin Schwaighofer <mschwaig@users.noreply.github.com>
Date: Sat, 30 Mar 2024 15:36:52 +0100
Subject: [PATCH] set __STDC_CONSTANT_MACROS to make rocAL compile
---
CMakeLists.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 42b139b6..509915f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -149,6 +149,8 @@ message("-- ${Cyan} -D MIGRAPHX=${MIGRAPHX} [Turn ON/OFF MIGraphX Module (de
message("-- ${Cyan} -D BACKEND=${BACKEND} [Select MIVisionX Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}")
+add_definitions(-D__STDC_CONSTANT_MACROS)
+
add_subdirectory(amd_openvx)
add_subdirectory(amd_openvx_extensions)
add_subdirectory(utilities)
--
2.43.0

View File

@@ -0,0 +1,150 @@
# MIVisionX: AMD's computer vision and machine intelligence libraries.
#
# The backend is chosen by two flags, with useCPU taking precedence:
#   useCPU            -> BACKEND=CPU, pname suffix "cpu"
#   useOpenCL only    -> BACKEND=OCL, pname suffix "opencl" (marked broken in meta)
#   neither (default) -> BACKEND=HIP, pname suffix "hip"
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-device-libs,
  clr,
  pkg-config,
  rpp,
  rocblas,
  miopen,
  migraphx,
  openmp,
  protobuf,
  qtcreator,
  opencv,
  ffmpeg,
  boost,
  libjpeg_turbo,
  half,
  lmdb,
  rapidjson,
  rocm-docs-core,
  python3Packages,
  useOpenCL ? false,
  useCPU ? false,
  buildDocs ? false, # Needs internet
  gpuTargets ? [ ],
}:
stdenv.mkDerivation (finalAttrs: {
  # The second condition used to repeat `!useOpenCL && !useCPU`, which made the
  # "opencl" branch unreachable and named the OpenCL variant "mivisionx-cpu".
  # It now matches the condition used for -DBACKEND=OCL below.
  pname =
    "mivisionx-"
    + (
      if (!useOpenCL && !useCPU) then
        "hip"
      else if (useOpenCL && !useCPU) then
        "opencl"
      else
        "cpu"
    );
  version = "6.4.1";
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "MIVisionX";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-07MivgCYmKLnhGDjOYsFBfwIxEoQLYNoRbOo3MPpVzE=";
  };
  patches = [
    ./0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch
  ];
  nativeBuildInputs =
    [
      cmake
      rocm-cmake
      clr
      pkg-config
    ]
    ++ lib.optionals buildDocs [
      rocm-docs-core
      python3Packages.python
    ];
  buildInputs = [
    miopen
    migraphx
    rpp
    rocblas
    openmp
    half
    protobuf
    qtcreator
    opencv
    ffmpeg
    boost
    libjpeg_turbo
    lmdb
    rapidjson
    python3Packages.pybind11
    python3Packages.numpy
    python3Packages.torchWithRocm
  ];
  cmakeFlags =
    [
      "-DROCM_PATH=${clr}"
      "-DAMDRPP_PATH=${rpp}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
      "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      # "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
    ]
    # AMDGPU_TARGETS is only passed when an explicit target list is given.
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ]
    ++ lib.optionals (!useOpenCL && !useCPU) [
      "-DBACKEND=HIP"
    ]
    ++ lib.optionals (useOpenCL && !useCPU) [
      "-DBACKEND=OCL"
    ]
    ++ lib.optionals useCPU [
      "-DBACKEND=CPU"
    ];
  postPatch = ''
    # We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
    export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
    # Properly find miopen, fix ffmpeg version detection
    substituteInPlace amd_openvx_extensions/CMakeLists.txt \
      --replace-fail "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
      --replace-fail "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
    # Properly find turbojpeg
    substituteInPlace cmake/FindTurboJpeg.cmake \
      --replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
      --replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
  '';
  postBuild = lib.optionalString buildDocs ''
    python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
  '';
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };
  meta = with lib; {
    description = "Set of comprehensive computer vision and machine intelligence libraries, utilities, and applications";
    homepage = "https://github.com/ROCm/MIVisionX";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
    broken = useOpenCL;
  };
})

View File

@@ -0,0 +1,42 @@
# mscclpp (microsoft/mscclpp) built for ROCm.
#
# gpuTargets is the list of GPU architectures to build for. It replaces the
# architecture list in upstream's CMakeLists.txt and is passed to CMake as both
# GPU_TARGETS and AMDGPU_TARGETS. The default reproduces the list that used to
# be hard-coded in three places.
{
  fetchFromGitHub,
  stdenv,
  cmake,
  clr,
  numactl,
  nlohmann_json,
  gpuTargets ? [
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx1030"
    "gfx1100"
  ],
}:
let
  # Space-separated form for the CMakeLists.txt substitution.
  spaceSeparatedTargets = builtins.concatStringsSep " " gpuTargets;
  # Semicolon-separated form for the -D...TARGETS flags.
  cmakeTargets = builtins.concatStringsSep ";" gpuTargets;
in
stdenv.mkDerivation {
  pname = "mscclpp";
  version = "unstable-2024-12-13";
  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "mscclpp";
    rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37";
    hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig=";
  };
  nativeBuildInputs = [
    cmake
  ];
  buildInputs = [
    clr
    numactl
  ];
  # Replace upstream's architecture list; --replace-fail aborts the build if
  # the expected text is no longer present.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-fail "gfx90a gfx941 gfx942" "${spaceSeparatedTargets}"
  '';
  cmakeFlags = [
    "-DMSCCLPP_BYPASS_GPU_CHECK=ON"
    "-DMSCCLPP_USE_ROCM=ON"
    "-DMSCCLPP_BUILD_TESTS=OFF"
    "-DGPU_TARGETS=${cmakeTargets}"
    "-DAMDGPU_TARGETS=${cmakeTargets}"
    "-DMSCCLPP_BUILD_APPS_NCCL=ON"
    "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF"
    "-DFETCHCONTENT_QUIET=OFF"
    "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS"
    # Point FetchContent's "json" dependency at the nlohmann_json source tree.
    "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}"
  ];
  env.ROCM_PATH = clr;
}

View File

@@ -0,0 +1,144 @@
# RCCL: the ROCm communication collectives library.
#
# buildTests enables the test binaries (moved to a separate "test" output) and,
# through useAsan/useUbsan, the sanitizer flags below.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  rocm-core,
  clr,
  mscclpp,
  perl,
  hipify,
  gtest,
  chrpath,
  rocprofiler,
  rocprofiler-register,
  autoPatchelfHook,
  buildTests ? false,
  gpuTargets ? (clr.localGpuTargets or [ ]),
}:
let
  useAsan = buildTests;
  useUbsan = buildTests;
  # Sanitizer flags with a trailing space, or "" when neither sanitizer is on.
  sanitizerFlags = lib.optionalString (useAsan || useUbsan) (
    lib.concatStrings [
      "-fno-gpu-sanitize -fsanitize=undefined "
      (lib.optionalString useAsan "-fsanitize=address -shared-libsan ")
    ]
  );
  # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
  # Reported upstream
  compileFlags = "-I${clr}/include -O2 -fno-strict-aliasing ${sanitizerFlags}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
  targetList = lib.concatStringsSep ";" gpuTargets;
in
# Note: we can't properly test or make use of multi-node collective ops
# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support
# kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
# infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros
stdenv.mkDerivation (finalAttrs: {
  pname = "rccl${clr.gpuArchSuffix}";
  version = "6.4.1";
  outputs = [ "out" ] ++ lib.optionals buildTests [ "test" ];
  patches = [
    ./fix-mainline-support-and-ub.diff
    ./enable-mscclpp-on-all-gfx9.diff
    ./rccl-test-missing-iomanip.diff
  ];
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rccl";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-6lQBpoJKszgvt+UpNEKdiw74s3ZhC4zpA4HP+F6u7X4=";
  };
  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    perl
    hipify
    autoPatchelfHook # ASAN doesn't add rpath without this
  ];
  buildInputs =
    [
      rocm-smi
      gtest
      rocprofiler
      rocprofiler-register
      mscclpp
    ]
    ++ lib.optionals buildTests [
      chrpath
    ];
  cmakeFlags =
    [
      "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DROCM_PATH=${clr}"
      "-DHIP_COMPILER=${clr}/bin/amdclang++"
      "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
      "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
      "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
      "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
      "-DENABLE_MSCCL_KERNEL=ON"
      "-DENABLE_MSCCLPP=ON"
      "-DMSCCLPP_ROOT=${mscclpp}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      # AMD can't make up their minds and keep changing which one is used in different projects.
      "-DAMDGPU_TARGETS=${targetList}"
      "-DGPU_TARGETS=${targetList}"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_TESTS=ON"
    ];
  # C and C++ share one flag string; the linker only gets the sanitizer flags.
  env = {
    CFLAGS = compileFlags;
    CXXFLAGS = compileFlags;
    LDFLAGS = sanitizerFlags;
  };
  postPatch = ''
    patchShebangs src tools
  '';
  # With ASAN, add the ASAN runtime from clr as a NEEDED entry of librccl.so;
  # with tests, move everything from $out/bin into the test output.
  postInstall =
    lib.optionalString useAsan ''
      patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
    ''
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rmdir $out/bin
    '';
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  meta = {
    description = "ROCm communication collectives library";
    homepage = "https://github.com/ROCm/rccl";
    license = [
      lib.licenses.bsd2
      lib.licenses.bsd3
    ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,13 @@
diff --git a/src/init.cc b/src/init.cc
index 738f756..1b0e4fc 100644
--- a/src/init.cc
+++ b/src/init.cc
@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) {
hipDeviceProp_t devProp;
CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev));
- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94");
+ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9");
if (comm->mscclppCompatible) {
bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0);
auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId];

View File

@@ -0,0 +1,178 @@
diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h
index 8c5f081..9922b79 100644
--- a/src/include/bootstrap.h
+++ b/src/include/bootstrap.h
@@ -10,11 +10,13 @@
#include "nccl.h"
#include "comm.h"
+// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128]
struct ncclBootstrapHandle {
uint64_t magic;
union ncclSocketAddress addr;
};
static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID");
+static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB");
ncclResult_t bootstrapNetInit();
ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv);
diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc
index b3063d5..464b80d 100644
--- a/src/misc/rocmwrap.cc
+++ b/src/misc/rocmwrap.cc
@@ -131,9 +131,12 @@ static void initOnceFunc() {
//format and store the kernel conf file location
snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release);
fp = fopen(kernel_conf_file, "r");
- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file");
+ if (fp == NULL) {
+ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled");
+ }
//look for kernel_opt1 and kernel_opt2 in the conf file and check
- while (fgets(buf, sizeof(buf), fp) != NULL) {
+ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary.
+ while (fp && fgets(buf, sizeof(buf), fp) != NULL) {
if (strstr(buf, kernel_opt1) != NULL) {
found_opt1 = 1;
INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release);
@@ -143,11 +146,12 @@ static void initOnceFunc() {
INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release);
}
}
- if (!found_opt1 || !found_opt2) {
+ if (fp && (!found_opt1 || !found_opt2)) {
dmaBufSupport = 0;
INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release);
INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support");
}
+ if (fp) fclose(fp);
if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled");
else goto error;
diff --git a/src/nccl.h.in b/src/nccl.h.in
index 1d127b0..6296073 100644
--- a/src/nccl.h.in
+++ b/src/nccl.h.in
@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
/*! @brief Opaque unique id used to initialize communicators
@details The ncclUniqueId must be passed to all participating ranks */
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
/*! @defgroup rccl_result_code Result Codes
@details The various result codes that RCCL API calls may return
diff --git a/src/proxy.cc b/src/proxy.cc
index 50e5437..51bb401 100644
--- a/src/proxy.cc
+++ b/src/proxy.cc
@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool {
static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) {
if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) {
- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ if (pool->pools) {
+ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ } else {
+ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1));
+ }
NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE));
pool->banks++;
pool->offset = 0;
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
index 6d77784..49762d3 100644
--- a/src/transport/net_ib.cc
+++ b/src/transport/net_ib.cc
@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() {
// Requires support from NIC driver modules
// Use ONLY for debugging!
moduleLoaded = 1;
- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
+ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
}
if (moduleLoaded == -1) {
@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() {
// or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module)
const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version",
"/sys/kernel/memory_peers/amdkfd/version",
- "/sys/memory_peers/amdkfd/version"};
+ "/sys/memory_peers/amdkfd/version",
+ NULL};
int i = 0;
while (memory_peers_paths[i]) {
if (access(memory_peers_paths[i], F_OK) == 0) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]);
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]);
break;
} else {
moduleLoaded = 0;
@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() {
if (moduleLoaded == 0) {
// Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms`
// if your system uses native OS ib_peer module
- char buf[256];
- FILE *fp = NULL;
- fp = fopen("/proc/kallsyms", "r");
+ FILE *fp = fopen("/proc/kallsyms", "r");
+ char *line = NULL;
+ size_t len = 0;
if (fp == NULL) {
- INFO(NCCL_INIT,"Could not open /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client");
} else {
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- if (strstr(buf, "t ib_register_peer_memory_client") != NULL ||
- strstr(buf, "T ib_register_peer_memory_client") != NULL) {
+ while (getline(&line, &len, fp) > 0) {
+ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms");
break;
}
}
}
+ if (line) free(line);
+ if (fp) fclose(fp);
}
#else
// Check for the nv_peer_mem module being loaded
@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() {
#endif
}
if (moduleLoaded == 0) {
- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
return ncclSystemError;
}
return ncclSuccess;
diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h
index 2c86c33..5801c61 100755
--- a/tools/ib-test/include/nccl.h
+++ b/tools/ib-test/include/nccl.h
@@ -31,7 +31,7 @@ extern "C" {
typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/* Error type */
typedef enum { ncclSuccess = 0,
diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h
index 729561b..4e4bdd9 100644
--- a/tools/topo_expl/include/nccl.h
+++ b/tools/topo_expl/include/nccl.h
@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_COMM_NULL NULL
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/*! @brief Error type */
typedef enum { ncclSuccess = 0,

View File

@@ -0,0 +1,10 @@
--- a/test/common/TestBed.cpp
+++ b/test/common/TestBed.cpp
@@ -4,6 +4,7 @@
* See LICENSE.txt for license information
************************************************************************/
#include <unistd.h>
+#include <iomanip>
#include "TestBed.hpp"
#include <rccl/rccl.h>

View File

@@ -0,0 +1,146 @@
# Nix package for `rdc`, built from the ROCm/rdc GitHub repository.
#
# Optional arguments:
#   buildDocs  (default: true)  - adds a "doc" output and puts doxygen,
#                                 graphviz and a TeX Live environment into
#                                 nativeBuildInputs.
#   buildTests (default: false) - adds a "test" output, links against gtest
#                                 and passes -DBUILD_TESTS=ON to CMake.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  amdsmi,
  rocm-smi,
  rocm-runtime,
  libcap,
  libdrm,
  grpc,
  protobuf,
  openssl,
  doxygen,
  graphviz,
  texliveSmall,
  gtest,
  buildDocs ? true,
  buildTests ? false,
}:

let
  # TeX Live environment with the extra packages listed below.  It is only
  # referenced when buildDocs is true; otherwise this evaluates to `{ }`.
  latex = lib.optionalAttrs buildDocs (
    texliveSmall.withPackages (
      ps: with ps; [
        changepage
        latexmk
        varwidth
        multirow
        hanging
        adjustbox
        collectbox
        stackengine
        enumitem
        alphalph
        wasysym
        sectsty
        tocloft
        newunicodechar
        etoc
        helvetic
        wasy
        courier
      ]
    )
  );
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rdc";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildDocs [
      "doc"
    ]
    ++ lib.optionals buildTests [
      "test"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rdc";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-HkGumwag7mDERHiWwZ7cRQz0tzH+vIovY1HgX2g69d4=";
  };

  nativeBuildInputs =
    [
      cmake
      protobuf
    ]
    ++ lib.optionals buildDocs [
      doxygen
      graphviz
      latex
    ];

  buildInputs =
    [
      amdsmi
      rocm-smi
      rocm-runtime
      libcap
      libdrm
      grpc
      openssl
    ]
    ++ lib.optionals buildTests [
      gtest
    ];

  # Put the libcap headers on the C++ include path.
  CXXFLAGS = "-I${libcap.dev}/include";

  cmakeFlags =
    [
      "-DCMAKE_VERBOSE_MAKEFILE=OFF"
      "-DRDC_INSTALL_PREFIX=${placeholder "out"}"
      "-DBUILD_ROCRTEST=ON"
      "-DRSMI_INC_DIR=${rocm-smi}/include"
      "-DRSMI_LIB_DIR=${rocm-smi}/lib"
      "-DGRPC_ROOT=${grpc}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_LIBEXECDIR=libexec"
      "-DCMAKE_INSTALL_DOCDIR=doc"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_TESTS=ON"
    ];

  # Replace the /etc/os-release lookup in CMakeLists.txt with a fixed
  # LINUX_DISTRO value.  `--replace-warn` is the non-deprecated spelling of
  # the old bare `--replace`: it still only warns when the pattern is absent.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
    --replace-warn "file(STRINGS /etc/os-release LINUX_DISTRO LIMIT_COUNT 1 REGEX \"NAME=\")" "set(LINUX_DISTRO \"NixOS\")"
  '';

  postInstall =
    # Shrink the RPATH of every executable under $out/bin, keeping only
    # entries that start with $NIX_STORE.
    ''
      find $out/bin -executable -type f -exec \
      patchelf {} --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" \;
    ''
    # With tests enabled, $out/bin/rdctst_tests is moved to $test/bin.
    + lib.optionalString buildTests ''
      mkdir -p $test
      mv $out/bin/rdctst_tests $test/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Simplifies administration and addresses infrastructure challenges in cluster and datacenter environments";
    homepage = "https://github.com/ROCm/rdc";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,135 @@
# Nix package for rocALUTION (https://github.com/ROCm/rocALUTION).
#
# Optional arguments:
#   buildTests / buildBenchmarks / buildSamples
#       Each one adds a matching "test" / "benchmark" / "sample" output and
#       enables the corresponding BUILD_CLIENTS_* CMake option.
#   gpuTargets
#       List of GPU architectures.  When non-empty it is joined with ";" and
#       passed as both AMDGPU_TARGETS and GPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocblas,
  rocsparse,
  rocprim,
  rocrand,
  clr,
  git,
  pkg-config,
  openmp,
  openmpi,
  gtest,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900:xnack-" "gfx906:xnack-" ... ]
}:

let
  # Semicolon-separated architecture list; only evaluated when gpuTargets is
  # non-empty.
  targetString = lib.concatStringsSep ";" gpuTargets;

  # Render a boolean as the CMake literals ON / OFF.
  onOff = enabled: if enabled then "ON" else "OFF";

  ompInclude = "${openmp.dev}/include";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocalution";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocALUTION";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-bZx1Cc2jcIfysohKCKzj5mowM3IeCelRhVaBU73KnTo=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    git
    pkg-config
  ];

  buildInputs = [
    rocblas
    rocsparse
    rocprim
    rocrand
    openmp
    openmpi
  ] ++ lib.optional buildTests gtest;

  # Put the OpenMP headers on the C++ include path.
  CXXFLAGS = "-I${ompInclude}";

  cmakeFlags =
    [
      "-DOpenMP_C_INCLUDE_DIR=${ompInclude}"
      "-DOpenMP_CXX_INCLUDE_DIR=${ompInclude}"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      "-DROCM_PATH=${clr}"
      "-DHIP_ROOT_DIR=${clr}"
      "-DSUPPORT_HIP=ON"
      "-DSUPPORT_OMP=ON"
      "-DSUPPORT_MPI=ON"
      "-DBUILD_CLIENTS_SAMPLES=${onOff buildSamples}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${targetString}"
      "-DGPU_TARGETS=${targetString}"
    ]
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";

  # Distribute the client binaries over the optional outputs.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rocalution-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/rocalution-bench $benchmark/bin
    '')
    # Samples are taken from clients/staging in the build tree; the test and
    # benchmark binaries are deleted from the copy if present, and the RPATH
    # of what remains is set explicitly.
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/* $sample/bin
      rm $sample/bin/rocalution-test || true
      rm $sample/bin/rocalution-bench || true
      patchelf --set-rpath \
      $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ clr ])} \
      $sample/bin/*
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Iterative sparse solvers for ROCm";
    homepage = "https://github.com/ROCm/rocALUTION";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,200 @@
# Nix package for rocBLAS (https://github.com/ROCm/rocBLAS).
#
# Optional arguments:
#   buildTensile              - add Tensile and its dependencies and pass the
#                               Tensile-specific CMake options.
#   buildTests                - BUILD_CLIENTS_TESTS, links gtest.
#   buildBenchmarks           - BUILD_CLIENTS_BENCHMARKS and
#                               BUILD_CLIENTS_SAMPLES.
#   tensileSepArch            - Tensile_SEPARATE_ARCHITECTURES.
#   tensileLazyLib            - Tensile_LAZY_LIBRARY_LOADING.
#   withHipBlasLt             - BUILD_WITH_HIPBLASLT, links hipblaslt.
#   gpuTargets                - list of architectures, joined with ";" and
#                               passed as SUPPORTED_TARGETS, AMDGPU_TARGETS
#                               and GPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  python3,
  tensile,
  msgpack,
  libxml2,
  gtest,
  gfortran,
  openmp,
  git,
  amd-blis,
  zstd,
  hipblas-common,
  hipblaslt,
  python3Packages,
  rocm-smi,
  libdrm,
  roctracer,
  buildTensile ? true,
  buildTests ? true,
  buildBenchmarks ? true,
  # https://github.com/ROCm/Tensile/issues/1757
  # Allows gfx101* users to use rocBLAS normally.
  # An earlier note here asked for the two values below to be switched to
  # `true` once the fix had been cherry-picked into a release (backporting
  # only that single fix is not enough because it depends on some previous
  # commits).  Both already default to `true` in this file.
  tensileSepArch ? true,
  tensileLazyLib ? true,
  withHipBlasLt ? true,
  # `gfx940`, `gfx941` are not present in this list because they are early
  # engineering samples, and all final MI300 hardware are `gfx942`:
  # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
  #
  # `gfx1012` is not present in this list because the ISA compatibility patches
  # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
  # always try to use `gfx1010` code objects, hence building for `gfx1012` is
  # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx1010"
      # "gfx1012"
      # "gfx1030"
      # "gfx1100"
      # "gfx1101"
      # "gfx1102"
      "gfx1201"
    ]
  ),
}:

let
  # Architectures as a single ";"-separated CMake list value.
  joinedTargets = lib.concatStringsSep ";" gpuTargets;

  # True when any client program (tests or benchmarks) is being built.
  withClients = buildTests || buildBenchmarks;

  fortranBin = "${lib.getBin gfortran}/bin";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs = [ "out" ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-To5V5bydYR5iuUxkwpx79RrNdncvWmR/v/w9VnlKBq4=";
  };

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
    git
  ] ++ lib.optional buildTensile tensile;

  buildInputs =
    [
      python3
      hipblas-common
    ]
    ++ lib.optional withHipBlasLt hipblaslt
    ++ lib.optionals buildTensile [
      zstd
      msgpack
      libxml2
      python3Packages.msgpack
      python3Packages.zstandard
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optionals withClients [
      gfortran
      openmp
      amd-blis
      rocm-smi
    ]
    ++ lib.optional (buildTensile || withClients) python3Packages.pyyaml;

  dontStrip = true;

  env = {
    CXXFLAGS =
      "-O3 -DNDEBUG -I${hipblas-common}/include -I${roctracer}/include -I${libdrm.dev}/include"
      + lib.optionalString withClients " -I${amd-blis}/include/blis";

    # Fails to link tests if we don't add amd-blis libs
    LDFLAGS =
      "-L${roctracer}/lib -L${libdrm.dev}/lib"
      + lib.optionalString withClients " -Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";

    TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
  };

  cmakeFlags =
    [
      (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
      (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
      (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${fortranBin}/gfortran")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${fortranBin}/ar")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${fortranBin}/ranlib")
      (lib.cmakeFeature "python" "python3")
      (lib.cmakeFeature "SUPPORTED_TARGETS" joinedTargets)
      (lib.cmakeFeature "AMDGPU_TARGETS" joinedTargets)
      (lib.cmakeFeature "GPU_TARGETS" joinedTargets)
      (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
      (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
      (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
      (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
      (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
      (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
      (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
      (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
      # Temporarily set variables to work around upstream CMakeLists issue
      # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_LIBDIR=lib"
    ]
    ++ lib.optionals buildTensile [
      "-DCPACK_SET_DESTDIR=OFF"
      "-DLINK_BLIS=ON"
      "-DTensile_CODE_OBJECT_VERSION=default"
      "-DTensile_LOGIC=asm_full"
      "-DTensile_LIBRARY_FORMAT=msgpack"
      (lib.cmakeBool "BUILD_WITH_PIP" false)
      (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
      (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
    ];

  # Currently empty; the ISA-compatibility patch is kept for reference only.
  patches = [
    # (fetchpatch {
    #   name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
    #   url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
    #   hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
    # })
  ];

  # Pass $NIX_BUILD_CORES to Tensile
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
    --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
    # substituteInPlace CMakeLists.txt \
    # --replace-fail "4.42.0" "4.43.0"
  '';

  passthru = {
    # The joined target string, exposed for consumers of this package.
    amdgpu_targets = joinedTargets;

    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
  };

  enableParallelBuilding = true;
  requiredSystemFeatures = [ "big-parallel" ];

  meta = {
    description = "BLAS implementation for ROCm platform";
    homepage = "https://github.com/ROCm/rocBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,114 @@
# Nix package for ROCdbgapi (https://github.com/ROCm/ROCdbgapi).
#
# Optional arguments:
#   buildDocs (default: true) - adds a "doc" output, pulls in doxygen,
#                               graphviz and a TeX Live environment, and runs
#                               the `doc` make target after the main build.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  git,
  rocm-comgr,
  rocm-runtime,
  hwdata,
  texliveSmall,
  doxygen,
  graphviz,
  buildDocs ? true,
}:

let
  # Extra TeX Live packages made available to the documentation build.
  texPackages =
    ps: with ps; [
      changepage
      latexmk
      varwidth
      multirow
      hanging
      adjustbox
      collectbox
      stackengine
      enumitem
      alphalph
      wasysym
      sectsty
      tocloft
      newunicodechar
      etoc
      helvetic
      wasy
      courier
    ];

  # Only referenced when buildDocs is true; evaluates to `{ }` otherwise.
  latex = lib.optionalAttrs buildDocs (texliveSmall.withPackages texPackages);
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocdbgapi";
  version = "6.4.1";

  outputs = [ "out" ] ++ lib.optional buildDocs "doc";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCdbgapi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Rr8+SNeFps0rjk4Jn2+rFmtRJfL42l0tNOz13oZQy+I=";
  };

  nativeBuildInputs =
    [
      cmake
      rocm-cmake
      git
    ]
    ++ lib.optionals buildDocs [
      latex
      doxygen
      graphviz
    ];

  buildInputs = [
    rocm-comgr
    rocm-runtime
    hwdata
  ];

  cmakeFlags = [
    "-DPCI_IDS_PATH=${hwdata}/share/hwdata"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Unfortunately, it seems like we have to call make on this manually
  # (HOME is pointed at a fresh temporary directory first).
  postBuild = lib.optionalString buildDocs ''
    export HOME=$(mktemp -d)
    make -j$NIX_BUILD_CORES doc
  '';

  # Relocate the generated HTML from $out into the "doc" output.
  postInstall = lib.optionalString buildDocs ''
    mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html
    rmdir $out/share/html
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Debugger support for control of execution and inspection state";
    homepage = "https://github.com/ROCm/ROCdbgapi";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,174 @@
# Nix package for rocFFT (https://github.com/ROCm/rocFFT).
#
# The library itself is the main derivation.  The client programs are built
# as separate derivations from sub-directories of the same source and are
# exposed through passthru as `test`, `benchmark` and `samples`.
#
# Optional arguments:
#   gpuTargets - list of architectures; when non-empty it is joined with ";"
#                and passed as AMDGPU_TARGETS.  Defaults to
#                clr.localGpuTargets, falling back to clr.gpuTargets.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  python3,
  rocm-cmake,
  sqlite,
  boost,
  fftw,
  fftwFloat,
  gtest,
  openmp,
  rocrand,
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

stdenv.mkDerivation (
  finalAttrs:
  let
    # Both the library and every client are compiled with hipcc.
    hipccFlags = [
      "-DCMAKE_C_COMPILER=hipcc"
      "-DCMAKE_CXX_COMPILER=hipcc"
    ];

    # Build tools shared by the three client derivations.
    clientTools = [
      cmake
      clr
      rocm-cmake
    ];

    # Source sub-directory of a given client inside the unpacked source.
    clientRoot = subdir: "${finalAttrs.src.name}/clients/${subdir}";

    # Python interpreter with the packages added for the benchmark client.
    benchmarkPython = python3.withPackages (
      ps: with ps; [
        pandas
        scipy
      ]
    );
  in
  {
    pname = "rocfft${clr.gpuArchSuffix}";
    version = "6.4.1";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "rocFFT";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-z8O//2lihXeVnYZklR8uUIgCS9RyNNRb+hM5keR5aYA=";
    };

    nativeBuildInputs = [
      cmake
      clr
      python3
      rocm-cmake
    ];

    # FIXME: rocfft_aot_helper runs at the end of the build and has a risk of timing it out
    # due to a long period with no terminal output
    buildInputs = [ sqlite ];

    cmakeFlags =
      hipccFlags
      ++ [
        "-DSQLITE_USE_SYSTEM_PACKAGE=ON"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
      ]
      ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}";

    passthru = {
      # Test client, built from clients/tests against the finished library.
      test = stdenv.mkDerivation {
        pname = "${finalAttrs.pname}-test";
        inherit (finalAttrs) version src;
        sourceRoot = clientRoot "tests";

        nativeBuildInputs = clientTools;
        buildInputs = [
          boost
          fftw
          fftwFloat
          finalAttrs.finalPackage
          gtest
          openmp
          rocrand
        ];
        cmakeFlags = hipccFlags;

        # Remove the lib/fftw tree and the then-empty lib directory from the
        # output.
        postInstall = ''
          rm -r "$out/lib/fftw"
          rmdir "$out/lib"
        '';
      };

      # Benchmark client, built from clients/rider.
      benchmark = stdenv.mkDerivation {
        pname = "${finalAttrs.pname}-benchmark";
        inherit (finalAttrs) version src;
        sourceRoot = clientRoot "rider";

        nativeBuildInputs = clientTools;
        buildInputs = [
          boost
          finalAttrs.finalPackage
          openmp
          benchmarkPython
          rocrand
        ];
        cmakeFlags = hipccFlags;

        # Copy scripts/perf from the source tree into $out/bin.
        postInstall = ''
          cp -a ../../../scripts/perf "$out/bin"
        '';
      };

      # Sample programs, built from clients/samples.
      samples = stdenv.mkDerivation {
        pname = "${finalAttrs.pname}-samples";
        inherit (finalAttrs) version src;
        sourceRoot = clientRoot "samples";

        nativeBuildInputs = clientTools;
        buildInputs = [
          boost
          finalAttrs.finalPackage
          openmp
          rocrand
        ];
        cmakeFlags = hipccFlags;

        # Install by copying the build tree's bin directory into $out.
        installPhase = ''
          runHook preInstall
          mkdir "$out"
          cp -a bin "$out"
          runHook postInstall
        '';
      };

      updateScript = rocmUpdateScript {
        name = finalAttrs.pname;
        inherit (finalAttrs.src) owner repo;
      };
    };

    requiredSystemFeatures = [ "big-parallel" ];

    meta = {
      description = "FFT implementation for ROCm";
      homepage = "https://github.com/ROCm/rocFFT";
      license = [ lib.licenses.mit ];
      teams = [ lib.teams.rocm ];
      platforms = lib.platforms.linux;
    };
  }
)

View File

@@ -0,0 +1,117 @@
# Nix package for ROCgdb (https://github.com/ROCm/ROCgdb), configured with
# the amdgcn-amd-amdhsa target enabled alongside the native target and
# installed with a `roc` program prefix.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  pkg-config,
  texinfo,
  bison,
  flex,
  glibc,
  zlib,
  zstd,
  gmp,
  mpfr,
  ncurses,
  expat,
  rocdbgapi,
  perl,
  python3,
  babeltrace,
  sourceHighlight,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocgdb";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCgdb";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-evDWg2w2FHv6OU5BQOCAXTlDm7JpwdJ3Wh5a2i5r1gQ=";
  };

  nativeBuildInputs = [
    pkg-config
    texinfo # For makeinfo
    bison
    flex
    perl # used in mkinstalldirs script during installPhase
    python3
  ];

  buildInputs = [
    zlib
    zstd
    gmp
    mpfr
    ncurses
    expat
    rocdbgapi
    python3
    babeltrace
    sourceHighlight
  ];

  configureFlags = [
    # Ensure we build the amdgpu target
    "--enable-targets=${stdenv.targetPlatform.config},amdgcn-amd-amdhsa"
    "--with-amd-dbgapi=yes"
    "--with-iconv-path=${glibc.bin}"
    "--enable-tui"
    "--with-babeltrace=${babeltrace}"
    "--with-python=python3"
    "--with-system-zlib"
    "--with-system-zstd"
    "--enable-64-bit-bfd"
    "--with-gmp=${gmp.dev}"
    "--with-mpfr=${mpfr.dev}"
    "--with-expat=${expat}"
    # So the installed binary is called "rocgdb" instead of plain "gdb"
    "--program-prefix=roc"
    # Disable building many components not used or incompatible with the amdgcn target
    # (each flag is listed once; a duplicated --disable-gdbserver was dropped)
    "--disable-sim"
    "--disable-gdbserver"
    "--disable-ld"
    "--disable-gas"
    "--disable-gdbtk"
    "--disable-gprofng"
    "--disable-shared"
  ];

  # Fix up the shebangs of the regular files in the top-level source directory.
  postPatch = ''
    for file in *; do
      if [ -f "$file" ]; then
        patchShebangs "$file"
      fi
    done
  '';

  # The source directory for ROCgdb (based on upstream GDB) contains multiple
  # projects of the GNU toolchain (binutils and others); we only need to
  # install the GDB part.  The pre/post hooks are run so that preInstall and
  # postInstall supplied through overrides still take effect.
  installPhase = ''
    runHook preInstall
    make install-gdb
    runHook postInstall
  '';

  env.CFLAGS = "-Wno-switch -Wno-format-nonliteral -I${zstd.dev}/include -I${zlib.dev}/include -I${expat.dev}/include -I${ncurses.dev}/include";
  # C++ sources are compiled with exactly the same flags as C sources.
  env.CXXFLAGS = finalAttrs.env.CFLAGS;

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm source-level debugger for Linux, based on GDB";
    homepage = "https://github.com/ROCm/ROCgdb";
    license = licenses.gpl3Plus;
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,38 @@
# Nix package for rocm-cmake (https://github.com/ROCm/rocm-cmake), built with
# CMake and with rocm-core as its only build input.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  rocm-core,
  cmake,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-cmake";
  version = "6.4.1";

  # Sources are fetched from the `rocm-<version>` tag.
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-cmake";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-wAipNWAB66YNf7exLSNPAzg3NgkGD9LPKfKiulL5yak=";
  };

  nativeBuildInputs = [ cmake ];
  buildInputs = [ rocm-core ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "CMake modules for common build tasks for the ROCm stack";
    homepage = "https://github.com/ROCm/rocm-cmake";
    license = lib.licenses.mit;
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.unix;
  };
})

View File

@@ -0,0 +1,73 @@
# Nix package for rocm-comgr, built from the amd/comgr sub-directory of the
# ROCm LLVM source tree provided by rocm-merged-llvm.
{
  lib,
  stdenv,
  fetchpatch,
  cmake,
  python3,
  rocm-merged-llvm,
  rocm-device-libs,
  zlib,
  zstd,
  libxml2,
}:

let
  # LLVM backend name matching the host CPU; evaluation aborts on any host
  # other than x86_64 or AArch64.
  llvmNativeTarget =
    with stdenv.hostPlatform;
    if isx86_64 then
      "X86"
    else if isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-comgr";

  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;
  sourceRoot = "${finalAttrs.src.name}/amd/comgr";

  # Currently empty; the two ISA-compatibility patches are kept for reference.
  patches = [
    # [Comgr] Extend ISA compatibility
    # (fetchpatch {
    #   sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k=";
    #   url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch";
    #   relative = "amd/comgr";
    # })
    # [Comgr] Extend ISA compatibility for CCOB
    # (fetchpatch {
    #   sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY=";
    #   url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch";
    #   relative = "amd/comgr";
    # })
  ];

  nativeBuildInputs = [
    cmake
    python3
  ];

  buildInputs = [
    rocm-device-libs
    libxml2
    zlib
    zstd
    rocm-merged-llvm
  ];

  cmakeFlags = [
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
  ];

  meta = {
    description = "APIs for compiling and inspecting AMDGPU code objects";
    homepage = "https://github.com/ROCm/ROCm-CompilerSupport/tree/amd-stg-open/lib/comgr";
    license = lib.licenses.ncsa;
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,53 @@
# Nix package for rocm-core (https://github.com/ROCm/rocm-core).
#
# Besides building the package, this exposes the derived
# ROCM_LIBPATCH_VERSION string through `passthru` and exports it, together
# with BUILD_ID and ROCM_BUILD_ID, from a setup hook.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  writeText,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-core";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-core";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-dDTCEAbeA88deLgUbdbulaHpHI9zcTze0mZeS49TsAM=";
  };

  nativeBuildInputs = [ cmake ];

  # FIXME: What's the correct way to set this?
  # Built as <major><minor, 2 digits><patch, 2 digits>, e.g. 6.4.1 -> 60401.
  # Minor and patch are zero-padded to width two instead of being prefixed
  # with a literal "0", so a two-digit component (6.10.1 -> 61001) does not
  # produce an extra digit.
  env.ROCM_LIBPATCH_VERSION =
    let
      pad2 = lib.fixedWidthString 2 "0";
      inherit (finalAttrs) version;
    in
    "${lib.versions.major version}${pad2 (lib.versions.minor version)}${pad2 (lib.versions.patch version)}";
  env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}";
  env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}";

  cmakeFlags = [
    "-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
    "-DROCM_VERSION=${finalAttrs.version}"
    "-DBUILD_ID=${finalAttrs.env.BUILD_ID}"
  ];

  # Export the same three values into the environment of any build that
  # pulls in this package's setup hook.
  setupHook = writeText "setupHook.sh" ''
    export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
    export BUILD_ID="${finalAttrs.env.BUILD_ID}"
    export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}"
  '';

  # Lets other Nix expressions read the value as
  # `rocm-core.ROCM_LIBPATCH_VERSION`.
  passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION;

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
    page = "tags?per_page=4";
  };

  meta = with lib; {
    description = "Utility for getting the ROCm release version";
    homepage = "https://github.com/ROCm/rocm-core";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,43 @@
diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake
index 07c60eb..c736b3e 100644
--- a/amd/device-libs/cmake/Packages.cmake
+++ b/amd/device-libs/cmake/Packages.cmake
@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES
IMPORTED_LOCATION \"${target_path}\")")
endforeach()
configure_file(AMDDeviceLibsConfig.cmake.in
- ${PACKAGE_PREFIX}/AMDDeviceLibsConfig.cmake
+ lib/cmake/AMDDeviceLibs/AMDDeviceLibsConfig.cmake
@ONLY)
set(install_path_suffix "amdgcn/bitcode")
# Generate the install-tree package.
-# We do not know the absolute path to the intall tree until we are installed,
-# so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
-# relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
-set(AMD_DEVICE_LIBS_PREFIX_CODE "
+if(IS_ABSOLUTE "${CMAKE_INSTALL_PREFIX}")
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "set(AMD_DEVICE_LIBS_PREFIX \"${CMAKE_INSTALL_PREFIX}\")")
+else()
+ # We do not know the absolute path to the install tree until we are installed,
+ # so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
+ # relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "
# Derive absolute install prefix from config file path.
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
-string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
-foreach(p ${count})
- set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
+ string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
+ foreach(p ${count})
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${AMD_DEVICE_LIBS_PREFIX}\" PATH)")
-endforeach()
+ endforeach()
+endif()
+
set(AMD_DEVICE_LIBS_TARGET_CODE)
foreach(target ${AMDGCN_LIB_LIST})
get_target_property(target_name ${target} ARCHIVE_OUTPUT_NAME)

View File

@@ -0,0 +1,62 @@
# Nix package for rocm-device-libs, built from the amd/device-libs
# sub-directory of the ROCm LLVM source tree provided by rocm-merged-llvm.
{
  lib,
  stdenv,
  cmake,
  ninja,
  libxml2,
  zlib,
  zstd,
  ncurses,
  rocm-merged-llvm,
  python3,
}:

let
  # LLVM backend name matching the host CPU; evaluation aborts on any host
  # other than x86_64 or AArch64.
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation {
  pname = "rocm-device-libs";

  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;

  # cmake.patch uses paths relative to the LLVM source root, so it is applied
  # there first; only afterwards does the build descend into amd/device-libs.
  postPatch = ''
    cd amd/device-libs
  '';

  patches = [ ./cmake.patch ];

  nativeBuildInputs = [
    cmake
    ninja
    python3
  ];

  buildInputs = [
    libxml2
    zlib
    zstd
    ncurses
    rocm-merged-llvm
  ];

  cmakeFlags = [
    # Was `CMAKE_RELEASE_TYPE`, which is not a variable CMake knows about;
    # the build-type variable is CMAKE_BUILD_TYPE.
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
  ];

  meta = with lib; {
    description = "Set of AMD-specific device-side language runtime libraries";
    homepage = "https://github.com/ROCm/ROCm-Device-Libs";
    license = licenses.ncsa;
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}

View File

@@ -0,0 +1,71 @@
# Python package "rocm-docs-core" (import name: rocm_docs), fetched from the
# ROCm GitHub organisation and built from its pyproject metadata.
{
  lib,
  fetchFromGitHub,
  gitUpdater,
  buildPythonPackage,
  setuptools,
  beautifulsoup4,
  gitpython,
  pydata-sphinx-theme,
  pygithub,
  sphinx,
  breathe,
  myst-nb,
  myst-parser,
  sphinx-book-theme,
  sphinx-copybutton,
  sphinx-design,
  sphinx-external-toc,
  sphinx-notfound-page,
  pyyaml,
  fastjsonschema,
}:

# FIXME: Move to rocmPackages_common
buildPythonPackage rec {
  pname = "rocm-docs-core";
  version = "1.19.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-docs-core";
    rev = "v${version}";
    hash = "sha256-vmtOf9e8RhWQ0ecL+Sn1HJGK+ILCaQxeQvUkQ8W8YX8=";
  };

  # setuptools is the build backend: it has to run on the build machine, so it
  # is declared as the build system rather than as a host-platform buildInput.
  build-system = [ setuptools ];

  # Python dependencies propagated to consumers of this package.
  dependencies = [
    beautifulsoup4
    gitpython
    pydata-sphinx-theme
    pygithub
    sphinx
    breathe
    myst-nb
    myst-parser
    sphinx-book-theme
    sphinx-copybutton
    sphinx-design
    sphinx-external-toc
    sphinx-notfound-page
    pyyaml
    fastjsonschema
  ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "rocm_docs" ];

  # Upstream tags are of the form "v<version>".
  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = with lib; {
    description = "ROCm Documentation Python package for ReadTheDocs build standardization";
    homepage = "https://github.com/ROCm/rocm-docs-core";
    license = with licenses; [
      mit
      cc-by-40
    ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}

View File

@@ -0,0 +1,27 @@
# Single merged tree of symlinks to a fixed set of ROCm packages, with the
# clang package additionally exposed under an `llvm` entry.
{
  symlinkJoin,
  linkFarm,
  clr,
  hipblas,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocm-device-libs,
  rocm-smi,
  llvm,
}:

let
  # Link farm containing one entry, `llvm`, that points at llvm.clang.
  llvmSubdir = linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; };

  # Packages merged into the tree, in the order symlinkJoin receives them.
  mergedPackages = [
    clr
    hipblas-common
    hipblas
    rocblas
    rocsolver
    rocsparse
    rocm-device-libs
    rocm-smi
    llvmSubdir
  ];
in
symlinkJoin {
  # The tree is versioned after clr.
  name = "rocm-path-${clr.version}";
  paths = mergedPackages;
}

View File

@@ -0,0 +1,115 @@
# ROCR-Runtime ("Platform runtime for ROCm"), built with CMake and Ninja from
# the upstream release tag, plus a few upstream undefined-behaviour fixes and
# a local patch dropping the aqlprofile dependency.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  pkg-config,
  cmake,
  ninja,
  xxd,
  rocm-device-libs,
  elfutils,
  libdrm,
  numactl,
  valgrind,
  libxml2,
  rocm-merged-llvm,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-runtime";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCR-Runtime";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-LOILnvjGwlLoB99+TdZib7VJsgp45yGJiEPgrlwXItI=";
  };

  # Pass the numactl and elfutils include directories explicitly and disable
  # all compiler warnings (-w) for both C and C++.
  env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
  env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";

  nativeBuildInputs = [
    pkg-config
    cmake
    ninja
    xxd
    rocm-merged-llvm
  ];

  buildInputs = [
    elfutils
    libdrm
    numactl
    # without valgrind, additional work for "kCodeCopyAligned11" is done in the installPhase
    valgrind
    libxml2
  ];

  cmakeFlags = [
    "-DBUILD_SHARED_LIBS=ON"
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  patches = [
    # Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272
    # The first two are kept for reference but are currently disabled.
    # (fetchpatch {
    #   # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int
    #   url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch";
    #   hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE=";
    # })
    # (fetchpatch {
    #   # [PATCH] rocr: refactor of runtime.cpp based on Coverity
    #   url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch";
    #   hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0=";
    # })
    (fetchpatch {
      # [PATCH] queues: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch";
      hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U=";
    })
    (fetchpatch {
      # [PATCH] topology: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch";
      hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w=";
    })
    (fetchpatch {
      # [PATCH] kfd_ioctl: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch";
      hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns=";
    })
    ./remove-hsa-aqlprofile-dep.patch
  ];

  # * Fix shebangs: the three generator scripts run during the build
  #   (--build); everything under image/, core/ and runtime/ is patched for
  #   the host platform.
  # * Rewrite the 'hsa/include/hsa' path in CMakeLists.txt. --replace-warn has
  #   the same semantics as the deprecated plain --replace (warn, do not fail,
  #   when the pattern is absent) without the deprecation notice.
  # * Export the device-libs bitcode directory.
  #   NOTE(review): presumably read by the build scripts patched above; confirm.
  postPatch = ''
    patchShebangs --build \
      runtime/hsa-runtime/core/runtime/trap_handler/create_trap_handler_header.sh \
      runtime/hsa-runtime/core/runtime/blit_shaders/create_blit_shader_header.sh \
      runtime/hsa-runtime/image/blit_src/create_hsaco_ascii_file.sh
    patchShebangs --host image core runtime
    substituteInPlace CMakeLists.txt \
      --replace-warn 'hsa/include/hsa' 'include/hsa'
    export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Platform runtime for ROCm";
    homepage = "https://github.com/ROCm/ROCR-Runtime";
    license = with licenses; [ ncsa ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,27 @@
The libhsa-amd-aqlprofile64 library is unfree, so stop probing for it at runtime and never advertise the AQLPROFILE extension.
Bug: https://github.com/ROCm/ROCm/issues/1781
--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
}
- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
- os::CloseLib(lib);
- setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
- }
-
setFlag(HSA_EXTENSION_AMD_PROFILER);
break;
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
return HSA_STATUS_SUCCESS;
}
- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
+ if (0) {
if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);

View File

@@ -0,0 +1,89 @@
diff --git a/rocm_smi-backward-compat.cmake b/rocm_smi-backward-compat.cmake
index aa8fd9c..59afce5 100644
--- a/rocm_smi-backward-compat.cmake
+++ b/rocm_smi-backward-compat.cmake
@@ -72,7 +72,12 @@ function(generate_wrapper_header)
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
#set #include statement
get_filename_component(file_name ${header_file} NAME)
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${ROCM_SMI}/${file_name}\"\n")
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
+ else()
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
+ endif()
+ set(include_statements "${include_statements}#include \"${include_dir}/${ROCM_SMI}/${file_name}\"\n")
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${RSMI_WRAPPER_INC_DIR}/${file_name})
unset(include_guard)
unset(include_statements)
@@ -90,7 +95,12 @@ function(generate_wrapper_header)
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
#set #include statement
get_filename_component(file_name ${header_file} NAME)
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${OAM_TARGET_NAME}/${file_name}\"\n")
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
+ else()
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
+ endif()
+ set(include_statements "${include_statements}#include \"${include_dir}/${OAM_TARGET_NAME}/${file_name}\"\n")
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${OAM_WRAPPER_INC_DIR}/${file_name})
unset(include_guard)
unset(include_statements)
@@ -123,11 +133,16 @@ function(create_library_symlink)
set(library_files "${LIB_RSMI}")
endif()
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
+ else()
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
+ endif()
foreach(file_name ${library_files})
add_custom_target(link_${file_name} ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
+ ${install_libdir}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
endforeach()
file(MAKE_DIRECTORY ${OAM_WRAPPER_LIB_DIR})
@@ -151,11 +166,16 @@ function(create_library_symlink)
set(library_files "${LIB_OAM}")
endif()
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
+ else()
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
+ endif()
foreach(file_name ${library_files})
add_custom_target(link_${file_name} ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
+ ${install_libdir}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
endforeach()
endfunction()
diff --git a/rocm_smi/CMakeLists.txt b/rocm_smi/CMakeLists.txt
index c594eeb..d3ed39d 100755
--- a/rocm_smi/CMakeLists.txt
+++ b/rocm_smi/CMakeLists.txt
@@ -105,10 +105,15 @@ endif ()
#file reorganization changes
#rocm_smi.py moved to libexec/rocm_smi. so creating rocm-smi symlink
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBEXECDIR})
+ set(install_libexecdir "${CMAKE_INSTALL_LIBEXECDIR}")
+else()
+ set(install_libexecdir "../${CMAKE_INSTALL_LIBEXECDIR}")
+endif()
add_custom_target(link-rocm-smi ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
- ../${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
+ ${install_libexecdir}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
## Add the install directives for the runtime library.
install(TARGETS ${ROCM_SMI_TARGET}

View File

@@ -0,0 +1,58 @@
# rocm_smi_lib: system management interface for AMD GPUs, including the
# Python command-line tooling installed under libexec/rocm_smi.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  wrapPython,
  libdrm,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-smi";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm_smi_lib";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-qshAMVhHJIA06fGOTJx5/l1t8wKv7KbmvdCSKNuEIKM=";
  };

  patches = [ ./cmake.patch ];

  # Tools that run on the build machine.
  nativeBuildInputs = [
    cmake
    wrapPython
  ];

  # libdrm is a library for the host platform, not a build tool, so it
  # belongs in buildInputs rather than nativeBuildInputs.
  buildInputs = [
    libdrm
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Wrap the installed Python programs, then move the two rsmiBindings
  # modules back from their ".<name>-wrapped" files to their original names.
  postInstall = ''
    wrapPythonProgramsIn $out
    mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py
    mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "System management interface for AMD GPUs supported by ROCm";
    homepage = "https://github.com/ROCm/rocm_smi_lib";
    license = with licenses; [ mit ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = [ "x86_64-linux" ];
  };
})

View File

@@ -0,0 +1,32 @@
{
  clr,
  ollama,
  python3Packages,
  rocmPackages,
  magma-hip,
  emptyDirectory,
  stdenv,
}:

let
  # magma-hip built against this ROCm package set.
  rocmMagma = magma-hip.override {
    inherit rocmPackages;
  };

  # ollama with ROCm acceleration, using this ROCm package set.
  rocmOllama = ollama.override {
    inherit rocmPackages;
    acceleration = "rocm";
  };

  # torch with ROCm enabled and CUDA disabled, using this ROCm package set.
  rocmTorch = python3Packages.torch.override {
    inherit rocmPackages;
    rocmSupport = true;
    cudaSupport = false;
    magma-hip = rocmMagma;
  };
in
# The derivation itself only creates an empty output; its purpose is to carry
# the passthru.tests attributes below.
stdenv.mkDerivation {
  name = "rocm-tests";

  src = emptyDirectory;

  nativeBuildInputs = [
    clr
  ];

  postInstall = "mkdir -p $out";

  passthru.tests = {
    ollama = rocmOllama;
    torch = rocmTorch;
  };
}

View File

@@ -0,0 +1,65 @@
# rocminfo: "ROCm Application for Reporting System Info", together with the
# rocm_agent_enumerator script.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-runtime,
  busybox,
  python3,
  gnugrep,
  clr, # Only for localGpuTargets
  # rocminfo requires that the calling user have a password and be in
  # the video group. If we let rocm_agent_enumerator rely upon
  # rocminfo's output, then it, too, has those requirements. Instead,
  # we can specify the GPU targets for this system (e.g. "gfx803" for
  # Polaris) such that no system call is needed for downstream
  # compilers to determine the desired target.
  defaultTargets ? (clr.localGpuTargets or [ ]),
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocminfo";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocminfo";
    rev = "rocm-${finalAttrs.version}";
    # SRI hashes go in `hash`, matching every other ROCm package in this set.
    hash = "sha256-YscZ5sFsLOVBg98w2X6vTzniTvl9NfCkIE+HAH6vv5Y=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
  ];

  buildInputs = [ rocm-runtime ];

  propagatedBuildInputs = [ python3 ];

  cmakeFlags = [ "-DROCRTST_BLD_TYPE=Release" ];

  # Fix the enumerator script's shebang and make the "lsmod | grep" pipeline
  # in rocminfo.cc use absolute store paths instead of relying on PATH.
  prePatch = ''
    patchShebangs rocm_agent_enumerator
    sed 's,lsmod | grep ,${busybox}/bin/lsmod | ${gnugrep}/bin/grep ,' -i rocminfo.cc
  '';

  # When default targets are configured, write them one per line to
  # bin/target.lst; nothing is written for an empty list.
  postInstall = lib.optionalString (defaultTargets != [ ]) ''
    echo '${lib.concatStringsSep "\n" defaultTargets}' > $out/bin/target.lst
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm Application for Reporting System Info";
    homepage = "https://github.com/ROCm/rocminfo";
    license = licenses.ncsa;
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,165 @@
# rocMLIR: "MLIR-based convolution and GEMM kernel generator".
#
# With buildRockCompiler = true the package is named "rocmlir-rock", the fat
# librockCompiler is requested from CMake and only the `out` output exists.
# Otherwise an additional `external` output receives the libraries of the
# bundled LLVM build.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocminfo,
  clr,
  git,
  libxml2,
  libedit,
  zstd,
  zlib,
  ncurses,
  python3Packages,
  buildRockCompiler ? false,
  buildTests ? false, # `argument of type 'NoneType' is not iterable`
}:

# FIXME: rocmlir has an entire separate LLVM build in a subdirectory this is silly
# It seems to be forked from AMD's own LLVM
# If possible reusing the rocmPackages.llvm build would be better
# Would have to confirm it is compatible with ROCm's tagged LLVM.
# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways
# in subdirs

# Theoretically, we could have our MLIR have an output
# with the source and built objects so that we can just
# use it as the external LLVM repo for this
let
  suffix = if buildRockCompiler then "-rock" else "";

  # Name of the LLVM backend matching the host CPU; throws for anything other
  # than x86_64 and aarch64.
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocmlir${suffix}";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals (!buildRockCompiler) [
      "external"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocMLIR";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-p/gvr1Z6yZtO5N+ecSouXiCrf520jt1HMOy/tohUHfI=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    python3Packages.python
    python3Packages.tomli
  ];

  buildInputs = [
    git
    libxml2
    libedit
  ];

  propagatedBuildInputs = [
    zstd
    zlib
    ncurses
  ];

  patches = [
    ./initparamdata-sort-const.patch
  ];

  cmakeFlags =
    [
      "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DLLVM_USE_LINKER=lld"
      "-DLLVM_ENABLE_ZSTD=FORCE_ON"
      "-DLLVM_ENABLE_ZLIB=FORCE_ON"
      "-DLLVM_ENABLE_LIBCXX=ON"
      "-DLLVM_ENABLE_TERMINFO=ON"
      "-DROCM_PATH=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler)
    ]
    ++ lib.optionals (!buildRockCompiler) [
      "-DROCM_TEST_CHIPSET=gfx000"
    ];

  postPatch = ''
    patchShebangs mlir
    patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py

    # Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read'
    substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
      --replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"

    # remove when no longer required
    substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \
      --replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin"

    substituteInPlace mlir/utils/performance/common/CMakeLists.txt \
      --replace-fail "/opt/rocm" "${clr}"
  '';

  # The regular build phase is skipped; the check phase runs instead, with
  # the target selected by checkTarget below.
  dontBuild = true;
  doCheck = true;

  # Certain libs aren't being generated, try enabling tests next update
  checkTarget =
    if buildRockCompiler then
      "librockCompiler"
    else if buildTests then
      "check-rocmlir"
    else
      "check-rocmlir-build-only";

  postInstall =
    let
      libPath = lib.makeLibraryPath [
        zstd
        zlib
        ncurses
        clr
        stdenv.cc.cc
      ];
    in
    # optionalString (not optionals, which is the list helper) so that this
    # attribute is always a string: empty when building the rock compiler,
    # otherwise the script that fills the `external` output and fixes rpaths.
    lib.optionalString (!buildRockCompiler) ''
      mkdir -p $external/lib
      cp -a external/llvm-project/llvm/lib/{*.a*,*.so*} $external/lib
      patchelf --set-rpath $external/lib:$out/lib:${libPath} $external/lib/*.so*
      patchelf --set-rpath $out/lib:$external/lib:${libPath} $out/{bin/*,lib/*.so*}
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
    page = "tags?per_page=4";
  };

  meta = with lib; {
    description = "MLIR-based convolution and GEMM kernel generator";
    homepage = "https://github.com/ROCm/rocMLIR";
    license = with licenses; [ asl20 ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,13 @@
diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
index 3f5ee596819a..590d53788822 100644
--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
+++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
@@ -209,7 +209,7 @@ private:
size_t original_pos;
int64_t padding_amount;
- bool operator<(const InitParamData &rhs) {
+ bool operator<(const InitParamData &rhs) const {
if (this->padding_amount < rhs.padding_amount) {
return true;
} else if (this->padding_amount == rhs.padding_amount) {

View File

@@ -0,0 +1,98 @@
# rocPRIM ("ROCm parallel primitives"). Tests and benchmarks are opt-in and,
# when enabled, are moved into their own `test` / `benchmark` outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

let
  # True as soon as anything gets installed into $out/bin.
  installsBinaries = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocprim";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocPRIM";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-EP61n4syYMWjTDkjC0dWLj9yau6KL2qu1Bs5IBtw580=";
  };

  # One extra output per enabled optional component.
  outputs =
    [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = lib.optional buildTests gtest ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags =
    [
      "-DCMAKE_BUILD_TYPE=Release"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only restrict the GPU architectures when a non-empty list was given.
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Move test and benchmark binaries to their outputs; rmdir then removes the
  # bin directory and fails if anything is still left in it.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
      mv $out/bin/rocprim $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString installsBinaries ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm parallel primitives";
    homepage = "https://github.com/ROCm/rocPRIM";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,79 @@
# rocprofiler-register, built from the ROCm/rocprofiler-register repository
# (including its git submodules) with tests and samples disabled.
{
  lib,
  stdenv,
  rocm-runtime,
  rocprofiler,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  clr,
  python3Packages,
  gpuTargets ? clr.gpuTargets,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler-register";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler-register";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-HaN4XMHuCRDfKOpfuZ2SkOEQfAZKouh6luqbtATUYm0=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    cmake
    clang
    clr
  ];

  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    rocm-runtime
    # Merged ROCm tree exported by the rocprofiler package via passthru.
    rocprofiler.rocmtoolkit-merged
    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    "-DBUILD_TEST=OFF"
    "-DROCPROFILER_BUILD_TESTS=0"
    "-DROCPROFILER_BUILD_SAMPLES=0"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  passthru.updateScript = rocmUpdateScript {
    # Derived from pname, like the other ROCm packages, instead of repeating
    # the literal.
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    # NOTE(review): this description is identical to the rocprofiler
    # package's and may be a copy-paste leftover; confirm against upstream.
    description = "Profiling with perf-counters and derived metrics";
    # Point at the repository this package is actually fetched from (see src
    # above), not at the separate rocprofiler project.
    homepage = "https://github.com/ROCm/rocprofiler-register";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,15 @@
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 46efbd5..ca2cc3b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -127,10 +127,6 @@ function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
DEPENDS ${INPUT_FILE} clang
COMMENT "Building ${OUTPUT_FILE}..."
VERBATIM)
- install(
- FILES ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
- DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests-v1
- COMPONENT tests)
set(HSACO_TARGET_LIST
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
PARENT_SCOPE)

View File

@@ -0,0 +1,142 @@
# rocprofiler ("Profiling with perf-counters and derived metrics"), built
# without the aqlprofile library and with tests and samples disabled.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  symlinkJoin,
  cmake,
  clang,
  clr,
  rocm-core,
  rocm-runtime,
  rocm-device-libs,
  roctracer,
  rocdbgapi,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  mpi,
  systemd,
  gtest,
  git,
  python3Packages,
  gpuTargets ? clr.gpuTargets,
}:

let
  # Single merged tree of the ROCm components listed below, with the
  # nix-support directory removed after merging. Also exported through
  # passthru for other packages.
  rocmtoolkit-merged = symlinkJoin {
    name = "rocmtoolkit-merged";

    paths = [
      rocm-core
      rocm-runtime
      rocm-device-libs
      roctracer
      rocdbgapi
      clr
    ];

    postBuild = ''
      rm -rf $out/nix-support
    '';
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-CgW8foM4W3K19kUK/l8IsH2Q9DHi/z88viXTxhNqlHQ=";
    fetchSubmodules = true;
  };

  patches = [
    # These just simply won't build
    ./0000-dont-install-tests-hsaco.patch
    ./optional-aql-in-cmake.patch
  ];

  nativeBuildInputs = [
    cmake
    clang
    clr
    git
    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];

  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    mpi
    systemd
    gtest
  ];

  propagatedBuildInputs = [ rocmtoolkit-merged ];

  # HACK: allow building without aqlprofile, probably explodes at runtime if use profiling
  env.LDFLAGS = "-z nodefs -Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all";
  #HACK: rocprofiler's cmake doesn't add these deps properly
  env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    "-DBUILD_TEST=OFF"
    "-DROCPROFILER_BUILD_TESTS=0"
    "-DROCPROFILER_BUILD_SAMPLES=0"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # * Make ROCPROFILER_CHECKOUT_GIT_SUBMODULE return immediately (the
  #   submodules are already fetched via fetchSubmodules above).
  # * Drop the unconditional `set(ROCPROFILER_BUILD_TESTS ON)`.
  # * Point the test builds at the clr and device-libs store paths.
  #   --replace-warn has the same semantics as the deprecated plain --replace
  #   (warn, do not fail, when the pattern is absent) without the deprecation
  #   notice.
  postPatch = ''
    patchShebangs .

    substituteInPlace cmake_modules/rocprofiler_utils.cmake \
      --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
    return()'

    substituteInPlace CMakeLists.txt \
      --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""

    substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
      --replace-warn "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"

    substituteInPlace test/CMakeLists.txt \
      --replace-warn "\''${ROCM_ROOT_DIR}/amdgcn/bitcode" "${rocm-device-libs}/amdgcn/bitcode"
  '';

  postInstall = ''
    # Why do these have the executable bit set?
    chmod -x $out/libexec/rocprofiler/counters/*.xml
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  passthru.rocmtoolkit-merged = rocmtoolkit-merged;

  meta = with lib; {
    description = "Profiling with perf-counters and derived metrics";
    homepage = "https://github.com/ROCm/rocprofiler";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,147 @@
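From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch
Builds rocprofiler without hsa-amd-aqlprofile64: drops the find_library checks, the link entries and the aqlprofile API-table loading.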
diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake
index 7b7c472..0aba3ed 100644
--- a/cmake_modules/rocprofiler_env.cmake
+++ b/cmake_modules/rocprofiler_env.cmake
@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE)
target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1)
endif()
+set(ROCPROFILER_LD_AQLPROFILE false)
# Enable direct loading of AQL-profile HSA extension
if(ROCPROFILER_LD_AQLPROFILE)
target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1)
@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "")
message(FATAL_ERROR "ROCM_ROOT_DIR is not found.")
endif()
-find_library(
- HSA_AMD_AQLPROFILE_LIBRARY
- NAMES hsa-amd-aqlprofile64
- HINTS ${CMAKE_PREFIX_PATH}
- PATHS ${ROCM_ROOT_DIR}
- PATH_SUFFIXES lib REQUIRED)
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt
index 61782f0..16c83bf 100644
--- a/src/api/CMakeLists.txt
+++ b/src/api/CMakeLists.txt
@@ -51,15 +51,6 @@ find_file(
NO_DEFAULT_PATH REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
-find_library(
- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
- HINTS ${CMAKE_PREFIX_PATH}
- PATHS ${ROCM_PATH}
- PATH_SUFFIXES lib)
-
-if(NOT AQLPROFILE_LIB)
- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
-endif()
# ########################################################################################
# Adding Old Library Files
@@ -247,7 +238,7 @@ target_include_directories(
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/rocprofiler>
PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler)
target_link_libraries(
- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++
+ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++
dl rocprofiler::build-flags rocprofiler::memcheck)
get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME)
@@ -325,8 +316,7 @@ target_link_options(
-Wl,--no-undefined)
target_link_libraries(
rocprofiler-v2
- PRIVATE ${AQLPROFILE_LIB}
- hsa-runtime64::hsa-runtime64
+ PRIVATE hsa-runtime64::hsa-runtime64
Threads::Threads
atomic
numa
diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp
index 2c47186..6b39634 100644
--- a/src/util/hsa_rsrc_factory.cpp
+++ b/src/util/hsa_rsrc_factory.cpp
@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
if (kern_arg_pool_ == nullptr)
CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
- // Get AqlProfile API table
- aqlprofile_api_ = {};
-#ifdef ROCP_LD_AQLPROFILE
- status = LoadAqlProfileLib(&aqlprofile_api_);
-#else
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
- sizeof(aqlprofile_api_), &aqlprofile_api_);
-#endif
- CHECK_STATUS("aqlprofile API table load failed", status);
-
// Get Loader API table
loader_api_ = {};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp
index 0a44d18..fab5b75 100644
--- a/test/util/hsa_rsrc_factory.cpp
+++ b/test/util/hsa_rsrc_factory.cpp
@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
- // Get AqlProfile API table
- aqlprofile_api_ = {0};
-#ifdef ROCP_LD_AQLPROFILE
- status = LoadAqlProfileLib(&aqlprofile_api_);
-#else
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
- sizeof(aqlprofile_api_), &aqlprofile_api_);
-#endif
- CHECK_STATUS("aqlprofile API table load failed", status);
-
// Get Loader API table
loader_api_ = {0};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt
index 107cb51..0f6d4bf 100644
--- a/tests-v2/unittests/core/CMakeLists.txt
+++ b/tests-v2/unittests/core/CMakeLists.txt
@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests")
target_link_libraries(
runCoreUnitTests
- PRIVATE ${AQLPROFILE_LIB}
- test_hsatool_library
+ PRIVATE test_hsatool_library
hsa-runtime64::hsa-runtime64
Threads::Threads
GTest::gtest GTest::gtest_main
@@ -285,4 +284,4 @@ endif()
# for the *_FilePlugin tests
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
-endif()
\ No newline at end of file
+endif()
diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt
index 53180d5..0c4d4a7 100644
--- a/tests-v2/unittests/profiler/CMakeLists.txt
+++ b/tests-v2/unittests/profiler/CMakeLists.txt
@@ -122,7 +122,7 @@ target_compile_definitions(
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_link_libraries(
- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
+ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl)
add_dependencies(tests runUnitTests)
@@ -158,4 +158,4 @@ endif()
# for the *_FilePlugin tests
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
-endif()
\ No newline at end of file
+endif()

View File

@@ -0,0 +1,59 @@
# rocr_debug_agent: "Library that provides some debugging functionality for
# ROCr".
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  git,
  rocdbgapi,
  elfutils,
}:

let
  # Every HIP location flag passed to CMake points somewhere inside clr.
  hipRoot = "${clr}";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocr-debug-agent";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocr_debug_agent";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-otoxZ2NHkPDIFhvn4/nvaQ/W4LF38Nx9MZ9IYEf1DyY=";
  };

  nativeBuildInputs = [
    cmake
    clr
    git
  ];

  buildInputs = [
    rocdbgapi
    elfutils
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${hipRoot}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${hipRoot}"
    "-DHIP_PATH=${hipRoot}"
  ];

  # The install target apparently leaves a src directory in the output;
  # remove it if present.
  postInstall = ''
    rm -rf $out/src
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Library that provides some debugging functionality for ROCr";
    homepage = "https://github.com/ROCm/rocr_debug_agent";
    license = [ lib.licenses.ncsa ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,99 @@
# rocRAND ("Generate pseudo-random and quasi-random numbers"). Tests and
# benchmarks are opt-in and, when enabled, are moved into their own `test` /
# `benchmark` outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? clr.localGpuTargets or [ ],
}:

let
  # True as soon as anything gets installed into $out/bin.
  installsBinaries = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  # The package name carries clr's GPU architecture suffix.
  pname = "rocrand${clr.gpuArchSuffix}";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-XqGPXx+LqjJs602vtG5u578B1hitGlsopA0izbClHro=";
  };

  # One extra output per enabled optional component.
  outputs =
    [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = lib.optional buildTests gtest ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only restrict the GPU architectures when a non-empty list was given.
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Move test and benchmark binaries to their outputs, then delete the
  # leftover rocRAND directory and the bin directory itself.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString installsBinaries ''
      rm -r $out/bin/rocRAND
      # Fail if bin/ isn't actually empty
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Generate pseudo-random and quasi-random numbers";
    homepage = "https://github.com/ROCm/rocRAND";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,135 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocblas,
  rocprim,
  rocsparse,
  clr,
  fmt,
  gtest,
  gfortran,
  lapack-reference,
  # Build `rocsolver-test` and move it into a separate `test` output.
  buildTests ? false,
  # Build `rocsolver-bench` and move it into a separate `benchmark` output.
  buildBenchmarks ? false,
  # GPU architectures handed to CMake. Falls back to the fixed list below
  # when `clr` does not provide `localGpuTargets`.
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx1010"
      "gfx1030"
      "gfx1100"
      "gfx1101"
      "gfx1102"
      "gfx1201"
    ]
  ),
}:

let
  # Tests and benchmarks both pull in the Fortran compiler and reference LAPACK.
  buildClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocsolver${clr.gpuArchSuffix}";
  version = "6.4.1";

  # The extra outputs only exist when the matching client is built.
  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-u5GRWetMnhEBJ9HZcXoEaqpdO8f0cuSPnq+XawljfUs=";
  };

  nativeBuildInputs =
    [
      cmake
      # no ninja, it buffers console output and nix times out long periods of no output
      rocm-cmake
      clr
    ]
    ++ lib.optional buildClients gfortran;

  buildInputs =
    [
      # FIXME: rocblas and rocsolver can't build in parallel
      # but rocsolver doesn't need rocblas' offload builds at build time
      # could we build against a rocblas-minimal?
      rocblas
      rocprim
      rocsparse
      fmt
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optional buildClients lapack-reference;

  cmakeFlags =
    [
      "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_VERBOSE_MAKEFILE=ON"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";

  # Move each client into its own output; the final `rmdir` only succeeds
  # when nothing is left behind in $out/bin.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rocsolver-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/rocsolver-bench $benchmark/bin
    '')
    (lib.optionalString buildClients ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = "rocsolver";
    inherit (finalAttrs.src) owner repo;
  };

  requiredSystemFeatures = [ "big-parallel" ];

  meta = {
    description = "ROCm LAPACK implementation";
    homepage = "https://github.com/ROCm/rocSOLVER";
    license = [ lib.licenses.bsd2 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    timeout = 14400; # 4 hours
    maxSilent = 14400; # 4 hours
  };
})

View File

@@ -0,0 +1,161 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchzip,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gfortran,
  git,
  gtest,
  boost,
  python3Packages,
  # Build the test client and install it (plus the converted matrices) into
  # a separate `test` output.
  buildTests ? false,
  # Build the benchmark client and install it into a separate `benchmark` output.
  buildBenchmarks ? false, # Seems to depend on tests
  # GPU architectures handed to CMake; an empty list leaves AMDGPU_TARGETS unset.
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

let
  # Benchmarks seem to depend on the tests, so either option enables the
  # shared client setup (dependencies, matrices, `test` output).
  buildClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocsparse${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildClients [
      "test"
    ]
    ++ lib.optionals buildBenchmarks [
      "benchmark"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-l7rOuVthfrSO5bnhgm49cjPnRbV/2sFhSRT+mShkBek=";
  };

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs =
    [
      rocprim
      git
    ]
    ++ lib.optionals buildClients [
      gtest
      boost
      python3Packages.python
      python3Packages.pyyaml
    ];

  cmakeFlags =
    [
      "-DCMAKE_BUILD_TYPE=Release"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ]
    ++ lib.optionals buildClients [
      "-DBUILD_CLIENTS_TESTS=ON"
      # CMAKE_MATRICES_DIR is added in preConfigure, see below.
      "-Dpython=python3"
    ]
    ++ lib.optionals buildBenchmarks [
      "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ];

  # We have to manually generate the matrices
  postPatch = lib.optionalString buildClients ''
    mkdir -p matrices
    ${lib.concatMapStringsSep "\n" (matrix: "ln -s ${matrix}/*.mtx matrices") (
      lib.attrValues finalAttrs.passthru.matrices
    )}

    # Not used by the original cmake, causes an error
    rm matrices/*_b.mtx

    echo "deps/convert.cpp -> deps/mtx2csr"
    hipcc deps/convert.cpp -O3 -o deps/mtx2csr

    for mat in $(ls -1 matrices | cut -d "." -f 1); do
      echo "mtx2csr: $mat.mtx -> $mat.csr"
      deps/mtx2csr matrices/$mat.mtx matrices/$mat.csr
      unlink matrices/$mat.mtx
    done
  '';

  # The matrices directory created in postPatch lives in the unpacked source
  # tree. Its absolute path is only known at build time, so it is passed from
  # the shell instead of hard-coding `/build/source`, which is only valid when
  # the build directory happens to be `/build`.
  preConfigure = lib.optionalString buildClients ''
    cmakeFlagsArray+=("-DCMAKE_MATRICES_DIR=$NIX_BUILD_TOP/$sourceRoot/matrices")
  '';

  # With benchmarks enabled, everything in bin/ is first copied to `benchmark`
  # and the test binary removed from the copy; then bin/ is moved to `test`
  # and the benchmark binary removed from there. The matrices go to `test`.
  postInstall =
    lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      cp -a $out/bin/* $benchmark/bin
      rm $benchmark/bin/rocsparse-test
    ''
    + lib.optionalString buildClients ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rm $test/bin/rocsparse-bench || true
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
      rmdir $out/bin
    '';

  passthru = {
    # Attribute set of fetched matrix archives, one per entry in ./deps.nix.
    matrices = import ./deps.nix {
      inherit fetchzip;
      mirror1 = "https://sparse.tamu.edu/MM";
      mirror2 = "https://www.cise.ufl.edu/research/sparse/MM";
    };

    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner;
      inherit (finalAttrs.src) repo;
    };
  };

  meta = with lib; {
    description = "ROCm SPARSE implementation";
    homepage = "https://github.com/ROCm/rocSPARSE";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,223 @@
{
  fetchzip,
  mirror1,
  mirror2,
}:

let
  # Fetch one matrix archive. `path` is the `<group>/<name>` part of the URL
  # below each mirror root; both mirrors are passed as alternative URLs.
  fetchMatrix =
    path: sha256:
    fetchzip {
      inherit sha256;
      urls = map (mirror: "${mirror}/${path}.tar.gz") [
        mirror1
        mirror2
      ];
    };
in
{
  matrix-01 = fetchMatrix "SNAP/amazon0312" "sha256-AHur5ZIDZTFRrO2GV0ieXrffq4KUiGWiZ59pv0fUtEQ=";
  matrix-02 = fetchMatrix "Muite/Chebyshev4" "sha256-0rSxaN4lQcdaCLsvlgicG70FXUxXeERPiEmQ4MzbRdE=";
  matrix-03 = fetchMatrix "FEMLAB/sme3Dc" "sha256-hDzDWDUnHEyFedX/tMNq83ZH8uWyM4xtZYUUAD3rizo=";
  matrix-04 = fetchMatrix "Williams/webbase-1M" "sha256-GmN2yOt/MoX01rKe05aTyB3ypUP4YbQGOITZ0BqPmC0=";
  matrix-05 = fetchMatrix "Williams/mac_econ_fwd500" "sha256-gQNjfVyWzNM9RwImJGhkhahRmZz74LzDs1oijL7mI7k=";
  matrix-06 = fetchMatrix "Williams/mc2depi" "sha256-87cdZjntNcTuz5BtO59irhcuRbPllWSbhCEX3Td02qc=";
  matrix-07 = fetchMatrix "Bova/rma10" "sha256-WRamuJX3D8Tm+k0q67RjUDG3DeNAxhKiaPkk5afY5eU=";
  matrix-08 = fetchMatrix "JGD_BIBD/bibd_22_8" "sha256-5dhkm293Mc3lzakKxHy5W5XIn4Rw+gihVh7gyrjEHXo=";
  matrix-09 = fetchMatrix "Hamm/scircuit" "sha256-czjLWCjXAjZCk5TGYHaEkwSAzQu3TQ3QyB6eNKR4G88=";
  matrix-10 = fetchMatrix "Sandia/ASIC_320k" "sha256-bYuLnJViAIcIejAkh69/bsNAVIDU4wfTLtD+nmHd6FM=";
  matrix-11 = fetchMatrix "GHS_psdef/bmwcra_1" "sha256-aDwn8P1khYjo2Agbq5m9ZBInJUxf/knJNvyptt0fak0=";
  matrix-12 = fetchMatrix "HB/nos1" "sha256-8OJqA/byhlAZd869TPUzZFdsOiwOoRGfKyhM+RMjXoY=";
  matrix-13 = fetchMatrix "HB/nos2" "sha256-FS0rKqmg+uHwsM/yGfQLBdd7LH/rUrdutkNGBD/Mh1I=";
  matrix-14 = fetchMatrix "HB/nos3" "sha256-DANnlrNJikrI7Pst9vRedtbuxepyHmCIu2yhltc4Qcs=";
  matrix-15 = fetchMatrix "HB/nos4" "sha256-21mUgqjWGUfYgiWwSrKh9vH8Vdt3xzcefmqYNYRpxiY=";
  matrix-16 = fetchMatrix "HB/nos5" "sha256-FOuXvGqBBFNkVS6cexmkluret54hCfCOdK+DOZllE4c=";
  matrix-17 = fetchMatrix "HB/nos6" "sha256-+7NI1rA/qQxYPpjXKHvAaCZ+LSaAJ4xuJvMRMBEUYxg=";
  matrix-18 = fetchMatrix "HB/nos7" "sha256-q3NxJjbwGGcFiQ9nhWfUKgZmdVwCfPmgQoqy0AqOsNc=";
  matrix-19 = fetchMatrix "DNVS/shipsec1" "sha256-0GAN6qmVfD+tprIigzuUUUwm5KVhkN9X65wMEvFltDY=";
  matrix-20 = fetchMatrix "Cote/mplate" "sha256-f28Du/Urxsiq5NkRmRO10Zz9vvGRjEchquzHzbZpZ7U=";
  matrix-21 = fetchMatrix "Bai/qc2534" "sha256-O+Wy0NfCU1hVUOfNR1dJpvDHLBwwa301IRJDrQJnhak=";
  matrix-22 = fetchMatrix "Chevron/Chevron2" "sha256-oxMnt8U5Cf1ILWcBdU6W9jdSMMm+U6bIVl8nm3n3+OA=";
  matrix-23 = fetchMatrix "Chevron/Chevron3" "sha256-MFD9BxFI/3IS7yatW121BAI04fbqrXpgYDT5UKjeKcU=";
  matrix-24 = fetchMatrix "Chevron/Chevron4" "sha256-ikS8O51pe1nt3BNyhvfvqCbVL0+bg/da9bqGqeBDkTg=";
}

View File

@@ -0,0 +1,94 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gtest,
  # Build the test binaries and move them into a separate `test` output.
  buildTests ? false,
  # Build the benchmark binaries and move them into a separate `benchmark` output.
  buildBenchmarks ? false,
  # GPU architectures handed to CMake; an empty list leaves AMDGPU_TARGETS unset.
  gpuTargets ? [ ],
}:

# rocThrust built with CMake against `rocprim` and the HIP toolchain from `clr`.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocthrust";
  version = "6.4.1";

  # The extra outputs only exist when the matching binaries are built.
  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocThrust";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-bHyqG0pSt6bc6cDMnd1uY+0o+V3cxdp0mUEzWYRdd20=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    rocprim
    clr
  ];

  buildInputs = lib.optional buildTests gtest;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARKS=ON";

  # Move the client binaries into their own outputs and remove whatever is
  # left of bin/ in `out`.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/{test_*,*.hip} $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rm -rf $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm parallel algorithm library";
    homepage = "https://github.com/ROCm/rocThrust";
    license = [ lib.licenses.asl20 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,120 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  rocm-device-libs,
  libxml2,
  doxygen,
  graphviz,
  gcc-unwrapped,
  libbacktrace,
  rocm-runtime,
  python3Packages,
  buildDocs ? false, # Nothing seems to be generated, so not making the output
  # Keep the upstream test subdirectory and move its executables into a
  # separate `test` output.
  buildTests ? false,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "roctracer";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildDocs "doc"
    ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "roctracer";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Dwk5cBZLysmsVA2kwpQM0FQt2KXOGcaZcAw/d8VUaXw=";
  };

  nativeBuildInputs =
    [
      cmake
      clr
    ]
    ++ lib.optionals buildDocs [
      doxygen
      graphviz
    ];

  buildInputs = [
    libxml2
    libbacktrace
    python3Packages.python
    python3Packages.cppheaderparser
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Needed with GCC 12
  env.NIX_CFLAGS_COMPILE = "-Wno-error=array-bounds";

  # Always export the device library path; additionally drop the test
  # subdirectory from the top-level CMake file unless tests are requested.
  postPatch = lib.concatStrings [
    ''
      export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode
    ''
    (lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
        --replace "add_subdirectory(test)" ""
    '')
  ];

  # Tests always fail, probably need GPU
  # doCheck = buildTests;

  # For tests: collect the executables installed under $out/test, drop the
  # helper scripts, and point the binaries' rpath at this package's libraries
  # plus the listed runtime dependencies.
  postInstall = lib.concatStrings [
    (lib.optionalString buildDocs ''
      mkdir -p $doc
    '')
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      # Not sure why this is an install target
      find $out/test -executable -type f -exec mv {} $test/bin \;
      rm $test/bin/{*.sh,*.py}
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            gcc-unwrapped.lib
            rocm-runtime
          ]
        )
      } $test/bin/*
      rm -rf $out/test
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Tracer callback/activity library";
    homepage = "https://github.com/ROCm/roctracer";
    license = [ lib.licenses.mit ]; # mitx11
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,35 @@
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0d00883..86ce282 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -30,30 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests"
cmake_dependent_option( ROCWMMA_BUILD_BENCHMARK_TESTS "Build benchmarking tests" OFF "ROCWMMA_BUILD_TESTS" OFF )
cmake_dependent_option( ROCWMMA_BUILD_EXTENDED_TESTS "Build extended test parameter coverage" OFF "ROCWMMA_BUILD_TESTS" OFF )
-# Test/benchmark requires additional dependencies
-include( FetchContent )
-
-FetchContent_Declare(
- googletest
- GIT_REPOSITORY https://github.com/google/googletest.git
- GIT_TAG release-1.12.1
-)
-FetchContent_GetProperties(googletest)
-if(NOT googletest_POPULATED)
-
- # Fetch the content using default details
- FetchContent_Populate(googletest)
- # Save the shared libs setting, then force to static libs
- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS})
- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE)
-
- # Add gtest targets as static libs
- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
-
- # Restore shared libs setting
- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE)
-endif()
-
set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
set(ROCWMMA_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/rocwmma_gtest_main.cpp)

View File

@@ -0,0 +1,121 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  clr,
  openmp,
  gtest,
  rocblas,
  buildTests ? false, # Will likely fail building because wavefront shifts are not supported for certain archs
  # Pass ROCWMMA_BUILD_EXTENDED_TESTS=ON to CMake.
  buildExtendedTests ? false,
  # Build the benchmark tests; this also turns on the test build.
  buildBenchmarks ? false,
  # Build the sample programs and move them into a separate `sample` output.
  buildSamples ? false,
  # GPU architectures handed to CMake; an empty list leaves GPU_TARGETS unset.
  gpuTargets ? [ ],
}:

let
  # Benchmarks are built through the test tree, so either option enables it.
  withTestTree = buildTests || buildBenchmarks;

  # Render a boolean the way the CMake options below expect it.
  onOff = enabled: if enabled then "ON" else "OFF";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocwmma";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional withTestTree "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocWMMA";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-fjyxMrzt74rE7Gf4v4WawYltuw1fvahwZUpauMIE3qc=";
  };

  # Only needed when the test tree is built.
  patches = lib.optional withTestTree ./0000-dont-fetch-googletest.patch;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs =
    [ openmp ]
    ++ lib.optionals withTestTree [
      rocm-smi
      gtest
      rocblas
    ];

  cmakeFlags =
    [
      "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      "-DROCWMMA_BUILD_TESTS=${onOff withTestTree}"
      "-DROCWMMA_BUILD_SAMPLES=${onOff buildSamples}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildExtendedTests "-DROCWMMA_BUILD_EXTENDED_TESTS=ON"
    ++ lib.optionals buildBenchmarks [
      "-DROCWMMA_BUILD_BENCHMARK_TESTS=ON"
      "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=ON"
    ];

  # Distribute the installed binaries over the optional outputs, then remove
  # whatever is left of bin/ in `out`.
  postInstall = lib.concatStrings [
    (lib.optionalString withTestTree ''
      mkdir -p $test/bin
      mv $out/bin/{*_test,*-validate} $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/*-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/sgemmv $sample/bin
      mv $out/bin/simple_gemm $sample/bin
      mv $out/bin/simple_dlrm $sample/bin
    '')
    (lib.optionalString (withTestTree || buildSamples) ''
      rm -rf $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Mixed precision matrix multiplication and accumulation";
    homepage = "https://github.com/ROCm/rocWMMA";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,104 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-docs-core,
  half,
  clr,
  openmp,
  boost,
  python3Packages,
  buildDocs ? false, # Needs internet
  # Select the OpenCL backend (marked broken in `meta`); ignored when `useCPU` is set.
  useOpenCL ? false,
  # Select the CPU backend; takes precedence over `useOpenCL`.
  useCPU ? false,
  # GPU architectures handed to CMake; an empty list leaves AMDGPU_TARGETS unset.
  gpuTargets ? [ ],
}:

let
  # HIP is the default backend, used when neither alternative is requested.
  useHIP = !useOpenCL && !useCPU;
in
stdenv.mkDerivation (finalAttrs: {
  # The suffix names the selected backend and follows the same precedence as
  # the BACKEND cmake flag below: CPU wins over OpenCL, HIP is the fallback.
  # The "opencl" branch was previously unreachable because its condition
  # repeated the HIP one, so OpenCL builds were named "rpp-cpu".
  pname =
    "rpp-"
    + (
      if useHIP then
        "hip"
      else if useOpenCL && !useCPU then
        "opencl"
      else
        "cpu"
    );
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rpp";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-rccVjSrOVIe4ZDtloCoCCI3u9UIcUqdirHIzS7ffAas=";
  };

  nativeBuildInputs =
    [
      cmake
      rocm-cmake
      clr
    ]
    ++ lib.optionals buildDocs [
      rocm-docs-core
      python3Packages.python
    ];

  buildInputs = [
    half
    openmp
    boost
  ];

  # Make the OpenMP headers visible to both the C and C++ compilers.
  CFLAGS = "-I${openmp.dev}/include";
  CXXFLAGS = "-I${openmp.dev}/include";

  cmakeFlags =
    [
      "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      "-DROCM_PATH=${clr}"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ]
    ++ lib.optionals useHIP [
      "-DBACKEND=HIP"
    ]
    ++ lib.optionals (useOpenCL && !useCPU) [
      "-DBACKEND=OCL"
    ]
    ++ lib.optionals useCPU [
      "-DBACKEND=CPU"
    ];

  postPatch = lib.optionalString useHIP ''
    # Bad path
    substituteInPlace CMakeLists.txt \
      --replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clr}/bin/hipcc"
  '';

  postBuild = lib.optionalString buildDocs ''
    python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Comprehensive high-performance computer vision library for AMD processors";
    homepage = "https://github.com/ROCm/rpp";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
    broken = useOpenCL;
  };
})

View File

@@ -0,0 +1,115 @@
{
  lib,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  buildPythonPackage,
  pytestCheckHook,
  setuptools,
  pyyaml,
  msgpack,
  simplejson,
  ujson,
  distro,
  orjson,
  pandas,
  joblib,
  filelock,
  clr,
  rich,
  # Selects the "tensilelite" variant: different package name, patch set and
  # Python dependencies, built from the same source.
  isTensileLite ? false,
}:

buildPythonPackage rec {
  pname = if isTensileLite then "tensilelite" else "tensile";
  # Using a specific commit which has code object compression support from after the 6.3 release
  # Without compression packages are too large for hydra
  version = "6.4.1";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "Tensile";
    rev = "1752af518190500891a865379a4569b8abf6ba01";
    hash = "sha256-Wvz4PVs//3Ox7ykZHpjPzOVwlyATyc+MmVVenfTzWK4=";
  };

  # TODO: It should be possible to run asm caps test ONCE for all supported arches
  # We currently disable the test because it's slow and runs each time tensile launches
  postPatch = lib.concatStrings [
    # Tweaks applied to the regular Tensile variant only.
    (lib.optionalString (!isTensileLite) ''
      if grep -F .SafeLoader Tensile/LibraryIO.py; then
        substituteInPlace Tensile/LibraryIO.py \
          --replace-fail "yaml.SafeLoader" "yaml.CSafeLoader"
      fi
      # See TODO above about asm caps test
      substituteInPlace Tensile/Common.py \
        --replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:"
    '')
    # Tweaks applied to both variants.
    ''
      # Add an assert that the fallback 9,0,0 is supported before setting the kernel to it
      # If it's not detected as supported we have an issue with compiler paths or the compiler is broken
      # and it's better to stop immediately
      substituteInPlace Tensile/KernelWriter.py \
        --replace-fail '= (9,0,0)' '= (9,0,0);assert(globalParameters["AsmCaps"][(9,0,0)]["SupportedISA"])'
      find . -type f -iname "*.sh" -exec chmod +x {} \;
      patchShebangs Tensile
    ''
  ];

  buildInputs = [ setuptools ];

  # Common runtime dependencies first, then the variant-specific ones.
  propagatedBuildInputs =
    [
      pyyaml
      msgpack
      pandas
      joblib
      distro
    ]
    ++ (
      if isTensileLite then
        [
          simplejson
          ujson
          orjson
        ]
      else
        [ rich ]
    );

  # Each variant carries its own patch set.
  patches =
    lib.optionals (!isTensileLite) [
      ./tensile-solutionstructs-perf-fix.diff
      ./tensile-create-library-dont-copy-twice.diff
      (fetchpatch {
        # [PATCH] Extend Tensile HIP ISA compatibility
        sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8=";
        url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch";
      })
    ]
    ++ lib.optionals isTensileLite [
      ./tensilelite-create-library-dont-copy-twice.diff
      ./tensilelite-gen_assembly-venv-err-handling.diff
      # ./tensilelite-compression.diff
    ];

  doCheck = false; # Too many errors, not sure how to set this up properly

  nativeCheckInputs = [
    pytestCheckHook
    filelock
    clr
  ];

  env.ROCM_PATH = "${clr}";

  pythonImportsCheck = [ "Tensile" ];

  passthru.updateScript = rocmUpdateScript {
    name = pname;
    inherit (src) owner repo;
  };

  meta = {
    description = "GEMMs and tensor contractions";
    homepage = "https://github.com/ROCm/Tensile";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
}

View File

@@ -0,0 +1,20 @@
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index a1644606..c6ca2882 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None):
"KernelHeader.h",
]
+ import filecmp
for fileName in libraryStaticFiles:
# copy file
- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath)
+ # no need to copy twice if it has already been copied
+ src = os.path.join(globalParameters["SourcePath"], fileName)
+ dst = os.path.join(outputPath, os.path.basename(src))
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
+ shutil.copyfile(src, dst)
return libraryStaticFiles

View File

@@ -0,0 +1,48 @@
diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py
index f663c6f1..17bcf897 100644
--- a/Tensile/SolutionStructs.py
+++ b/Tensile/SolutionStructs.py
@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping):
# create a dictionary of lists of parameter values
@staticmethod
def getSerialNaming(objs):
+ valid_params = sorted(validParameters.keys())
data = {}
- for objIdx in range(0, len(objs)):
- obj = objs[objIdx]
- for paramName in sorted(obj.keys()):
- if paramName in list(validParameters.keys()):
- paramValue = obj[paramName]
- if paramName in data:
- if paramValue not in data[paramName]:
- data[paramName].append(paramValue)
- else:
- data[paramName] = [ paramValue ]
- maxObjs = 1
- for paramName in data:
- if not isinstance(data[paramName][0],dict):
- data[paramName] = sorted(data[paramName])
- maxObjs *= len(data[paramName])
- numDigits = len(str(maxObjs))
- return [ data, numDigits ]
+
+ objs = [getattr(obj, "_state", obj) for obj in objs]
+
+ for param in valid_params:
+ d = []
+ for obj in objs:
+ if param in obj:
+ v = obj[param]
+ if v not in d:
+ d.append(v)
+ if len(d):
+ if not isinstance(d[0], dict): d.sort()
+ data[param] = d
+
+ # Calculate max objects using prod() from math module
+ max_objs = math.prod(len(values) for values in data.values())
+ num_digits = len(str(max_objs))
+ return data, num_digits
########################################
# Get Name Serial

View File

@@ -0,0 +1,345 @@
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index b8cea84558..1bc24bd1dd 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -41,6 +41,7 @@
from .SolutionLibrary import MasterSolutionLibrary
from .SolutionStructs import Solution
from .CustomYamlLoader import load_logic_gfx_arch
+from .Utilities.Profile import profile
import argparse
import collections
@@ -1233,7 +1234,7 @@ def validateLibrary(masterLibraries: MasterSolutionLibrary,
################################################################################
# Tensile Create Library
################################################################################
-@timing
+@profile
def TensileCreateLibrary():
print1("")
print1(HR)
@@ -1558,7 +1559,6 @@ def param(key, value):
print1("# Check if generated files exists.")
- @timing
def checkFileExistence(files):
for filePath in files:
if not os.path.exists(filePath):
diff --git a/Tensile/Utilities/Profile.py b/Tensile/Utilities/Profile.py
new file mode 100644
index 0000000000..cc3c7eb44c
--- /dev/null
+++ b/Tensile/Utilities/Profile.py
@@ -0,0 +1,77 @@
+################################################################################
+#
+# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+################################################################################
+
+import cProfile
+import pstats
+import os
+
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Callable, Tuple
+
+PROFILE_ENV_VAR = "TENSILE_PROFILE"
+
+def profile(func: Callable) -> Callable:
+ """Profiling decorator.
+
+ Add ``@profile`` to mark a function for profiling; set the environment variable
+ TENSILE_PROFILE=ON to enable profiling decorated functions.
+ """
+ if not envVariableIsSet(PROFILE_ENV_VAR):
+ return func
+ def wrapper(*args, **kwargs):
+ path, filename = initProfileArtifacts(func.__name__)
+
+ prof = cProfile.Profile()
+ output = prof.runcall(func, *args, **kwargs)
+ result = pstats.Stats(prof)
+ result.sort_stats(pstats.SortKey.TIME)
+ result.dump_stats(path/filename)
+
+ return output
+ return wrapper
+
+def envVariableIsSet(varName: str) -> bool:
+ """Checks if the provided environment variable is set to "ON", "TRUE", or "1"
+ Args:
+ varName: Environment variable name.
+ Returns:
+ True if the environment variable is set, otherwise False.
+ """
+ value = os.environ.get(varName, "").upper()
+ return True if value in ["ON", "TRUE", "1"] else False
+
+def initProfileArtifacts(funcName: str) -> Tuple[Path, str]:
+ """Initializes filenames and paths for profiling artifacts based on the current datetime
+ Args:
+ funcName: The name of the function being profiled, nominally passed via func.__name__
+ Returns:
+ A tuple (path, filename) where the path is the artifact directory and filename is
+ a .prof file with the profiling results.
+ """
+ dt = datetime.now(timezone.utc)
+ filename = f"{funcName}-{dt.strftime('%Y-%m-%dT%H-%M-%SZ')}.prof"
+ path = Path().cwd()/f"profiling-results-{dt.strftime('%Y-%m-%d')}"
+ path.mkdir(exist_ok=True)
+ return path, filename
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index e62b0072df..2c843ba936 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -56,7 +56,7 @@
import sys
from timeit import default_timer as timer
from pathlib import Path
-from typing import Sequence, List
+from typing import Sequence, List, Union
def timing(func):
def wrapper(*args, **kwargs):
@@ -90,87 +90,142 @@ def processKernelSource(kernel, kernelWriterAssembly, ti):
return (err, src, header, kernelName, filename)
-def getAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
- destDir = ensurePath(os.path.join(outputPath, 'library'))
- asmDir = kernelWriterAssembly.getAssemblyDirectory()
- archs = collections.defaultdict(list)
+def linkIntoCodeObject(
+    objFiles: List[str], coPathDest: Union[Path, str], kernelWriterAssembly: KernelWriterAssembly
+):
+    """Links object files into a code object file.
- for k in filter(lambda k: k['KernelLanguage'] == 'Assembly', kernels):
- archs[tuple(k['ISA'])].append(k)
+    Args:
+        objFiles: A list of object files to be linked.
+        coPathDest: The destination path for the code object file.
+        kernelWriterAssembly: An instance of KernelWriterAssembly to get link arguments.
- coFiles = []
+    Raises:
+        subprocess.CalledProcessError: If a linker invocation exits with a non-zero status.
+    """
+    if os.name == "nt":
+        # On Windows, the objFiles list command line (including spaces)
+        # exceeds the limit of 8191 characters, so using response file
+
+        # The response file sits beside the output so each code object gets its own.
+        responseFile = str(coPathDest) + ".clangArgs.txt"
+        with open(responseFile, 'wt') as file:
+            file.write(" ".join(objFiles))
+
+        args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', str(coPathDest), '@' + responseFile]
+        subprocess.check_call(args)
+    else:
+        numObjFiles = len(objFiles)
+        maxObjFiles = 10000  # upper bound on object files passed to one linker call
+
+        if numObjFiles > maxObjFiles:
+            batchedObjFiles = [objFiles[i:i+maxObjFiles] for i in range(0, numObjFiles, maxObjFiles)]
+            batchSize = int(math.ceil(numObjFiles / maxObjFiles))  # number of batches
+
+            newObjFiles = [str(coPathDest) + "." + str(i) for i in range(0, batchSize)]
+            newObjFilesOutput = []
+
+            for batch, filename in zip(batchedObjFiles, newObjFiles):
+                if len(batch) > 1:
+                    args = [globalParameters["ROCmLdPath"], "-r"] + batch + [ "-o", filename]
+                    print2(f"Linking object files into fewer object files: {' '.join(args)}")
+                    subprocess.check_call(args)
+                    newObjFilesOutput.append(filename)
+                else:
+                    newObjFilesOutput.append(batch[0])  # a lone file is linked as-is
+
+            args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjFilesOutput, str(coPathDest))
+            print2(f"Linking object files into code object: {' '.join(args)}")
+            subprocess.check_call(args)
+        else:
+            args = kernelWriterAssembly.getLinkCodeObjectArgs(objFiles, str(coPathDest))
+            print2(f"Linking object files into code object: {' '.join(args)}")
+            subprocess.check_call(args)
+
+
+def compressCodeObject(
+    coPathSrc: Union[Path, str], coPathDest: Union[Path, str], gfx: str, bundler: str
+):
+    """Compresses a code object file using the provided bundler.
+
+    Args:
+        coPathSrc: The source path of the code object file to be compressed.
+        coPathDest: The destination path for the compressed code object file.
+        gfx: The target GPU architecture.
+        bundler: The path to the Clang Offload Bundler executable.
+
+    Raises:
+        RuntimeError: If the bundler exits with a non-zero status; chained to the CalledProcessError.
+    """
+    args = [
+        bundler,
+        "--compress",
+        "--type=o",
+        "--bundle-align=4096",
+        f"--targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--{gfx}",
+        "--input=/dev/null",  # first input pairs with the first (host) target
+        f"--input={str(coPathSrc)}",
+        f"--output={str(coPathDest)}",
+    ]
+
+    print2(f"Bundling/compressing code objects: {' '.join(args)}")
+    try:
+        out = subprocess.check_output(args, stderr=subprocess.STDOUT, text=True, errors="replace")
+        print2(f"Output: {out}")
+    except subprocess.CalledProcessError as err:
+        raise RuntimeError(
+            f"Error compressing code object via bundling: {err.output}\nFailed command: {' '.join(args)}"
+        ) from err
+
+def buildAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
+ """Groups Assembly kernels by ISA and returns the code object paths produced for them: merged builds are linked, then compressed into <outputPath>/library; other builds copy per-kernel .co files uncompressed."""
+ isAsm = lambda k: k["KernelLanguage"] == "Assembly"
+
+ extObj = ".o"
+ extCo = ".co"
+ extCoRaw = ".co.raw"
- for arch, archKernels in archs.items():
+ destDir = Path(ensurePath(os.path.join(outputPath, 'library')))
+ asmDir = Path(kernelWriterAssembly.getAssemblyDirectory())
+
+ archKernelMap = collections.defaultdict(list)
+ for k in filter(isAsm, kernels):
+ archKernelMap[tuple(k['ISA'])].append(k)
+
+ coFiles = []
+ for arch, archKernels in archKernelMap.items():
if len(archKernels) == 0:
continue
- archName = getGfxName(arch)
+ gfx = getGfxName(arch)
if globalParameters["MergeFiles"] or globalParameters["NumMergedFiles"] > 1 or globalParameters["LazyLibraryLoading"]:
- objectFiles = [kernelWriterAssembly.getKernelFileBase(k) + '.o' for k in archKernels if 'codeObjectFile' not in k]
+ objectFiles = [str(asmDir / (kernelWriterAssembly.getKernelFileBase(k) + extObj)) for k in archKernels if 'codeObjectFile' not in k]
- #Group kernels from placeholder libraries
coFileMap = collections.defaultdict(list)
+
if len(objectFiles):
- coFileMap[os.path.join(destDir, "TensileLibrary_"+archName+".co")] = objectFiles
+ coFileMap[asmDir / ("TensileLibrary_"+ gfx + extCoRaw)] = objectFiles
for kernel in archKernels:
coName = kernel.get("codeObjectFile", None)
if coName:
- coFileMap[os.path.join(destDir, coName+".co")] += [kernelWriterAssembly.getKernelFileBase(kernel) + '.o']
+ coFileMap[asmDir / (coName + extCoRaw)].append(str(asmDir / (kernelWriterAssembly.getKernelFileBase(kernel) + extObj)))
- for coFile, objectFiles in coFileMap.items():
- if os.name == "nt":
- # On Windows, the objectFiles list command line (including spaces)
- # exceeds the limit of 8191 characters, so using response file
+ for coFileRaw, objFiles in coFileMap.items():
- responseArgs = objectFiles
- responseFile = os.path.join(asmDir, 'clangArgs.txt')
- with open(responseFile, 'wt') as file:
- file.write( " ".join(responseArgs) )
- file.flush()
-
- args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFile, '@clangArgs.txt']
- subprocess.check_call(args, cwd=asmDir)
- else:
- numOfObjectFiles = len(objectFiles)
- splitFiles = 10000
- if numOfObjectFiles > splitFiles:
- slicedObjectFilesList = [objectFiles[x:x+splitFiles] for x in range(0, numOfObjectFiles, splitFiles)]
- objectFileBasename = os.path.split(coFile)[-1].split('.')[0]
- numOfOneSliceOfObjectFiles = int(math.ceil(numOfObjectFiles / splitFiles))
- newObjectFiles = [ objectFileBasename + "_" + str(i) + ".o" for i in range(0, numOfOneSliceOfObjectFiles)]
- newObjectFilesOutput = []
- for slicedObjectFiles, objectFile in zip(slicedObjectFilesList, newObjectFiles):
- if len(slicedObjectFiles) > 1:
- args = [globalParameters["ROCmLdPath"], "-r"] + slicedObjectFiles + [ "-o", objectFile ]
- if globalParameters["PrintCodeCommands"]:
- print(asmDir)
- print(' '.join(args))
- subprocess.check_call(args, cwd=asmDir)
- newObjectFilesOutput.append(objectFile)
- else:
- newObjectFilesOutput.append(slicedObjectFiles[0])
- args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjectFilesOutput, coFile)
- if globalParameters["PrintCodeCommands"]:
- print(asmDir)
- print(' '.join(args))
- subprocess.check_call(args, cwd=asmDir)
- else:
- args = kernelWriterAssembly.getLinkCodeObjectArgs(objectFiles, coFile)
- if globalParameters["PrintCodeCommands"]:
- print(asmDir)
- print(' '.join(args))
- subprocess.check_call(args, cwd=asmDir)
+ linkIntoCodeObject(objFiles, coFileRaw, kernelWriterAssembly)
+ coFile = destDir / coFileRaw.name.replace(extCoRaw, extCo)
+ compressCodeObject(coFileRaw, coFile, gfx, globalParameters["ClangOffloadBundlerPath"])
coFiles.append(coFile)
else:
# no mergefiles
def newCoFileName(kName):
if globalParameters["PackageLibrary"]:
- return os.path.join(destDir, archName, kName + '.co')
+ return os.path.join(destDir, gfx, kName + '.co')
else:
- return os.path.join(destDir, kName + '_' + archName + '.co')
+ return os.path.join(destDir, kName + '_' + gfx + '.co')
def orgCoFileName(kName):
return os.path.join(asmDir, kName + '.co')
@@ -179,6 +234,8 @@ def orgCoFileName(kName):
map(lambda k: kernelWriterAssembly.getKernelFileBase(k), archKernels)), "Copying code objects"):
shutil.copyfile(src, dst)
coFiles.append(dst)
+ printWarning("Code object files are not compressed in `--no-merge-files` build mode.")
+
return coFiles
def which(p):
@@ -645,7 +702,7 @@ def success(kernel):
if not globalParameters["GenerateSourcesAndExit"]:
codeObjectFiles += buildSourceCodeObjectFiles(CxxCompiler, kernelFiles, outputPath)
- codeObjectFiles += getAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath)
+ codeObjectFiles += buildAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath)
Common.popWorkingPath() # build_tmp
Common.popWorkingPath() # workingDir

View File

@@ -0,0 +1,37 @@
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index 2b9da394..b001fa7c 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -808,10 +808,13 @@ def copyStaticFiles(outputPath=None):
"ReductionTemplate.h",
"memory_gfx.h" ]
+ import filecmp
for fileName in libraryStaticFiles:
- # copy file
- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \
- outputPath )
+ src = os.path.join(globalParameters["SourcePath"], fileName)
+ dst = os.path.join(outputPath, os.path.basename(src))
+ # no need to copy twice if it has already been copied
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
+ shutil.copyfile(src, dst)
return libraryStaticFiles
@@ -1417,9 +1420,13 @@ def TensileCreateLibrary():
writeCMake(outputPath, solutionFiles, sourceKernelFiles, staticFiles, masterLibraries)
# Make sure to copy the library static files.
+ import filecmp
for fileName in staticFiles:
- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \
- outputPath )
+ src = os.path.join(globalParameters["SourcePath"], fileName)
+ dst = os.path.join(outputPath, os.path.basename(src))
+ # no need to copy twice if it has already been copied
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
+ shutil.copyfile(src, dst)
# write solutions and kernels
codeObjectFiles = writeSolutionsAndKernels(outputPath, CxxCompiler, None, solutions,

View File

@@ -0,0 +1,36 @@
diff --git a/Tensile/Ops/gen_assembly.sh b/Tensile/Ops/gen_assembly.sh
index 0b21b6c6..609f1dd1 100755
--- a/Tensile/Ops/gen_assembly.sh
+++ b/Tensile/Ops/gen_assembly.sh
@@ -23,6 +23,8 @@
#
################################################################################
+set -x
+
archStr=$1
dst=$2
venv=$3
@@ -35,7 +37,13 @@ fi
toolchain=${rocm_path}/llvm/bin/clang++
-. ${venv}/bin/activate
+if [ -n "${TENSILE_GEN_ASSEMBLY_TOOLCHAIN:-}" ]; then  # optional compiler override; ignored when empty
+  toolchain="${TENSILE_GEN_ASSEMBLY_TOOLCHAIN}"
+fi
+# The virtualenv is optional: only source it when its activate script exists.
+if [ -f "${venv}/bin/activate" ]; then
+  . "${venv}/bin/activate"
+fi
IFS=';' read -r -a archs <<< "$archStr"
@@ -77,4 +85,6 @@ for arch in "${archs[@]}"; do
python3 ./ExtOpCreateLibrary.py --src=$dst --co=$dst/extop_$arch.co --output=$dst --arch=$arch
done
-deactivate
+if [ -f "${venv}/bin/activate" ]; then  # same guard as the activation step
+  deactivate
+fi

View File

@@ -0,0 +1,56 @@
# ROCm-only Triton: re-parameterises `triton-no-cuda` with the ROCm clang stdenv and
# `rocmPackages.triton-llvm`, then overrides its Python derivation attributes to pin the
# source revision and strip the NVIDIA backend from the tree before building.
{
triton-no-cuda,
rocmPackages,
fetchFromGitHub,
}:
# Step 1: `.override` swaps arguments of the base package function.
(triton-no-cuda.override (_old: {
inherit rocmPackages;
rocmSupport = true;
stdenv = rocmPackages.llvm.rocmClangStdenv;
llvm = rocmPackages.triton-llvm;
})).overridePythonAttrs
# Step 2: `.overridePythonAttrs` replaces or extends attributes of the resulting package;
# `old` is the attribute set being overridden.
(old: {
doCheck = false;
stdenv = rocmPackages.llvm.rocmClangStdenv;
version = "3.2.0";
src = fetchFromGitHub {
owner = "triton-lang";
repo = "triton";
rev = "9641643da6c52000c807b5eeed05edaec4402a67"; # "release/3.2.x";
hash = "sha256-V1lpARwOLn28ZHfjiWR/JJWGw3MB34c+gz6Tq1GOVfo=";
};
buildInputs = old.buildInputs ++ [
rocmPackages.clr
];
dontStrip = true;
env = old.env // {
# NOTE(review): the second -I path is absolute under /build/source — presumably the
# sandbox build directory; confirm it still resolves if the build directory differs.
CXXFLAGS = "-O3 -I${rocmPackages.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include";
TRITON_OFFLINE_BUILD = 1;
};
# Replaces whatever patch list the base package set with an empty one.
patches = [ ];
# Shell run as the postPatch hook; the comments inside the string describe each step.
postPatch = ''
# Remove nvidia backend so we don't depend on unfree nvidia headers
# when we only want to target ROCm
rm -rf third_party/nvidia
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(test)" ""
sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h
sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt
substituteInPlace python/setup.py \
--replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \
'backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]'
find . -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +
# remove any downloads
substituteInPlace python/setup.py \
--replace-fail "[get_json_package_info()]" "[]"\
--replace-fail "[get_llvm_package_info()]" "[]"\
--replace-fail "curr_version != version" "False"
# Don't fetch googletest
substituteInPlace cmake/AddTritonUnitTest.cmake \
--replace-fail 'include(''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)' "" \
--replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)"
substituteInPlace third_party/amd/backend/compiler.py \
--replace-fail '"/opt/rocm/llvm/bin/ld.lld"' "os.environ['ROCM_PATH']"' + "/llvm/bin/ld.lld"'
'';
})

View File

@@ -0,0 +1,62 @@
{
  lib,
  writeScript,
}:
# Returns a one-element list holding an update script for a ROCm package hosted on GitHub.
#   name        - attribute name under `rocmPackages_6`; a `rocm-llvm-` prefix maps to `llvm.<rest>`
#   owner, repo - GitHub repository queried through the REST API
#   page        - API page appended to the repository URL ("releases" by default)
#   filter      - jq program applied to the JSON array of releases/tags; prints the version
{
  name ? "",
  owner ? "",
  repo ? "",
  page ? "releases",
  # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip
  # output: first tag_name/name that's a proper version if any
  filter ? "map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")",
}:
let
  pname =
    if lib.hasPrefix "rocm-llvm-" name then "llvm.${lib.removePrefix "rocm-llvm-" name}" else name;

  updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p curl jq common-updater-scripts
    set -euo pipefail

    fetch_releases() {
      local api_url="https://api.github.com/repos/${owner}/${repo}/${page}"
      if [ "${page}" = "releases" ]; then
        api_url="$api_url?per_page=4"
      fi
      >&2 echo "$api_url"
      # -f makes HTTP errors (e.g. rate limiting) fail here instead of feeding an error body to jq
      curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -fsSL "$api_url"
    }

    find_valid_version() {
      local releases="$1"
      >&2 echo "$releases"
      # Wrap in array if not already an array to make handling specific release or tags page the same
      >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}'
      echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}'
    }

    releases="$(fetch_releases)"
    version="$(find_valid_version "$releases")"

    # jq -r prints the literal string "null" when the filter finds no matching tag
    if [ -z "$version" ] || [ "$version" = "null" ]; then
      echo "No valid version found in the fetched release(s)." >&2
      exit 1
    fi

    IFS='.' read -ra version_arr <<< "$version"
    >&2 echo "parsed major version ''${version_arr[0]} from $version"
    if (( ''${version_arr[0]} > 6 )); then
      echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." >&2
      exit 1
    fi

    update-source-version rocmPackages_6.${pname} "$version" --ignore-same-hash
  '';
in
[ updateScript ]